### North Dakota Production Data

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from os import listdir


The data is available from Enigma as a single large file that reports production for each individual oil well; it is not aggregated by county, so that aggregation is done here.

In [2]:
# Load the curated Enigma production CSV for North Dakota into a DataFrame.
nd_oil_prod = pd.read_csv('./OilGasProduction/NorthDakota/EnigmaProductionData_Curated.csv')

In [3]:
# Preview the first rows to check which columns were loaded.
nd_oil_prod.head()

Unnamed: 0,api_number,month,oil,gas,water,serialid
0,33025010130000,10-2015,1320,1858,253,2459818
1,33025010140000,10-2015,1784,1743,826,2459819
2,33025010160000,10-2015,3178,2542,1090,2459820
3,33025010170000,10-2015,4288,3431,1754,2459821
4,33025010180000,10-2015,1591,1750,213,2459822


In [4]:
# Summary statistics for the numeric columns.
# NOTE(review): the output reports negative minima for oil (-8) and water (-64);
# confirm whether such rows should be cleaned before production is summed.
nd_oil_prod.describe()

Unnamed: 0,api_number,oil,gas,water,serialid
count,2741562.0,2741562.0,2741562.0,2741562.0,2741562.0
mean,33044850000000.0,1415.911,1770.973,2111.38,1370782.0
std,33972570000.0,2740.403,6809.53,4741.633,791420.9
min,33001000000000.0,-8.0,0.0,-64.0,1.0
25%,33011010000000.0,145.0,0.0,54.0,685391.2
50%,33053000000000.0,512.0,176.0,641.0,1370782.0
75%,33061020000000.0,1553.0,1375.0,2240.0,2056172.0
max,33105040000000.0,445723.0,812411.0,246834.0,2741562.0


In [5]:
# Keep the first five characters of each API number as the key that is later
# looked up in the state/county code table.
nd_oil_prod['COUNTY_API_CODE'] = nd_oil_prod['api_number'].astype(str).str[:5]

In [6]:
# Load the lookup table of API state and county codes.
api_codes = pd.read_csv('./OilGasProduction/State_County_API.csv')

In [7]:
# Preview the lookup table: state code/name and county code/name per row.
api_codes.head()

Unnamed: 0.1,Unnamed: 0,API_State_Code,State_Name,County_Code,County_Name
0,0,1,ALABAMA,1,AUTAUGA
1,1,1,ALABAMA,3,BALDWIN
2,2,1,ALABAMA,5,BARBOUR
3,3,1,ALABAMA,7,BIBB
4,4,1,ALABAMA,9,BLOUNT


In [8]:
# Five-character key: state code zero-padded to two digits followed by the
# county code zero-padded to three digits.
api_codes['API_County_Codes'] = [
    "%02d" % state_code + "%03d" % county_code
    for state_code, county_code in zip(api_codes['API_State_Code'], api_codes['County_Code'])
]

In [9]:
# Lookup from the five-character state+county key to the county name.
api_county_dict = api_codes.set_index('API_County_Codes')['County_Name'].to_dict()

In [10]:
# Translate each five-character code into its county name via the lookup dict.
nd_oil_prod['County_Name'] = nd_oil_prod['COUNTY_API_CODE'].map(api_county_dict)

# Series.map leaves NaN for codes that are missing from the dict, and the later
# groupby on County_Name would silently drop those rows. Fail loudly instead so
# production is never lost without notice.
unmapped_codes = nd_oil_prod.loc[nd_oil_prod['County_Name'].isna(), 'COUNTY_API_CODE'].unique()
if len(unmapped_codes) > 0:
    raise ValueError("No county name found for API codes: %s" % sorted(unmapped_codes))

In [11]:
# List the distinct county codes present in the production data.
nd_oil_prod.COUNTY_API_CODE.unique()

array(['33025', '33033', '33049', '33053', '33055', '33061', '33075',
       '33087', '33089', '33101', '33105', '33007', '33009', '33011',
       '33023', '33013', '33041', '33057', '33001'], dtype=object)

In [12]:
# List the distinct county names produced by the mapping; a NaN entry here
# would indicate a code that was not found in the lookup table.
nd_oil_prod.County_Name.unique()

array(['DUNN', 'GOLDEN VALLEY', 'MC HENRY', 'MC KENZIE', 'MCLEAN',
       'MOUNTRAIL', 'RENVILLE', 'SLOPE', 'STARK', 'WARD', 'WILLIAMS',
       'BILLINGS', 'BOTTINEAU', 'BOWMAN', 'DIVIDE', 'BURKE', 'HETTINGER',
       'MERCER', 'ADAMS'], dtype=object)

In [13]:
# (rows, columns) of the North Dakota slice of the lookup table. The output
# shows 53 rows, versus the 19 distinct codes found in the production data.
api_codes[api_codes['State_Name'] == 'NORTH DAKOTA'].shape

(53, 6)

In [14]:
# Sum oil production over all rows sharing a (county, month) pair; only the
# 'oil' column is carried through the aggregation.
nd_oil_month = nd_oil_prod.groupby(['County_Name', 'month'])[['oil']].sum()

In [15]:
# Turn the (County_Name, month) group keys back into ordinary columns.
nd_oil_month = nd_oil_month.reset_index()

In [16]:
# The raw month strings look like '10-2015' (MM-YYYY). Stating the format
# explicitly removes any reliance on format guessing and makes a string that
# does not fit the pattern raise instead of being parsed unpredictably.
nd_oil_month['month'] = pd.to_datetime(nd_oil_month['month'], format='%m-%Y')

In [17]:
# Preview the aggregated table with the month column parsed as dates.
nd_oil_month.head()

Unnamed: 0,County_Name,month,oil
0,ADAMS,1995-01-01,20
1,ADAMS,1996-01-01,0
2,ADAMS,1997-01-01,0
3,ADAMS,1995-02-01,0
4,ADAMS,1996-02-01,0


In [18]:
# Relabel the three columns, in order, with their final names.
nd_oil_month = nd_oil_month.set_axis(
    ['County_Name', 'Date', 'Oil_Production'],
    axis='columns',
)

In [19]:
# Render the dates as 'MM/YYYY' text, the same layout the unemployment table
# uses in its Time column.
nd_oil_month = nd_oil_month.assign(Date=nd_oil_month['Date'].dt.strftime('%m/%Y'))

In [20]:
# Preview the table after renaming the columns and reformatting the dates.
nd_oil_month.head()

Unnamed: 0,County_Name,Date,Oil_Production
0,ADAMS,01/1995,20
1,ADAMS,01/1996,0
2,ADAMS,01/1997,0
3,ADAMS,02/1995,0
4,ADAMS,02/1996,0


In [21]:
# County names as they appear in the production data (upper case, some with an
# embedded space such as 'MC KENZIE').
nd_oil_month.County_Name.unique()

array(['ADAMS', 'BILLINGS', 'BOTTINEAU', 'BOWMAN', 'BURKE', 'DIVIDE',
       'DUNN', 'GOLDEN VALLEY', 'HETTINGER', 'MC HENRY', 'MC KENZIE',
       'MCLEAN', 'MERCER', 'MOUNTRAIL', 'RENVILLE', 'SLOPE', 'STARK',
       'WARD', 'WILLIAMS'], dtype=object)

The county names here are formatted differently from those in the Texas data and the unemployment data. The county names from the unemployment data are used below to convert them to the matching format.

In [22]:
# Load the North Dakota unemployment table; the melt that follows treats 'Time'
# as the id column and every other column as a county.
nd_unemp = pd.read_csv('./UnemploymentData/nd_unemployment.csv')

In [23]:
# Reshape to long form: one row per (Time, county column) with its rate.
nd_unemp_melted = nd_unemp.melt(
    id_vars=['Time'],
    var_name='County_Name',
    value_name='Unemployment_Rate',
)

In [24]:
# Preview the long-form unemployment table; county labels look like
# 'Adams County, ND'.
nd_unemp_melted.head()

Unnamed: 0,Time,County_Name,Unemployment_Rate
0,01/1990,"Adams County, ND",2.1
1,01/1991,"Adams County, ND",3.4
2,01/1992,"Adams County, ND",3.2
3,01/1993,"Adams County, ND",2.2
4,01/1994,"Adams County, ND",1.9


In [25]:
def _squash(label):
    """Return ``label`` lower-cased with all spaces removed, for loose comparison."""
    return label.replace(' ', '').lower()


# Build {production-style county name -> unemployment-style county label}.
# A production name matches a label when, ignoring case and spaces, it occurs
# inside the label (e.g. 'MC KENZIE' inside 'McKenzie County, ND').
unemp_county_labels = nd_unemp_melted.County_Name.unique()  # loop-invariant, computed once
nd_format_county = dict()

for county in nd_oil_month.County_Name.unique():
    matches = [label for label in unemp_county_labels if _squash(county) in _squash(label)]
    # Exactly one label must match: zero matches would give an empty name and
    # several matches would be glued together into one meaningless string.
    if len(matches) != 1:
        raise ValueError(
            "Expected exactly one unemployment label for county %r, found %r" % (county, matches)
        )
    nd_format_county[county] = matches[0]

In [26]:
# Inspect the mapping to confirm every county resolved to a single label.
nd_format_county

{'ADAMS': 'Adams County, ND',
 'BILLINGS': 'Billings County, ND',
 'BOTTINEAU': 'Bottineau County, ND',
 'BOWMAN': 'Bowman County, ND',
 'BURKE': 'Burke County, ND',
 'DIVIDE': 'Divide County, ND',
 'DUNN': 'Dunn County, ND',
 'GOLDEN VALLEY': 'Golden Valley County, ND',
 'HETTINGER': 'Hettinger County, ND',
 'MC HENRY': 'McHenry County, ND',
 'MC KENZIE': 'McKenzie County, ND',
 'MCLEAN': 'McLean County, ND',
 'MERCER': 'Mercer County, ND',
 'MOUNTRAIL': 'Mountrail County, ND',
 'RENVILLE': 'Renville County, ND',
 'SLOPE': 'Slope County, ND',
 'STARK': 'Stark County, ND',
 'WARD': 'Ward County, ND',
 'WILLIAMS': 'Williams County, ND'}

In [27]:
# Swap the production-style names for the unemployment-style labels.
nd_oil_month = nd_oil_month.assign(
    County_Name=nd_oil_month['County_Name'].map(nd_format_county)
)

In [28]:
# Collapse to one row per (county label, date). Production volumes are additive,
# so if relabelling or date formatting ever makes two rows share a key they are
# summed; taking the maximum would silently discard part of the production.
# When every key is already unique the result is the same either way.
nd_oil_month = nd_oil_month.groupby(['County_Name', 'Date']).agg({'Oil_Production': 'sum'})

In [29]:
# Bring County_Name and Date back as columns so they can be used for pivoting.
nd_oil_month = nd_oil_month.reset_index()

In [30]:
# Wide layout: one row per Date, one column per county label.
nd_oil_month = (
    nd_oil_month
    .set_index(['Date', 'County_Name'])['Oil_Production']
    .unstack('County_Name')
)

In [31]:
# Preview the wide table; NaN marks county/date combinations with no rows.
nd_oil_month.head()

County_Name,"Adams County, ND","Billings County, ND","Bottineau County, ND","Bowman County, ND","Burke County, ND","Divide County, ND","Dunn County, ND","Golden Valley County, ND","Hettinger County, ND","McHenry County, ND","McKenzie County, ND","McLean County, ND","Mercer County, ND","Mountrail County, ND","Renville County, ND","Slope County, ND","Stark County, ND","Ward County, ND","Williams County, ND"
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
01/1952,,,,,,,,,,,,,,,,,,,7166.0
01/1953,,,293.0,,,,,,,,2683.0,,,48367.0,,,,,405895.0
01/1954,,9001.0,1635.0,,1255.0,,,,,,20670.0,,,84835.0,,,,,335948.0
01/1955,,21178.0,12933.0,,7099.0,,,,,,64412.0,,,220109.0,,,,,669576.0
01/1956,,18302.0,32257.0,,12525.0,,,,,,145492.0,,,261804.0,,,365.0,,862845.0


In [32]:
# Replace the NaN cells left by the pivot with 0.
# NOTE(review): this treats "no record for that county and month" as zero
# production; confirm that is the intended meaning downstream.
nd_oil_month = nd_oil_month.fillna(0)

In [33]:
# Save the county-by-date production table as CSV.
nd_oil_month.to_csv('./OilGasProduction/NorthDakota/NDOilProdCounty.csv')