## Wyoming oil production data

Oklahoma produces more oil than Wyoming, but I could not get clean historical oil production data for Oklahoma. So I decided to switch to Wyoming. Wyoming is however also a major oil producing state.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from os import listdir

In [2]:
wy_oil_prod = pd.read_csv('EnigmaProduction_withCounty.csv')

In [3]:
wy_oil_prod.head()

Unnamed: 0,api_no,wellname,county,company,field_name,horiz_dir,sec,twp,t_dir,rge,...,gravity03,gravity04,gravity05,gravity06,gravity07,gravity08,gravity09,gravity10,gravity11,gravity12
0,3720417,BRADY 4W GAS STORE 46,37,NORTH SHORE EXPLORATION & PROD LLC,BRADY,N,12,16,N,101,...,0.0,0.0,0.0,52.0,52,52.0,51.0,53.0,53.0,50.0
1,3720418,CHAMPLIN 1-7,37,SAMSON RESOURCES COMPANY,ANTELOPE,N,7,17,N,99,...,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0
2,3720435,SOUTH BRADY UT 64,37,ANADARKO E&P ONSHORE LLC,BRADY,N,2,16,N,101,...,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0
3,3720441,FEDERAL 1-8,37,PURE PETROLEUM LLC,PRIMITIVE,N,8,16,N,94,...,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0
4,3720451,FEDERAL 1-18,37,FOUNDATION ENERGY MANAGMENT LLC,WILD ROSE,N,18,17,N,94,...,0.0,0.0,0.0,53.0,0,0.0,0.0,0.0,54.0,0.0


In [4]:
np.sort(wy_oil_prod.county.unique())

array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33,
       35, 37, 39, 41, 43, 45])

In [5]:
wy_oil_prod.county.nunique()

23

Wyoming does indeed have 23 counties, so we should have correct data.

In [6]:
wy_oil_prod.columns

Index(['api_no', 'wellname', 'county', 'company', 'field_name', 'horiz_dir',
       'sec', 'twp', 't_dir', 'rge', 'r_dir', 'qtr1', 'qtr2', 'lat', 'long',
       'year', 'jan_oil', 'jan_gas', 'jan_water', 'jan_days', 'feb_oil',
       'feb_gas', 'feb_water', 'feb_days', 'mar_oil', 'mar_gas', 'mar_water',
       'mar_days', 'apr_oil', 'apr_gas', 'apr_water', 'apr_days', 'may_oil',
       'may_gas', 'may_water', 'may_days', 'jun_oil', 'jun_gas', 'jun_water',
       'jun_days', 'jul_oil', 'jul_gas', 'jul_water', 'jul_days', 'aug_oil',
       'aug_gas', 'aug_water', 'aug_days', 'sep_oil', 'sep_gas', 'sep_water',
       'sep_days', 'oct_oil', 'oct_gas', 'oct_water', 'oct_days', 'nov_oil',
       'nov_gas', 'nov_water', 'nov_days', 'dec_oil', 'dec_gas', 'dec_water',
       'dec_days', 'formation', 'wellclass', 'coalbed', 'gravity01',
       'gravity02', 'gravity03', 'gravity04', 'gravity05', 'gravity06',
       'gravity07', 'gravity08', 'gravity09', 'gravity10', 'gravity11',
       'gravity12

Isolating only the oil production:

In [7]:
wy_oil_cols = [x for x in wy_oil_prod.columns if 'oil' in x]

Add in year and county information to the oil columns:

In [8]:
wy_oil_cols = ['year','county'] + wy_oil_cols 

In [9]:
wy_oil_small = wy_oil_prod[wy_oil_cols]

In [10]:
wy_oil_small.head()

Unnamed: 0,year,county,jan_oil,feb_oil,mar_oil,apr_oil,may_oil,jun_oil,jul_oil,aug_oil,sep_oil,oct_oil,nov_oil,dec_oil
0,2007,37,0,0,0,0,0,0,0,0,0,0,0.0,0
1,2007,37,0,0,0,0,0,0,0,0,0,0,0.0,0
2,2007,37,0,0,0,0,0,0,0,0,0,0,0.0,0
3,2007,37,27,0,30,17,62,15,0,25,25,0,0.0,0
4,2007,37,0,0,0,127,54,47,50,45,36,42,126.0,28


Now we can aggregate the data by year and county. Since the columns are already split by month, we do not need to group by month.

In [11]:
wy_oil_month = wy_oil_small.groupby(['year','county']).agg('sum')

In [12]:
wy_oil_month.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,jan_oil,feb_oil,mar_oil,apr_oil,may_oil,jun_oil,jul_oil,aug_oil,sep_oil,oct_oil,nov_oil,dec_oil
year,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1978,1,15284,13252,15965,14826,14805,13843,14004,14175,13975,14459,13466.0,11625
1978,3,592214,536490,618168,566927,586773,573317,587189,575776,554680,567429,534931.0,584432
1978,5,2736960,2545778,3039386,2970991,2807554,2727369,2634865,2668437,2694470,2588294,2413014.0,2087176
1978,7,81937,67972,74951,77276,70392,73878,75608,77259,74186,85038,83313.0,67453
1978,9,703327,706696,738703,722868,642479,749429,754390,783308,700533,677918,633683.0,448408


In [13]:
wy_oil_month.reset_index(inplace=True)

We now fix the formatting of the month columns.

In [14]:
wy_oil_month = pd.melt(wy_oil_month, id_vars = ['year','county'],var_name='month')

In [15]:
month_dict = dict(zip(['jan_oil', 'feb_oil', 'mar_oil', 'apr_oil', 'may_oil',
       'jun_oil', 'jul_oil', 'aug_oil', 'sep_oil', 'oct_oil', 'nov_oil',
       'dec_oil'],range(1,13)))

In [16]:
month_dict

{'jan_oil': 1,
 'feb_oil': 2,
 'mar_oil': 3,
 'apr_oil': 4,
 'may_oil': 5,
 'jun_oil': 6,
 'jul_oil': 7,
 'aug_oil': 8,
 'sep_oil': 9,
 'oct_oil': 10,
 'nov_oil': 11,
 'dec_oil': 12}

In [17]:
wy_oil_month.month.unique()

array(['jan_oil', 'feb_oil', 'mar_oil', 'apr_oil', 'may_oil', 'jun_oil',
       'jul_oil', 'aug_oil', 'sep_oil', 'oct_oil', 'nov_oil', 'dec_oil'],
      dtype=object)

In [18]:
wy_oil_month.month = wy_oil_month.month.map(month_dict)

In [19]:
wy_oil_month.month.unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [20]:
wy_oil_month.head()

Unnamed: 0,year,county,month,value
0,1978,1,1,15284.0
1,1978,3,1,592214.0
2,1978,5,1,2736960.0
3,1978,7,1,81937.0
4,1978,9,1,703327.0


We can now get the date of production, by county, in the format (%m/%Y).

In [21]:
wy_oil_month['Date'] = wy_oil_month['month'].astype(str) + '/' + wy_oil_month['year'].astype(str)

In [22]:
wy_oil_month.head()

Unnamed: 0,year,county,month,value,Date
0,1978,1,1,15284.0,1/1978
1,1978,3,1,592214.0,1/1978
2,1978,5,1,2736960.0,1/1978
3,1978,7,1,81937.0,1/1978
4,1978,9,1,703327.0,1/1978


In [23]:
wy_oil_month.Date = pd.to_datetime(wy_oil_month.Date)

In [24]:
wy_oil_month.head()

Unnamed: 0,year,county,month,value,Date
0,1978,1,1,15284.0,1978-01-01
1,1978,3,1,592214.0,1978-01-01
2,1978,5,1,2736960.0,1978-01-01
3,1978,7,1,81937.0,1978-01-01
4,1978,9,1,703327.0,1978-01-01


In [25]:
wy_oil_month.Date = wy_oil_month.Date.dt.strftime('%m/%Y')

In [26]:
wy_oil_month.head()

Unnamed: 0,year,county,month,value,Date
0,1978,1,1,15284.0,01/1978
1,1978,3,1,592214.0,01/1978
2,1978,5,1,2736960.0,01/1978
3,1978,7,1,81937.0,01/1978
4,1978,9,1,703327.0,01/1978


We can now map the county numbers to the actual county names. We use the county FIPS codes table provided in the repository.

In [27]:
area_codes = pd.read_csv('../../Unemployment/BLS_AreaCodes.txt',sep='\t',index_col=False)
county_codes = area_codes[area_codes['area_type_code'] == 'F']
county_codes = county_codes.reset_index(drop=True)

In [28]:
county_codes['FIPS code'] = list(map(lambda x: x[2:7],county_codes.area_code))
county_FIPS_names = dict(zip(county_codes['FIPS code'],county_codes['area_text']))

In [29]:
wy_oil_month['County_FIPS_Code'] = ["56%03d" % x for x in wy_oil_month.county]

In [30]:
wy_oil_month['County_Name'] = wy_oil_month['County_FIPS_Code'].map(county_FIPS_names)

In [31]:
wy_oil_month.head()

Unnamed: 0,year,county,month,value,Date,County_FIPS_Code,County_Name
0,1978,1,1,15284.0,01/1978,56001,"Albany County, WY"
1,1978,3,1,592214.0,01/1978,56003,"Big Horn County, WY"
2,1978,5,1,2736960.0,01/1978,56005,"Campbell County, WY"
3,1978,7,1,81937.0,01/1978,56007,"Carbon County, WY"
4,1978,9,1,703327.0,01/1978,56009,"Converse County, WY"


In [32]:
wy_oil_month.County_Name.nunique()

23

Dropping data we don't need:

In [33]:
wy_oil_month = wy_oil_month[['Date','County_Name','value']]

In [34]:
wy_oil_month.columns = ['Date','County_Name','Oil_Production']

In [35]:
wy_oil_month = wy_oil_month.pivot(index='Date',columns='County_Name',values='Oil_Production')

In [36]:
wy_oil_month.head()

County_Name,"Albany County, WY","Big Horn County, WY","Campbell County, WY","Carbon County, WY","Converse County, WY","Crook County, WY","Fremont County, WY","Goshen County, WY","Hot Springs County, WY","Johnson County, WY",...,"Niobrara County, WY","Park County, WY","Platte County, WY","Sheridan County, WY","Sublette County, WY","Sweetwater County, WY","Teton County, WY","Uinta County, WY","Washakie County, WY","Weston County, WY"
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01/1978,15284.0,592214.0,2736960.0,81937.0,703327.0,251328.0,585346.0,148.0,971398.0,312982.0,...,61770.0,2449367.0,0.0,14346.0,218379.0,1110339.0,0.0,98920.0,179062.0,187689.0
01/1979,9540.0,545723.0,2227846.0,74866.0,567789.0,185432.0,550979.0,899.0,948356.0,303007.0,...,65122.0,2462386.0,0.0,17379.0,184025.0,1141155.0,0.0,264327.0,158802.0,146719.0
01/1980,12619.0,514451.0,1906703.0,133806.0,604764.0,249600.0,593488.0,216.0,864407.0,296805.0,...,128582.0,2358797.0,0.0,13779.0,162901.0,1005964.0,0.0,277095.0,185251.0,211796.0
01/1981,14555.0,493278.0,1875626.0,176513.0,599660.0,245288.0,592677.0,127.0,859985.0,269188.0,...,124239.0,2166742.0,,10983.0,172817.0,857810.0,0.0,640943.0,224819.0,172130.0
01/1982,14993.0,477089.0,1782195.0,208224.0,557598.0,237093.0,544125.0,1934.0,827732.0,238697.0,...,129312.0,2105863.0,,21145.0,152274.0,934209.0,0.0,542497.0,193858.0,232484.0


In [37]:
wy_oil_month = wy_oil_month.fillna(0)

In [38]:
max(wy_oil_month.index)

'12/2017'

I'm dropping the 2017 data, as other states have data only till Dec, 2016.

In [45]:
wy_oil_month.index = pd.to_datetime(wy_oil_month.index)

In [46]:
wy_oil_month.sort_index(axis=0,inplace=True)

In [47]:
wy_oil_month = wy_oil_month.loc[:'2016-12-01']

In [51]:
wy_oil_month.tail()

County_Name,"Albany County, WY","Big Horn County, WY","Campbell County, WY","Carbon County, WY","Converse County, WY","Crook County, WY","Fremont County, WY","Goshen County, WY","Hot Springs County, WY","Johnson County, WY",...,"Niobrara County, WY","Park County, WY","Platte County, WY","Sheridan County, WY","Sublette County, WY","Sweetwater County, WY","Teton County, WY","Uinta County, WY","Washakie County, WY","Weston County, WY"
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-08-01,2194.0,119544.0,1585998.0,97710.0,837574.0,89818.0,300321.0,0.0,176547.0,100719.0,...,63348.0,514261.0,0.0,1536.0,526474.0,480923.0,0.0,49516.0,51373.0,78001.0
2016-09-01,2113.0,113376.0,1412373.0,100385.0,822229.0,87563.0,299619.0,0.0,172145.0,96179.0,...,61652.0,498938.0,0.0,1710.0,518961.0,471137.0,0.0,51359.0,45432.0,76896.0
2016-10-01,2558.0,117842.0,1585363.0,113021.0,810179.0,87191.0,315067.0,0.0,175932.0,102113.0,...,63595.0,507230.0,0.0,1627.0,533028.0,506529.0,0.0,49877.0,47862.0,77100.0
2016-11-01,2246.0,113799.0,1497360.0,110118.0,770145.0,86833.0,312167.0,0.0,171916.0,94362.0,...,59645.0,503968.0,0.0,1223.0,505685.0,502901.0,0.0,51670.0,47194.0,76795.0
2016-12-01,3791.0,115508.0,1424697.0,99922.0,749765.0,88457.0,309459.0,0.0,176552.0,93170.0,...,55977.0,508477.0,0.0,1034.0,509557.0,500253.0,0.0,53460.0,45735.0,62062.0


In [52]:
wy_oil_month.to_csv('WYOilProdCounty.csv')