In [1]:
import pandas as pd
import geopandas as gp
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from geopy.geocoders import Nominatim
import shapely
import math

In [2]:
import os
#had to add GDAL_DATA variable to system variables and set value to the folder of gdal in C:\Users\mishaun\AppData\Local\Continuum\anaconda3\Library\share\gdal on my work computer
'GDAL_DATA' in os.environ

True

In [3]:
#These 2 lines of code will allow for all output to be displayed within a given cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Reading in Data

In [4]:
shapezipfile = ("zip://Data/BLMWY-2020-Q1-3_WGS84.zip")
tractshp = gp.read_file(shapezipfile, encoding = "utf-8")

In [5]:
#this is shapefile downloaded from drillinginfo holding well information
prodshp = gp.read_file("zip://Data/production.ZIP")
permitshp = gp.read_file("zip://Data/permits.ZIP")

#reading in csv of leases - converting to a GeoDataFrame - initial coord system is epsg:4326
leases = pd.read_csv("Data/LeasesTable.CSV")
leasesgeo = gp.GeoDataFrame(leases, crs = {'init': 'epsg:4326'}, geometry=gp.points_from_xy(leases["Longitude (WGS84)"], leases["Latitude (WGS84)"]))

### Trimming and Cleaning Data

In [6]:
leasesgeo.columns

Index(['State/Province', 'Effective Date', 'Record Date',
       'Expiration of Primary Term', 'Term (Months)', 'Grantor',
       'Grantee Alias', 'Royalty', 'Bonus', 'Area (Acres)', 'Section',
       'Township', 'Township Direction', 'Range', 'Range Direction',
       'Vol/Page', 'Record Number', 'Instrument Type', 'Instrument Date',
       'County/Parish', 'Options/Extensions', 'DI Basin', 'Ext. Bonus',
       'Ext. Term (Months)', 'Abstract', 'Block', 'BLM', 'State Lease',
       'Grantee', 'Grantor Address', 'Grantee Address', 'Max Depth',
       'Majority Legal Assignee', 'DI Subplay', 'Min Depth',
       'Majority Assignment Effective Date', 'Latitude (WGS84)', 'DI Play',
       'Majority Legal Assignee Interest', 'Longitude (WGS84)',
       'Majority Assignment Vol/Page', 'geometry'],
      dtype='object')

In [7]:
leasesgeo.drop(columns = ['Instrument Type', 'Instrument Date','Options/Extensions', 'DI Basin', 'Ext. Bonus',
       'Ext. Term (Months)', 'Abstract', 'Block', 'BLM', 'State Lease',
       'Grantee', 'Grantor Address', 'Grantee Address', 'Max Depth',
       'Majority Legal Assignee', 'DI Subplay', 'Min Depth',
       'Majority Assignment Effective Date','Majority Legal Assignee Interest','Majority Assignment Vol/Page'], inplace = True)

In [8]:
prodshp.columns

Index(['APIUWI', 'OpAlias', 'LeaseName', 'WellNo', 'County', 'Reservoir',
       'ProdType', 'ProdStatus', 'DrillType', 'TD', 'SpudDate', 'FstPrdDate',
       'LstPrdDate', 'MoProd', 'CumGas', 'DailyGas', 'CumLiq', 'DailyLiq',
       'LatestLiq', 'LatestGas', 'CumWtr', 'CumBOE', 'DISubplay', '1moLiq',
       '1moGas', '6moLiq', 'DIBasin', '6moGas', '6moBOE', '6moWater', 'DIPlay',
       'PracIP_Liq', 'PracIP_BOE', 'PracIP_Gas', 'PrcIPCFGED', 'LatestWtr',
       'Prior12Liq', 'Prior12Gas', 'LastTestDt', 'Prior12Wtr', 'LastFlwPrs',
       'LastWHSIP', '2moGOR', 'LatestGOR', 'CumGOR', 'Lst12Yield', '2moYield',
       'LatestYld', 'PeakGas', 'PkGasMoNo', 'PeakLiq', 'PkLiqMoNo', 'PeakBOE',
       'PkBOEMoNo', 'PkMMCFGE', 'PkMMCFGMoN', 'TopPerf', 'BtmPerf', 'GasGrav',
       'OilGrav', 'CompDate', 'WellCount', 'MaxActvWel', 'GasGather',
       'LiqGather', 'LeaseNo', 'PerfLength', 'TVD', 'Field', 'State',
       'District', 'GeoProvin', 'Section', 'Country', 'Township', 'Range',
       'Lati

In [9]:
prodshp.drop(columns = ['LatestWtr','CumWtr',
       'Prior12Liq', 'Prior12Gas', 'LastTestDt', 'Prior12Wtr', 'LastFlwPrs',
       'LastWHSIP', '2moGOR', 'LatestGOR', 'CumGOR', 'Lst12Yield', '2moYield',
       'LatestYld', 'PeakGas', 'PkGasMoNo', 'PeakLiq', 'PkLiqMoNo', 'PeakBOE',
       'PkBOEMoNo', 'PkMMCFGE', 'PkMMCFGMoN', 'TopPerf', 'BtmPerf', 'GasGrav',
       'OilGrav','CompDate', 'GasGather',
       'LiqGather', 'LeaseNo'], inplace = True)

In [10]:
permitshp.columns

Index(['API10UWI', 'District', 'FiledDate', 'AprvdDate', 'ExpDate', 'State',
       'County', 'OpAlias', 'LeaseName', 'WellNo', 'Formation', 'PermDepth',
       'TVD', 'PermitType', 'WellType', 'DrillType', 'WellStatus',
       'PermStatus', 'Field', 'OpReported', 'AmendDate', 'CntctName',
       'CntctPhone', 'OperAddrs', 'OperCity', 'OperState', 'OperZip',
       'OperCity30', 'Section', 'OperCity50', 'Township', 'Range', 'Block',
       'Survey', 'TVD_UOM', 'Abstract', 'WGID', 'H2S_Area', 'Latitude',
       'Longitude', 'OFS_Reg', 'Btm_Lat', 'Btm_Lon', 'LeaseNo', 'PermDUOM',
       'PermitNo', 'DIBasin', 'DIPlay', 'DISubplay', 'OpCompany', 'OpTicker',
       'geometry'],
      dtype='object')

In [11]:
permitshp.drop(columns = ['OpReported', 'AmendDate', 'CntctName',
       'CntctPhone', 'OperAddrs', 'OperCity', 'OperState', 'OperZip',
       'OperCity30', 'Section', 'OperCity50', 'Township', 'Range', 'Block',
       'Survey', 'TVD_UOM', 'Abstract', 'WGID', 'H2S_Area','OFS_Reg', 'LeaseNo', 'PermDUOM',
       'PermitNo','OpCompany', 'OpTicker'], inplace=True)

In [12]:
leasesgeo["Record Date"] = pd.to_datetime(leasesgeo["Record Date"])
prodshp["FstPrdDate"] = pd.to_datetime(prodshp["FstPrdDate"])
permitshp["AprvdDate"] = pd.to_datetime(permitshp["AprvdDate"])

In [13]:
leasesgeo.info()
prodshp.info()
permitshp.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 53266 entries, 0 to 53265
Data columns (total 22 columns):
State/Province                53266 non-null object
Effective Date                31049 non-null object
Record Date                   53266 non-null datetime64[ns]
Expiration of Primary Term    53266 non-null object
Term (Months)                 53266 non-null int64
Grantor                       53266 non-null object
Grantee Alias                 53239 non-null object
Royalty                       33903 non-null float64
Bonus                         18347 non-null float64
Area (Acres)                  52506 non-null float64
Section                       53266 non-null float64
Township                      53266 non-null float64
Township Direction            53266 non-null object
Range                         53266 non-null float64
Range Direction               53266 non-null object
Vol/Page                      53266 non-null object
Record Number                 53266 no

### Converting geodataframes to same coord system - UTM system for creating buffers

In [14]:
prodshp.to_crs(epsg = 26913, inplace = True)
permitshp.to_crs(epsg = 26913, inplace = True)
leasesgeo.to_crs(epsg = 26913, inplace = True)
tractshp.to_crs(epsg = 26913, inplace = True)

In [15]:
prodshp.crs
permitshp.crs
leasesgeo.crs

{'init': 'epsg:26913', 'no_defs': True}

{'init': 'epsg:26913', 'no_defs': True}

{'init': 'epsg:26913', 'no_defs': True}

### Adding a column to tract shapefile data for centroids of each tract for creating buffers upon

In [16]:
tractshp["centroids"] = tractshp.centroid
#calculating acres of tract, conv to acres is m^2 to acres
tractshp['Acres'] = round(tractshp.area * 0.000247105)
tractshp.head()

Unnamed: 0,SaleParcel,lot_no,tract_id,short_code,label,geometry,centroids,Acres
0,WY-201Q-105,65012,2,BLMWY-2020-Q1-3,WY-2020-03-0374,"MULTIPOLYGON (((173527.819 4984155.461, 173534...",POINT (172183.913 4984182.822),941.0
1,WY-201Q-063,64970,16,BLMWY-2020-Q1-3,WY-2020-03-6207,"MULTIPOLYGON (((219007.722 4650891.363, 218687...",POINT (222418.619 4647579.181),1322.0
2,WY-201Q-001,64908,93,BLMWY-2020-Q1-3,WY-2020-03-6613,"POLYGON ((562927.151 4797521.250, 563330.976 4...",POINT (563130.227 4797125.862),80.0
3,WY-201Q-002,64909,102,BLMWY-2020-Q1-3,WY-2020-03-6660,"POLYGON ((514894.040 4607634.757, 515295.946 4...",POINT (515096.374 4607429.194),40.0
4,WY-201Q-003,64910,89,BLMWY-2020-Q1-3,WY-2020-03-6585,"MULTIPOLYGON (((518971.985 4775132.462, 519376...",POINT (520545.083 4773447.285),2316.0


In [17]:
#adding buffer around centroid point from tract of 3 mi (1609.34 meters = 1 mile)
milesbuffer = 3 * 1609.34

tractshp["buffers"] = tractshp.centroids.apply(lambda x: x.buffer(milesbuffer,20))


# Testing Spatial Filters

In [18]:
TestT = 31
tractTest = tractshp[tractshp["tract_id"] == TestT].iloc[0]


permFiltered = permitshp.within(tractshp[tractshp["tract_id"] ==TestT]["buffers"].iloc[0])
prodFiltered = prodshp.within(tractshp[tractshp["tract_id"] ==TestT]["buffers"].iloc[0])
leasesFiltered = leasesgeo.within(tractshp[tractshp["tract_id"]==TestT]["buffers"].iloc[0])

In [19]:
permFiltered.value_counts()

False    46469
dtype: int64

In [20]:
permittoeval = permitshp.loc[permFiltered]

In [21]:
prodtoeval = prodshp.loc[prodFiltered]

In [22]:
prodFiltered.value_counts()

False    62880
dtype: int64

In [23]:
leasesFiltered.value_counts()

False    53240
True        26
dtype: int64

In [24]:
leasestoeval = leasesgeo.loc[leasesFiltered]

In [25]:
leasestoeval.drop_duplicates("Record Number",inplace = True)
leasestoeval

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,State/Province,Effective Date,Record Date,Expiration of Primary Term,Term (Months),Grantor,Grantee Alias,Royalty,Bonus,Area (Acres),...,Township Direction,Range,Range Direction,Vol/Page,Record Number,County/Parish,Latitude (WGS84),DI Play,Longitude (WGS84),geometry
23111,WY,,2018-09-18,2028-09-18,120,BUREAU OF LAND MANAGEMENT,SITKA ENERGY,0.125,7.0,513.700012,...,N,101.0,W,WY-183Q-302/NA,WYW187545,SWEETWATER (WY),42.169544,GREEN RIVER - OVERTHRUST,-108.81983,POINT (184468.498 4675667.308)
23116,WY,,2018-09-18,2028-09-18,120,BUREAU OF LAND MANAGEMENT,BRO ENERGY,0.125,6.0,695.109985,...,N,101.0,W,WY-183Q-303/NA,WYW187546,SWEETWATER (WY),42.155037,GREEN RIVER - OVERTHRUST,-108.819769,POINT (184401.331 4674055.971)
23270,WY,,2018-09-18,2028-09-18,120,BUREAU OF LAND MANAGEMENT,SITKA ENERGY,0.125,6.0,1922.47998,...,N,100.0,W,WY-183Q-288/NA,WYW187536,SWEETWATER (WY),42.169326,GREEN RIVER - OVERTHRUST,-108.742222,POINT (190879.051 4675358.708)
23278,WY,,2018-09-18,2028-09-18,120,BUREAU OF LAND MANAGEMENT,KIRKWOOD O&G,0.125,2.0,2548.679932,...,N,100.0,W,WY-183Q-290/NA,WYW187538,SWEETWATER (WY),42.154873,GREEN RIVER - OVERTHRUST,-108.781019,POINT (187602.658 4673895.000)
23404,WY,,2018-09-18,2028-09-18,120,BUREAU OF LAND MANAGEMENT,BRO ENERGY,0.125,6.0,1897.719971,...,N,100.0,W,WY-183Q-289/NA,WYW187537,SWEETWATER (WY),42.169322,GREEN RIVER - OVERTHRUST,-108.800085,POINT (186098.674 4675569.809)
26898,WY,,2018-07-18,2023-07-18,60,STATE OF WYOMING,KIRKWOOD O&G,0.1667,3.0,40.0,...,N,100.0,W,122/NA,18-00268,SWEETWATER (WY),42.183925,GREEN RIVER - OVERTHRUST,-108.742085,POINT (190961.599 4676979.478)


### Calculating distance between leases within 3 mi radius and tract's centroid

In [26]:
#calculating distance (in miles) away lease within 3 mi radius is to tract of interest
leasestoeval["distance"] = leasestoeval["geometry"].apply(lambda x: x.distance(tractTest["centroids"])/1609.34)
leasestoeval

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,State/Province,Effective Date,Record Date,Expiration of Primary Term,Term (Months),Grantor,Grantee Alias,Royalty,Bonus,Area (Acres),...,Range,Range Direction,Vol/Page,Record Number,County/Parish,Latitude (WGS84),DI Play,Longitude (WGS84),geometry,distance
23111,WY,,2018-09-18,2028-09-18,120,BUREAU OF LAND MANAGEMENT,SITKA ENERGY,0.125,7.0,513.700012,...,101.0,W,WY-183Q-302/NA,WYW187545,SWEETWATER (WY),42.169544,GREEN RIVER - OVERTHRUST,-108.81983,POINT (184468.498 4675667.308),2.258084
23116,WY,,2018-09-18,2028-09-18,120,BUREAU OF LAND MANAGEMENT,BRO ENERGY,0.125,6.0,695.109985,...,101.0,W,WY-183Q-303/NA,WYW187546,SWEETWATER (WY),42.155037,GREEN RIVER - OVERTHRUST,-108.819769,POINT (184401.331 4674055.971),2.842761
23270,WY,,2018-09-18,2028-09-18,120,BUREAU OF LAND MANAGEMENT,SITKA ENERGY,0.125,6.0,1922.47998,...,100.0,W,WY-183Q-288/NA,WYW187536,SWEETWATER (WY),42.169326,GREEN RIVER - OVERTHRUST,-108.742222,POINT (190879.051 4675358.708),2.203406
23278,WY,,2018-09-18,2028-09-18,120,BUREAU OF LAND MANAGEMENT,KIRKWOOD O&G,0.125,2.0,2548.679932,...,100.0,W,WY-183Q-290/NA,WYW187538,SWEETWATER (WY),42.154873,GREEN RIVER - OVERTHRUST,-108.781019,POINT (187602.658 4673895.000),2.006564
23404,WY,,2018-09-18,2028-09-18,120,BUREAU OF LAND MANAGEMENT,BRO ENERGY,0.125,6.0,1897.719971,...,100.0,W,WY-183Q-289/NA,WYW187537,SWEETWATER (WY),42.169322,GREEN RIVER - OVERTHRUST,-108.800085,POINT (186098.674 4675569.809),1.429576
26898,WY,,2018-07-18,2023-07-18,60,STATE OF WYOMING,KIRKWOOD O&G,0.1667,3.0,40.0,...,100.0,W,122/NA,18-00268,SWEETWATER (WY),42.183925,GREEN RIVER - OVERTHRUST,-108.742085,POINT (190961.599 4676979.478),1.966219


#test to get compass direction

a = leasestoeval["geometry"].iloc[0].x
b = leasestoeval["geometry"].iloc[0].y
c = tractTest["centroids"].x
d = tractTest["centroids"].y

#south and west are positive direction for my axis convention
western = c - a
southern = d - b

diff = western/southern
compdeg = math.atan(diff)
compdeg *= 57.2958
compdeg

if southern > 0:
    if compdeg < 25:
        carddir = "S"
    elif compdeg > 75:
        if western>0:
            carddir = "W"
        else:
            carddir = "E"
    else:
        if western>0:
            carddir = "SW"
        else:
            carddir = "SE"
else:
    if compdeg < 25:
        carddir = "S"
    elif compdeg > 75:
        if western>0:
            carddir = "W"
        else:
            carddir = "E"
    else:
        if western>0:
            carddir = "SW"
        else:
            carddir = "SE"  
                
carddir    
    

In [49]:
#function to get compass direction

def cardDir(point, tractRef):
    
    a = point.x
    b = point.y
    c = tractRef["centroids"].x
    d = tractRef["centroids"].y

    #south and west are positive direction for my axis convention
    western = c - a
    southern = d - b

    diff = western/southern
    compdeg = math.atan(diff)
    compdeg *= 57.2958
    compdeg

    if southern > 0:
        if compdeg < 25:
            carddir = "S"
        elif compdeg > 75:
            if western>0:
                carddir = "W"
            else:
                carddir = "E"
        else:
            if western>0:
                carddir = "SW"
            else:
                carddir = "SE"
    else:
        if compdeg < 25:
            carddir = "N"
        elif compdeg > 75:
            if western>0:
                carddir = "W"
            else:
                carddir = "E"
        else:
            if western>0:
                carddir = "NW"
            else:
                carddir = "NE"  
     
    return carddir           
   
    

In [51]:
leasestoeval["direction"] = leasestoeval["geometry"].apply(lambda x: cardDir(x, tractTest))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


### Statistical summaries: bonus by year, closest lease, etc

In [32]:
leasestoeval.groupby(leasestoeval["Record Date"].apply(lambda x: x.year)).describe()

Unnamed: 0_level_0,Term (Months),Term (Months),Term (Months),Term (Months),Term (Months),Term (Months),Term (Months),Term (Months),Royalty,Royalty,...,Longitude (WGS84),Longitude (WGS84),distance,distance,distance,distance,distance,distance,distance,distance
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Record Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018,6.0,110.0,24.494897,60.0,120.0,120.0,120.0,120.0,6.0,0.13195,...,-108.751921,-108.742085,6.0,2.117768,0.460778,1.429576,1.976305,2.104985,2.244414,2.842761


In [33]:
leasestoeval["Record Date"]
leasestoeval.describe()["Bonus"]["mean"]

23111   2018-09-18
23116   2018-09-18
23270   2018-09-18
23278   2018-09-18
23404   2018-09-18
26898   2018-07-18
Name: Record Date, dtype: datetime64[ns]

5.0