In [1]:
import pandas as pd
import datetime as dt

# Raise pandas' display limits: up to 1000 columns and 100 rows per rendered frame.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 100)

# Echo the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Execute the shared config notebook in this kernel; raw_data_folder and
# processed_data_folder used below are presumably defined there — TODO confirm.
%run configs.ipynb

In [2]:
# parameter cell: default input file. The "# Parameters" cell that follows
# reassigns file_name, so this value only applies when that cell is absent.
file_name = 'oct_21.csv'

In [3]:
# Parameters
# NOTE(review): looks like an injected per-run override of the default file_name — confirm.
file_name = "feb_21.csv"


# RRC Prod

## Ingest

In [4]:
# The column header sits on zero-based line 5 of the raw file; the lines above it are skipped.
raw_production_path = f'{raw_data_folder}/rrc_prod_blackbeard/2021/{file_name}'
production = pd.read_csv(raw_production_path, header=5)

In [5]:
# Preview the first five rows of the raw ingest.
production.head(n=5)

Unnamed: 0,Multiple Reports,Lease Type,District,RRC Identifier,Field Name,Lease Name,Gas Well ID,Lease Total,Commingle Permit No.,On Hand Beginning of Month,Production,Volume,Code,On Hand End of Month,Formation Production,Volume.1,Code.1
0,,Gas,05,164066.0,DUBOIS (BARNETT),MILES,1,,,0,0,,,0,0,,
1,,Gas,7B,211352.0,"GRANBURY, N.E. (ATOKA)",LANGDON,2,,5422.0,4,0,,,4,0,,
2,,Gas,7B,212409.0,"GRANBURY, N.E. (ATOKA)","JOHN J. MILES, JR. GAS UNIT",4,,,0,0,,,0,0,,
3,,Gas,7B,228695.0,"GRANBURY, N.E. (ATOKA)",LUCY UNIT,2,,5699.0,12,0,,,12,662,662.0,3.0
4,,Gas,7B,228926.0,"GRANBURY, N.E. (ATOKA)","MARSHALL, SAM A",2,,,0,0,,,0,0,,


## Transform RRC

### Filter to District

In [6]:
# District is cast to string before filtering because pandas infers it as float
# for some monthly files and as string for others (depending on the sampled rows),
# so district 8 can show up as '8.0', '08' or '8'.
# .copy() makes the filtered frame independent of the raw frame, so the column
# assignments in later cells do not raise SettingWithCopyWarning.
district_8_labels = ['8.0', '08', '8']
production = production.loc[production['District'].astype(str).isin(district_8_labels)].copy()

In [7]:
# Display the district-filtered frame.
production

Unnamed: 0,Multiple Reports,Lease Type,District,RRC Identifier,Field Name,Lease Name,Gas Well ID,Lease Total,Commingle Permit No.,On Hand Beginning of Month,Production,Volume,Code,On Hand End of Month,Formation Production,Volume.1,Code.1
7,,Oil,08,32907.0,ARMER (TUBB),"MCCAMEY, G. B. ""A"" (NCT-B)",,,7024.0,155,27,,,182,0,,
8,,Oil,08,20550.0,ARMER (6350),"MCCAMEY, G. B., -A- /NCT-A/",,,7024.0,58,0,,,58,0,,
9,,Oil,08,45314.0,ARMER (6350),RAYDEN MCCAMEY,,,,350,146,174,1.0,322,2101,2101,2.0
10,,Oil,08,48393.0,ARMER (6350),LANDLUBBER,,,8026.0,1892,7350,7211,0.0,2031,28814,28814,2.0
11,,Oil,08,49425.0,ARMER (6350),6 POUNDER NE,,,8026.0,441,1299,1257,0.0,483,5640,5640,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
470,,Gas,08,160203.0,WADDELL (GRAYBURG),"WADDELL, W.N.",310,,8988.0,0,0,,,0,0,,
471,,Gas,08,190864.0,WADDELL (GRAYBURG),"WADDELL, W.N.",890,,8988.0,0,0,,,0,0,,
472,,Gas,08,190926.0,WADDELL (GRAYBURG),"WADDELL, W.N.",293,,3501.0,16,30,30,1.0,16,81,81,2.0
473,,Gas,08,205775.0,WADDELL (GRAYBURG),"WADDELL, W. N.",1200,,8092.0,16,19,25,1.0,10,334,334,2.0


In [8]:
# Print column dtypes and non-null counts for the filtered frame.
production.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 464 entries, 7 to 474
Data columns (total 17 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Multiple Reports            14 non-null     object 
 1   Lease Type                  408 non-null    object 
 2   District                    464 non-null    object 
 3   RRC Identifier              464 non-null    float64
 4   Field Name                  464 non-null    object 
 5   Lease Name                  464 non-null    object 
 6   Gas Well ID                 236 non-null    object 
 7   Lease Total                 14 non-null     object 
 8   Commingle Permit No.        275 non-null    float64
 9   On Hand Beginning of Month  464 non-null    object 
 10  Production                  464 non-null    object 
 11  Volume                      208 non-null    object 
 12  Code                        208 non-null    float64
 13  On Hand End of Month        464 non

### Clean Oil and Gas Volume Columns and Convert to Int

In [9]:
def _volume_to_int(volume):
    """Convert a raw volume column to int64.

    Missing values become 0 and thousands separators are removed. Values are
    parsed with pd.to_numeric so float-formatted entries such as '662.0'
    (seen when pandas infers the column as float) convert cleanly instead of
    raising ValueError as int('662.0') does. Non-numeric text still raises.
    """
    cleaned = volume.fillna(0).astype(str).str.replace(',', '', regex=False)
    return pd.to_numeric(cleaned).astype('int64')


production['Volume'] = _volume_to_int(production['Volume'])
production['Volume.1'] = _volume_to_int(production['Volume.1'])

### Rename Volume Columns to rrc_oil_volume and rrc_gas_volume

In [10]:
# Rename both volume columns in one pass. Reassigning instead of inplace=True
# avoids mutating a filtered slice in place and keeps the cell safe to re-run.
production = production.rename(
    columns={
        'Volume': 'rrc_oil_volume',
        'Volume.1': 'rrc_gas_volume',
    }
)

In [11]:
# Totals for both volume columns; both values are echoed because
# ast_node_interactivity is set to "all" in the first cell.
production['rrc_oil_volume'].sum()
production['rrc_gas_volume'].sum()

332355

1327661

### Normalize Field and Reservoir Names with Text Cleaning

In [12]:
# Field name = text before the first '(' and before the first ',', trimmed of whitespace.
# Null field names are dropped first, so those rows end up NaN after index alignment.
field_names = production['Field Name'].dropna()
production["Normalized_Field_Name"] = field_names.apply(
    lambda raw: str(raw).partition('(')[0].partition(',')[0].strip()
)

In [13]:
# Reservoir name = text after the last '(', trimmed, with every ')' removed.
# A field name containing no '(' is kept whole.
named_fields = production['Field Name'].dropna()
production["Normalized_Reservoir_Name"] = named_fields.apply(
    lambda raw: str(raw).rpartition('(')[2].strip().replace(')', '')
)

### Get Date from Production File

In [14]:
def get_date_from_csv(filepath, col_number, row_number):
    """Return one cell of a CSV file as a whitespace-stripped string.

    Parameters
    ----------
    filepath : path of the CSV file to read.
    col_number : zero-based position of the column to read.
    row_number : zero-based data-row position (the file's first line is
        treated as the header); negative values count from the end.

    Returns
    -------
    str : the cell's text with leading and trailing whitespace removed.

    Raises
    ------
    ValueError : if the addressed cell is empty.
    IndexError : if row_number is outside the rows that were read.
    """
    # Only the rows up to row_number are needed; a negative index still
    # requires the whole column.
    nrows = row_number + 1 if row_number >= 0 else None
    # dtype=str keeps numeric-looking cells as text so .strip() always applies.
    column = pd.read_csv(filepath, usecols=[col_number], dtype=str, nrows=nrows)
    cell = column.iloc[row_number, 0]
    if pd.isna(cell):
        raise ValueError(
            f'No value at row {row_number}, column {col_number} of {filepath}'
        )
    return cell.strip()

In [15]:
# Preview the date string stored in column 3, data row 1 of the raw file.
get_date_from_csv(
    filepath=f'{raw_data_folder}/rrc_prod_blackbeard/2021/{file_name}',
    col_number=3,
    row_number=1,
)

'Feb 2021'

### Add Production Date to Production Data Frame

In [16]:
# DataFrame.insert raises ValueError when the column already exists, so guard
# the call to keep this cell safe to re-run in the same kernel.
if 'Date' not in production.columns:
    production.insert(
        0,
        'Date',
        get_date_from_csv(f'{raw_data_folder}/rrc_prod_blackbeard/2021/{file_name}', 3, 1),
    )

In [17]:
# Display the frame to confirm the Date column now leads the columns.
production

Unnamed: 0,Date,Multiple Reports,Lease Type,District,RRC Identifier,Field Name,Lease Name,Gas Well ID,Lease Total,Commingle Permit No.,On Hand Beginning of Month,Production,rrc_oil_volume,Code,On Hand End of Month,Formation Production,rrc_gas_volume,Code.1,Normalized_Field_Name,Normalized_Reservoir_Name
7,Feb 2021,,Oil,08,32907.0,ARMER (TUBB),"MCCAMEY, G. B. ""A"" (NCT-B)",,,7024.0,155,27,0,,182,0,0,,ARMER,TUBB
8,Feb 2021,,Oil,08,20550.0,ARMER (6350),"MCCAMEY, G. B., -A- /NCT-A/",,,7024.0,58,0,0,,58,0,0,,ARMER,6350
9,Feb 2021,,Oil,08,45314.0,ARMER (6350),RAYDEN MCCAMEY,,,,350,146,174,1.0,322,2101,2101,2.0,ARMER,6350
10,Feb 2021,,Oil,08,48393.0,ARMER (6350),LANDLUBBER,,,8026.0,1892,7350,7211,0.0,2031,28814,28814,2.0,ARMER,6350
11,Feb 2021,,Oil,08,49425.0,ARMER (6350),6 POUNDER NE,,,8026.0,441,1299,1257,0.0,483,5640,5640,2.0,ARMER,6350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
470,Feb 2021,,Gas,08,160203.0,WADDELL (GRAYBURG),"WADDELL, W.N.",310,,8988.0,0,0,0,,0,0,0,,WADDELL,GRAYBURG
471,Feb 2021,,Gas,08,190864.0,WADDELL (GRAYBURG),"WADDELL, W.N.",890,,8988.0,0,0,0,,0,0,0,,WADDELL,GRAYBURG
472,Feb 2021,,Gas,08,190926.0,WADDELL (GRAYBURG),"WADDELL, W.N.",293,,3501.0,16,30,30,1.0,16,81,81,2.0,WADDELL,GRAYBURG
473,Feb 2021,,Gas,08,205775.0,WADDELL (GRAYBURG),"WADDELL, W. N.",1200,,8092.0,16,19,25,1.0,10,334,334,2.0,WADDELL,GRAYBURG


### Add columns for net production to Trust

In [18]:
# Net oil volume is 50% of the RRC oil volume; the sales volume is 75% of that net figure.
net_oil_volume = production['rrc_oil_volume'].mul(0.50)
production['Net Oil Volume to Trust (RRC)'] = net_oil_volume
production['Net Oil Sales Volume to Trust (RRC)'] = net_oil_volume.mul(0.75)

In [19]:
# Net gas volume is 50% of the RRC gas volume; the sales volume is 75% of that net figure.
net_gas_volume = production['rrc_gas_volume'].mul(0.50)
production['Net Gas Volume to Trust (RRC)'] = net_gas_volume
production['Net Gas Sales Volume to Trust (RRC)'] = net_gas_volume.mul(0.75)

In [20]:
# Display the frame including the four net-to-Trust columns added above.
production

Unnamed: 0,Date,Multiple Reports,Lease Type,District,RRC Identifier,Field Name,Lease Name,Gas Well ID,Lease Total,Commingle Permit No.,On Hand Beginning of Month,Production,rrc_oil_volume,Code,On Hand End of Month,Formation Production,rrc_gas_volume,Code.1,Normalized_Field_Name,Normalized_Reservoir_Name,Net Oil Volume to Trust (RRC),Net Oil Sales Volume to Trust (RRC),Net Gas Volume to Trust (RRC),Net Gas Sales Volume to Trust (RRC)
7,Feb 2021,,Oil,08,32907.0,ARMER (TUBB),"MCCAMEY, G. B. ""A"" (NCT-B)",,,7024.0,155,27,0,,182,0,0,,ARMER,TUBB,0.0,0.000,0.0,0.000
8,Feb 2021,,Oil,08,20550.0,ARMER (6350),"MCCAMEY, G. B., -A- /NCT-A/",,,7024.0,58,0,0,,58,0,0,,ARMER,6350,0.0,0.000,0.0,0.000
9,Feb 2021,,Oil,08,45314.0,ARMER (6350),RAYDEN MCCAMEY,,,,350,146,174,1.0,322,2101,2101,2.0,ARMER,6350,87.0,65.250,1050.5,787.875
10,Feb 2021,,Oil,08,48393.0,ARMER (6350),LANDLUBBER,,,8026.0,1892,7350,7211,0.0,2031,28814,28814,2.0,ARMER,6350,3605.5,2704.125,14407.0,10805.250
11,Feb 2021,,Oil,08,49425.0,ARMER (6350),6 POUNDER NE,,,8026.0,441,1299,1257,0.0,483,5640,5640,2.0,ARMER,6350,628.5,471.375,2820.0,2115.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
470,Feb 2021,,Gas,08,160203.0,WADDELL (GRAYBURG),"WADDELL, W.N.",310,,8988.0,0,0,0,,0,0,0,,WADDELL,GRAYBURG,0.0,0.000,0.0,0.000
471,Feb 2021,,Gas,08,190864.0,WADDELL (GRAYBURG),"WADDELL, W.N.",890,,8988.0,0,0,0,,0,0,0,,WADDELL,GRAYBURG,0.0,0.000,0.0,0.000
472,Feb 2021,,Gas,08,190926.0,WADDELL (GRAYBURG),"WADDELL, W.N.",293,,3501.0,16,30,30,1.0,16,81,81,2.0,WADDELL,GRAYBURG,15.0,11.250,40.5,30.375
473,Feb 2021,,Gas,08,205775.0,WADDELL (GRAYBURG),"WADDELL, W. N.",1200,,8092.0,16,19,25,1.0,10,334,334,2.0,WADDELL,GRAYBURG,12.5,9.375,167.0,125.250


### Write to File

In [21]:
# Write the transformed frame under the input's file name, without the index column.
processed_path = f'{processed_data_folder}/rrc_prod/{file_name}'
production.to_csv(processed_path, index=False)

## Aggregated EDA

In [22]:
# Top 15 normalized field names by summed rrc_oil_volume, largest first.
oil_by_field = production.groupby(['Normalized_Field_Name'])[['rrc_oil_volume']].sum()
oil_by_field.sort_values('rrc_oil_volume', ascending=False).head(15)

Unnamed: 0_level_0,rrc_oil_volume
Normalized_Field_Name,Unnamed: 1_level_1
SAND HILLS,163957
MONAHANS,83008
ARMER,37980
DUNE,12816
KEYSTONE,8071
MARSTON RANCH,8012
UNIVERSITY WADDELL,6925
WADDELL,4948
RUNNING W,1866
KERMIT,1510


In [23]:
# Summed rrc_oil_volume per (reservoir, field) pair, listed in index order.
# The earlier sort_values() call was removed: sort_index() immediately re-ordered
# the rows, so it never affected the result.
(
    production
    .groupby(['Normalized_Reservoir_Name', 'Normalized_Field_Name'])
    .agg({'rrc_oil_volume': 'sum'})
    .sort_index()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,rrc_oil_volume
Normalized_Reservoir_Name,Normalized_Field_Name,Unnamed: 2_level_1
6350,ARMER,37980
7900,EDWARDS -04-,0
CLEAR FORK,LEA,163
CLEAR FORK,MONAHANS,0
CLEAR FORK,SAND HILLS,35453
CLEAR FORK 4070,SAND HILLS,78
"CLEAR FORK, LOWER",MCKEE,0
"CLEAR FORK, MID.",MCKEE,0
CLEARFORK,MARSTON RANCH,8012
CLEARFORK,MONAHANS,77861
