In [1]:
import pandas as pd
import datetime as dt

# Raise pandas' display limits: up to 1000 columns and 100 rows are rendered
# before a frame is truncated with "...".
pd.options.display.max_columns=1000
pd.options.display.max_rows = 100

# Make IPython display the value of every top-level expression in a cell,
# not only the last one; cells further down that end with several bare
# expressions rely on this to show all of their results.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

%run configs.ipynb

In [2]:
# parameter cell
# Default input file name. The "# Parameters" cell that follows reassigns
# file_name, so this value only takes effect when that cell is absent.
file_name = 'oct_21.csv'

In [3]:
# Parameters
# Overrides the default file_name set in the parameter cell above.
# NOTE(review): this looks like a cell injected by a parameterized run
# (e.g. papermill) -- confirm before editing it by hand.
file_name = "september_21.csv"


# RRC Prod

## Ingest

In [4]:
# Location of the raw RRC production export for the selected month.
rrc_csv_path = f'{raw_data_folder}/rrc_prod_blackbeard/2021/{file_name}'

# header=5: the line at 0-based position 5 supplies the column names and the
# lines above it are skipped by pandas.
production = pd.read_csv(rrc_csv_path, header=5)

In [5]:
# Show the first five rows of the freshly loaded frame.
production.head()

Unnamed: 0,Multiple Reports,Lease Type,District,RRC Identifier,Field Name,Lease Name,Gas Well ID,Lease Total,Commingle Permit No.,On Hand Beginning of Month,Production,Volume,Code,On Hand End of Month,Formation Production,Volume.1,Code.1
0,,Oil,8.0,32907.0,ARMER (TUBB),"MCCAMEY, G. B. ""A"" (NCT-B)",,,7024.0,260,23,143,1.0,140,0,,
1,,Oil,8.0,20550.0,ARMER (6350),"MCCAMEY, G. B., -A- /NCT-A/",,,7024.0,39,0,21,1.0,18,0,,
2,,Oil,8.0,45314.0,ARMER (6350),RAYDEN MCCAMEY,,,,272,161,172,1.0,261,1941,1941.0,2.0
3,,Oil,8.0,48393.0,ARMER (6350),LANDLUBBER,,,8026.0,1932,7137,7348,0.0,1721,21352,21352.0,2.0
4,,Oil,8.0,49425.0,ARMER (6350),6 POUNDER NE,,,8026.0,394,1569,1456,0.0,507,3346,3346.0,2.0


## Transform RRC

### Filter to District

In [6]:
# Compare District as text so the filter works whichever dtype pandas
# inferred: depending on the month's file (and the sample rows pandas uses for
# inference) the column is read as float (8.0) or as string ('08' / '8').
#
# .copy() detaches the filtered rows from the originally loaded frame, so the
# column assignments made in later cells write to an independent DataFrame
# rather than to a slice (which would raise SettingWithCopyWarning).
production = production.loc[production['District'].astype(str).isin(['8.0', '08', '8'])].copy()

In [7]:
# Display the district-filtered frame (pandas elides the middle rows with "...").
production

Unnamed: 0,Multiple Reports,Lease Type,District,RRC Identifier,Field Name,Lease Name,Gas Well ID,Lease Total,Commingle Permit No.,On Hand Beginning of Month,Production,Volume,Code,On Hand End of Month,Formation Production,Volume.1,Code.1
0,,Oil,8.0,32907.0,ARMER (TUBB),"MCCAMEY, G. B. ""A"" (NCT-B)",,,7024.0,260,23,143,1.0,140,0,,
1,,Oil,8.0,20550.0,ARMER (6350),"MCCAMEY, G. B., -A- /NCT-A/",,,7024.0,39,0,21,1.0,18,0,,
2,,Oil,8.0,45314.0,ARMER (6350),RAYDEN MCCAMEY,,,,272,161,172,1.0,261,1941,1941,2.0
3,,Oil,8.0,48393.0,ARMER (6350),LANDLUBBER,,,8026.0,1932,7137,7348,0.0,1721,21352,21352,2.0
4,,Oil,8.0,49425.0,ARMER (6350),6 POUNDER NE,,,8026.0,394,1569,1456,0.0,507,3346,3346,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
435,,Gas,8.0,147187.0,WADDELL (GRAYBURG),"WADDELL, W.N.",859,,8988.0,7,26,25,1.0,8,509,509,2.0
436,,Gas,8.0,190864.0,WADDELL (GRAYBURG),"WADDELL, W.N.",890,,8988.0,0,0,,,0,0,,
437,,Gas,8.0,190926.0,WADDELL (GRAYBURG),"WADDELL, W.N.",293,,3501.0,14,26,32,1.0,8,459,459,2.0
438,,Gas,8.0,205775.0,WADDELL (GRAYBURG),"WADDELL, W. N.",1200,,8092.0,14,27,33,1.0,8,486,486,2.0


In [8]:
# Column dtypes and non-null counts after filtering; note that the volume
# columns are still `object` at this point and are converted in the next step.
production.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 437 entries, 0 to 439
Data columns (total 17 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Multiple Reports            57 non-null     object 
 1   Lease Type                  384 non-null    object 
 2   District                    437 non-null    float64
 3   RRC Identifier              437 non-null    float64
 4   Field Name                  437 non-null    object 
 5   Lease Name                  437 non-null    object 
 6   Gas Well ID                 220 non-null    object 
 7   Lease Total                 13 non-null     object 
 8   Commingle Permit No.        270 non-null    float64
 9   On Hand Beginning of Month  398 non-null    object 
 10  Production                  437 non-null    object 
 11  Volume                      191 non-null    object 
 12  Code                        191 non-null    float64
 13  On Hand End of Month        437 non

### Clean Oil and Gas Volume Columns and Convert to Int

In [9]:
def _volume_to_int(volume_column):
    """Convert a raw RRC volume column to int64.

    Missing entries are treated as 0 and thousands separators (',') are
    removed. Values are then parsed numerically, so the conversion works both
    when pandas read the column as text ('1,941') and when it inferred a float
    column (1941.0) -- the latter made ``int('1941.0')`` raise ValueError.
    Any fractional part is truncated toward zero. Text that is not a number
    still raises ValueError.
    """
    as_text = (
        volume_column
        .fillna(0)
        .astype(str)
        .str.replace(',', '', regex=False)
        .str.strip()
    )
    return pd.to_numeric(as_text).astype('int64')


# 'Volume' and 'Volume.1' are renamed to rrc_oil_volume / rrc_gas_volume in
# the next step; both get the same cleaning.
production['Volume'] = _volume_to_int(production['Volume'])
production['Volume.1'] = _volume_to_int(production['Volume.1'])

### Rename Volume Columns to rrc_oil_volume and rrc_gas_volume

In [10]:
# Give both volume columns descriptive names in a single pass. The result is
# reassigned instead of using inplace=True, which avoids mutating a filtered
# frame in place and keeps the step chain-friendly.
production = production.rename(columns={
    'Volume': 'rrc_oil_volume',
    'Volume.1': 'rrc_gas_volume',
})

In [11]:
# Total oil and gas volume after cleaning. Both sums are displayed because
# ast_node_interactivity is set to "all" in the first cell.
production['rrc_oil_volume'].sum()
production['rrc_gas_volume'].sum()

375220

1352958

### Normalize Field and Reservoir Names with Text Cleaning

In [12]:
def _field_part(raw_field_name):
    """Return the text before the first '(' and before the first ',', trimmed."""
    before_paren = str(raw_field_name).partition('(')[0]
    before_comma = before_paren.partition(',')[0]
    return before_comma.strip()


# Rows with a missing Field Name are skipped and end up as NaN in the new column.
production["Normalized_Field_Name"] = production['Field Name'].dropna().apply(_field_part)

In [13]:
def _reservoir_part(raw_field_name):
    """Return the text after the last '(', trimmed, with every ')' removed.

    When the name contains no '(' the whole name is returned.
    """
    after_paren = str(raw_field_name).rpartition('(')[2]
    return after_paren.strip().replace(')', '')


# Rows with a missing Field Name are skipped and end up as NaN in the new column.
production["Normalized_Reservoir_Name"] = production['Field Name'].dropna().apply(_reservoir_part)

### Get Date from Production File

In [14]:
def get_date_from_csv(filepath, col_number, row_number):
    """Return a single cell of a CSV file as whitespace-stripped text.

    The file is read with pandas' default header handling: its first line is
    consumed as the header, so ``row_number`` counts the data rows after it.

    Parameters
    ----------
    filepath : str or path-like
        CSV file to read.
    col_number : int
        0-based position of the column to read.
    row_number : int
        0-based position of the row (after the header line). Negative values
        count from the end of the file.

    Returns
    -------
    str
        The cell's text with leading/trailing whitespace removed.

    Raises
    ------
    IndexError
        If the file has fewer rows than ``row_number`` requires.
    ValueError
        If the requested cell is empty.
    """
    # Only parse as far as the requested row instead of the whole file.
    # A negative row_number is relative to the end, so it needs every row.
    rows_needed = row_number + 1 if row_number >= 0 else None

    # dtype=str keeps the cell as the text found in the file, so .strip()
    # works even when the column happens to contain only numbers.
    column = pd.read_csv(filepath, usecols=[col_number], nrows=rows_needed, dtype=str)
    cell = column.iloc[row_number].values[0]

    if pd.isna(cell):
        raise ValueError(
            f'No value found at column {col_number}, row {row_number} of {filepath}'
        )
    return cell.strip()

In [15]:
# Column 3, row 1 of the raw export holds the report month (e.g. 'Sept 2021').
get_date_from_csv(f'{raw_data_folder}/rrc_prod_blackbeard/2021/{file_name}', 3, 1)

'Sept 2021'

### Add Production Date to Production Data Frame

In [16]:
# Report date read from the raw file (column 3, row 1); the same value is
# stamped on every row as the first column.
production_date = get_date_from_csv(f'{raw_data_folder}/rrc_prod_blackbeard/2021/{file_name}', 3, 1)

if 'Date' in production.columns:
    # DataFrame.insert raises ValueError when the column already exists, so a
    # re-run of this cell just refreshes the existing column instead.
    production['Date'] = production_date
else:
    production.insert(0, 'Date', production_date)

In [17]:
# Display the frame to confirm the Date column was added as the first column.
production

Unnamed: 0,Date,Multiple Reports,Lease Type,District,RRC Identifier,Field Name,Lease Name,Gas Well ID,Lease Total,Commingle Permit No.,On Hand Beginning of Month,Production,rrc_oil_volume,Code,On Hand End of Month,Formation Production,rrc_gas_volume,Code.1,Normalized_Field_Name,Normalized_Reservoir_Name
0,Sept 2021,,Oil,8.0,32907.0,ARMER (TUBB),"MCCAMEY, G. B. ""A"" (NCT-B)",,,7024.0,260,23,143,1.0,140,0,0,,ARMER,TUBB
1,Sept 2021,,Oil,8.0,20550.0,ARMER (6350),"MCCAMEY, G. B., -A- /NCT-A/",,,7024.0,39,0,21,1.0,18,0,0,,ARMER,6350
2,Sept 2021,,Oil,8.0,45314.0,ARMER (6350),RAYDEN MCCAMEY,,,,272,161,172,1.0,261,1941,1941,2.0,ARMER,6350
3,Sept 2021,,Oil,8.0,48393.0,ARMER (6350),LANDLUBBER,,,8026.0,1932,7137,7348,0.0,1721,21352,21352,2.0,ARMER,6350
4,Sept 2021,,Oil,8.0,49425.0,ARMER (6350),6 POUNDER NE,,,8026.0,394,1569,1456,0.0,507,3346,3346,2.0,ARMER,6350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
435,Sept 2021,,Gas,8.0,147187.0,WADDELL (GRAYBURG),"WADDELL, W.N.",859,,8988.0,7,26,25,1.0,8,509,509,2.0,WADDELL,GRAYBURG
436,Sept 2021,,Gas,8.0,190864.0,WADDELL (GRAYBURG),"WADDELL, W.N.",890,,8988.0,0,0,0,,0,0,0,,WADDELL,GRAYBURG
437,Sept 2021,,Gas,8.0,190926.0,WADDELL (GRAYBURG),"WADDELL, W.N.",293,,3501.0,14,26,32,1.0,8,459,459,2.0,WADDELL,GRAYBURG
438,Sept 2021,,Gas,8.0,205775.0,WADDELL (GRAYBURG),"WADDELL, W. N.",1200,,8092.0,14,27,33,1.0,8,486,486,2.0,WADDELL,GRAYBURG


### Add columns for net production to Trust

In [18]:
# Fractions applied to the RRC oil volume to derive the Trust's figures.
# NOTE(review): the business meaning of 0.50 and 0.75 is not stated in this
# notebook -- confirm and document their source.
trust_volume_fraction = 0.50
trust_sales_fraction = 0.75

net_oil_to_trust = production['rrc_oil_volume'] * trust_volume_fraction
production['Net Oil Volume to Trust (RRC)'] = net_oil_to_trust
production['Net Oil Sales Volume to Trust (RRC)'] = net_oil_to_trust * trust_sales_fraction

In [19]:
# Fractions applied to the RRC gas volume to derive the Trust's figures.
# NOTE(review): the business meaning of 0.50 and 0.75 is not stated in this
# notebook -- confirm and document their source.
trust_volume_fraction = 0.50
trust_sales_fraction = 0.75

net_gas_to_trust = production['rrc_gas_volume'] * trust_volume_fraction
production['Net Gas Volume to Trust (RRC)'] = net_gas_to_trust
production['Net Gas Sales Volume to Trust (RRC)'] = net_gas_to_trust * trust_sales_fraction

In [20]:
# Display the frame including the four "Net ... to Trust (RRC)" columns.
production

Unnamed: 0,Date,Multiple Reports,Lease Type,District,RRC Identifier,Field Name,Lease Name,Gas Well ID,Lease Total,Commingle Permit No.,On Hand Beginning of Month,Production,rrc_oil_volume,Code,On Hand End of Month,Formation Production,rrc_gas_volume,Code.1,Normalized_Field_Name,Normalized_Reservoir_Name,Net Oil Volume to Trust (RRC),Net Oil Sales Volume to Trust (RRC),Net Gas Volume to Trust (RRC),Net Gas Sales Volume to Trust (RRC)
0,Sept 2021,,Oil,8.0,32907.0,ARMER (TUBB),"MCCAMEY, G. B. ""A"" (NCT-B)",,,7024.0,260,23,143,1.0,140,0,0,,ARMER,TUBB,71.5,53.625,0.0,0.000
1,Sept 2021,,Oil,8.0,20550.0,ARMER (6350),"MCCAMEY, G. B., -A- /NCT-A/",,,7024.0,39,0,21,1.0,18,0,0,,ARMER,6350,10.5,7.875,0.0,0.000
2,Sept 2021,,Oil,8.0,45314.0,ARMER (6350),RAYDEN MCCAMEY,,,,272,161,172,1.0,261,1941,1941,2.0,ARMER,6350,86.0,64.500,970.5,727.875
3,Sept 2021,,Oil,8.0,48393.0,ARMER (6350),LANDLUBBER,,,8026.0,1932,7137,7348,0.0,1721,21352,21352,2.0,ARMER,6350,3674.0,2755.500,10676.0,8007.000
4,Sept 2021,,Oil,8.0,49425.0,ARMER (6350),6 POUNDER NE,,,8026.0,394,1569,1456,0.0,507,3346,3346,2.0,ARMER,6350,728.0,546.000,1673.0,1254.750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
435,Sept 2021,,Gas,8.0,147187.0,WADDELL (GRAYBURG),"WADDELL, W.N.",859,,8988.0,7,26,25,1.0,8,509,509,2.0,WADDELL,GRAYBURG,12.5,9.375,254.5,190.875
436,Sept 2021,,Gas,8.0,190864.0,WADDELL (GRAYBURG),"WADDELL, W.N.",890,,8988.0,0,0,0,,0,0,0,,WADDELL,GRAYBURG,0.0,0.000,0.0,0.000
437,Sept 2021,,Gas,8.0,190926.0,WADDELL (GRAYBURG),"WADDELL, W.N.",293,,3501.0,14,26,32,1.0,8,459,459,2.0,WADDELL,GRAYBURG,16.0,12.000,229.5,172.125
438,Sept 2021,,Gas,8.0,205775.0,WADDELL (GRAYBURG),"WADDELL, W. N.",1200,,8092.0,14,27,33,1.0,8,486,486,2.0,WADDELL,GRAYBURG,16.5,12.375,243.0,182.250


### Write to File

In [21]:
# The processed file keeps the input file's name; the row index is not written.
processed_csv_path = f'{processed_data_folder}/rrc_prod/{file_name}'
production.to_csv(processed_csv_path, index=False)

## Aggregated EDA

In [22]:
# Total RRC oil volume per normalized field name.
oil_by_field = production.groupby(['Normalized_Field_Name']).agg({'rrc_oil_volume': 'sum'})

# Show the 15 fields with the largest totals, biggest first.
oil_by_field.sort_values(by='rrc_oil_volume', ascending=False).head(15)

Unnamed: 0_level_0,rrc_oil_volume
Normalized_Field_Name,Unnamed: 1_level_1
SAND HILLS,225485
MONAHANS,65789
ARMER,31851
DUNE,17598
UNIVERSITY WADDELL,10854
MARSTON RANCH,8539
WADDELL,6113
RUNNING W,4855
CORDONA LAKE,2677
LEA,1108


In [23]:
# Total RRC oil volume per (reservoir, field) pair, ordered by reservoir name
# and then field name. No sort by volume is applied: the final sort_index()
# fully determines the row order, so a preceding sort_values() has no effect
# on the result.
(
    production
    .groupby(['Normalized_Reservoir_Name', 'Normalized_Field_Name'])
    .agg({'rrc_oil_volume': 'sum'})
    .sort_index()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,rrc_oil_volume
Normalized_Reservoir_Name,Normalized_Field_Name,Unnamed: 2_level_1
6350,ARMER,31708
7900,EDWARDS -04-,0
CLEAR FORK,LEA,265
CLEAR FORK,MONAHANS,0
CLEAR FORK,SAND HILLS,35946
CLEAR FORK 4070,SAND HILLS,12
"CLEAR FORK, LOWER",MCKEE,0
"CLEAR FORK, MID.",MCKEE,0
CLEARFORK,MARSTON RANCH,8539
CLEARFORK,MONAHANS,59518
