In [1]:
import pandas as pd
import datetime as dt

# Raise pandas' display limits so wide frames are shown with all their columns
# and up to 100 rows before the output is truncated.
pd.options.display.max_columns=1000
pd.options.display.max_rows = 100

# Echo every top-level expression of a cell, not only the last one
# (cells that contain two bare .sum() expressions rely on this).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Run the shared configuration notebook.
# NOTE(review): raw_data_folder and processed_data_folder are used by later
# cells but are never assigned in this notebook — presumably they are defined
# by configs.ipynb; confirm.
%run configs.ipynb

In [2]:
# parameter cell
# Default input file name. The "# Parameters" cell that follows reassigns
# file_name, so this default only takes effect when that cell is absent.
# NOTE(review): looks like the default cell of a parameterised (papermill-style)
# run — confirm.
file_name = 'oct_21.csv'

In [3]:
# Parameters
# Overrides the default file_name assigned in the preceding parameter cell;
# every read and write further down uses this value.
# NOTE(review): presumably injected by the tool that executed the notebook —
# confirm before editing by hand.
file_name = "july_21.csv"


# RRC Prod

## Ingest

In [4]:
# Location of the raw RRC production export selected by file_name.
rrc_source_path = f'{raw_data_folder}/rrc_prod_blackbeard/2021/{file_name}'

# header=5 makes the sixth line of the file the column-name row; the lines
# above it are not loaded as data.
production = pd.read_csv(rrc_source_path, header=5)

In [5]:
# Preview the first rows to check that the column-name row was picked up.
production.head()

Unnamed: 0,Multiple Reports,Lease Type,District,RRC Identifier,Field Name,Lease Name,Gas Well ID,Lease Total,Commingle Permit No.,On Hand Beginning of Month,Production,Volume,Code,On Hand End of Month,Formation Production,Volume.1,Code.1
0,,Oil,8.0,32907.0,ARMER (TUBB),"MCCAMEY, G. B. ""A"" (NCT-B)",,,7024.0,185,37,,,222,0,,
1,,Oil,8.0,20550.0,ARMER (6350),"MCCAMEY, G. B., -A- /NCT-A/",,,7024.0,39,0,,,39,0,,
2,,Oil,8.0,45314.0,ARMER (6350),RAYDEN MCCAMEY,,,,267,91,175.0,1.0,183,2165,2165.0,2.0
3,,Oil,8.0,48393.0,ARMER (6350),LANDLUBBER,,,8026.0,2013,8947,9078.0,0.0,1882,22367,22367.0,2.0
4,,Oil,8.0,49425.0,ARMER (6350),6 POUNDER NE,,,8026.0,364,1581,1460.0,0.0,485,4933,4933.0,2.0


## Transform RRC

### Filter to District

In [6]:
# District is compared as a string because pandas infers a different dtype for
# this column from month to month (float in some files, string in others,
# depending on the rows it samples), so every spelling of district 8 is listed.
district_8_mask = production['District'].astype(str).isin(['8.0', '08', '8'])

# .copy() turns the filtered rows into an independent frame, so the column
# assignments made in later cells do not raise SettingWithCopyWarning.
production = production.loc[district_8_mask].copy()

In [7]:
# Show the filtered frame (the display is truncated by display.max_rows).
production

Unnamed: 0,Multiple Reports,Lease Type,District,RRC Identifier,Field Name,Lease Name,Gas Well ID,Lease Total,Commingle Permit No.,On Hand Beginning of Month,Production,Volume,Code,On Hand End of Month,Formation Production,Volume.1,Code.1
0,,Oil,8.0,32907.0,ARMER (TUBB),"MCCAMEY, G. B. ""A"" (NCT-B)",,,7024.0,185,37,,,222,0,,
1,,Oil,8.0,20550.0,ARMER (6350),"MCCAMEY, G. B., -A- /NCT-A/",,,7024.0,39,0,,,39,0,,
2,,Oil,8.0,45314.0,ARMER (6350),RAYDEN MCCAMEY,,,,267,91,175,1.0,183,2165,2165,2.0
3,,Oil,8.0,48393.0,ARMER (6350),LANDLUBBER,,,8026.0,2013,8947,9078,0.0,1882,22367,22367,2.0
4,,Oil,8.0,49425.0,ARMER (6350),6 POUNDER NE,,,8026.0,364,1581,1460,0.0,485,4933,4933,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383,,Gas,8.0,147187.0,WADDELL (GRAYBURG),"WADDELL, W.N.",859,,8988.0,4,24,21,1.0,7,436,436,2.0
384,,Gas,8.0,190864.0,WADDELL (GRAYBURG),"WADDELL, W.N.",890,,8988.0,0,0,,,0,0,,
385,,Gas,8.0,190926.0,WADDELL (GRAYBURG),"WADDELL, W.N.",293,,3501.0,4,25,22,1.0,7,12,12,2.0
386,,Gas,8.0,205775.0,WADDELL (GRAYBURG),"WADDELL, W. N.",1200,,8092.0,4,25,22,1.0,7,240,240,2.0


In [8]:
# Inspect dtypes and non-null counts of the filtered frame before cleaning.
production.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 385 entries, 0 to 387
Data columns (total 17 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Multiple Reports            12 non-null     object 
 1   Lease Type                  332 non-null    object 
 2   District                    385 non-null    float64
 3   RRC Identifier              385 non-null    float64
 4   Field Name                  385 non-null    object 
 5   Lease Name                  385 non-null    object 
 6   Gas Well ID                 181 non-null    object 
 7   Lease Total                 13 non-null     object 
 8   Commingle Permit No.        258 non-null    float64
 9   On Hand Beginning of Month  385 non-null    object 
 10  Production                  385 non-null    object 
 11  Volume                      194 non-null    object 
 12  Code                        194 non-null    float64
 13  On Hand End of Month        385 non

### Clean Oil and Gas Volume Columns and Convert to Int

In [9]:
def _volume_to_int(volume_column):
    """Convert a raw volume column to whole numbers.

    Missing entries become 0 and thousands separators are removed. Values may
    arrive as strings ('9,078') or as floats (9078.0), depending on the dtype
    pandas inferred for the column in a given file; both are accepted.

    Raises:
        ValueError: if a value is not numeric or is not a whole number.
    """
    as_text = (
        volume_column.fillna(0)
        .astype(str)
        .str.replace(',', '', regex=False)
        .str.strip()
    )
    numeric = pd.to_numeric(as_text)
    # Refuse to truncate silently: a fractional volume is reported, not rounded.
    if (numeric % 1 != 0).any():
        raise ValueError('volume column contains non-integer values')
    return numeric.astype('int64')


production['Volume'] = _volume_to_int(production['Volume'])
production['Volume.1'] = _volume_to_int(production['Volume.1'])

### Rename Volume Columns to rrc_oil_volume and rrc_gas_volume

In [10]:
# Give both cleaned volume columns descriptive names in a single pass.
volume_column_names = {
    'Volume': 'rrc_oil_volume',
    'Volume.1': 'rrc_gas_volume',
}
production.rename(columns=volume_column_names, inplace=True)

In [11]:
# Totals of the two renamed volume columns, used as a quick sanity check.
# Both values are echoed because ast_node_interactivity is set to "all".
production['rrc_oil_volume'].sum()
production['rrc_gas_volume'].sum()

395910

1400233

### Normalize Field and Reservoir Names with Text Cleaning

In [12]:
def _field_part(raw_name):
    """Return the text before the first '(' and the first ',', trimmed."""
    before_paren = str(raw_name).partition('(')[0]
    return before_paren.partition(',')[0].strip()


# Null field names are dropped before mapping; on assignment those rows are
# left as NaN by index alignment.
production["Normalized_Field_Name"] = production['Field Name'].dropna().map(_field_part)

In [13]:
def _reservoir_part(raw_name):
    """Return the text after the last '(', trimmed, with every ')' removed."""
    after_paren = str(raw_name).rpartition('(')[2]
    return after_paren.strip().replace(')', '')


# NOTE(review): a field name without '(' is returned whole (minus any ')'),
# so its reservoir name equals the full field name — confirm this is intended.
production["Normalized_Reservoir_Name"] = production['Field Name'].dropna().map(_reservoir_part)

### Get Date from Production File

In [14]:
def get_date_from_csv(filepath, col_number, row_number):
    """Read a single cell from a CSV file and return it as a trimmed string.

    The file is parsed with its first line as the header, so ``row_number``
    counts data rows below that line (0 is the second line of the file).

    Args:
        filepath: path of the CSV file to read.
        col_number: zero-based position of the column holding the value.
        row_number: zero-based position of the row holding the value; a
            negative value counts from the end of the file.

    Returns:
        The cell's text with leading and trailing whitespace removed.

    Raises:
        IndexError: if ``row_number`` lies outside the file.
        ValueError: if the addressed cell is empty.
    """
    # Parse only as many rows as needed; a negative row_number is relative to
    # the end of the file, so the whole file has to be read in that case.
    rows_needed = row_number + 1 if row_number >= 0 else None

    # dtype=str keeps the cell as text even when pandas would infer a number.
    column = pd.read_csv(
        filepath, usecols=[col_number], nrows=rows_needed, dtype=str
    )
    cell = column.iloc[row_number, 0]
    if pd.isna(cell):
        raise ValueError(
            f'no value at column {col_number}, row {row_number} of {filepath}'
        )
    return cell.strip()

In [15]:
# Check the value found at column 3, row 1 of the raw file (positions as
# parsed by get_date_from_csv); it is used as the production date below.
get_date_from_csv(f'{raw_data_folder}/rrc_prod_blackbeard/2021/{file_name}', 3, 1)

'Jul 2021'

### Add Production Date to Production Data Frame

In [16]:
# Read the report date from the raw file and put it in front as the first column.
# NOTE(review): DataFrame.insert raises if 'Date' already exists, so this cell
# cannot be run twice on the same frame.
date_source_path = f'{raw_data_folder}/rrc_prod_blackbeard/2021/{file_name}'
report_date = get_date_from_csv(date_source_path, 3, 1)
production.insert(loc=0, column='Date', value=report_date)

In [17]:
# Show the frame with the new leading Date column (truncated by display.max_rows).
production

Unnamed: 0,Date,Multiple Reports,Lease Type,District,RRC Identifier,Field Name,Lease Name,Gas Well ID,Lease Total,Commingle Permit No.,On Hand Beginning of Month,Production,rrc_oil_volume,Code,On Hand End of Month,Formation Production,rrc_gas_volume,Code.1,Normalized_Field_Name,Normalized_Reservoir_Name
0,Jul 2021,,Oil,8.0,32907.0,ARMER (TUBB),"MCCAMEY, G. B. ""A"" (NCT-B)",,,7024.0,185,37,0,,222,0,0,,ARMER,TUBB
1,Jul 2021,,Oil,8.0,20550.0,ARMER (6350),"MCCAMEY, G. B., -A- /NCT-A/",,,7024.0,39,0,0,,39,0,0,,ARMER,6350
2,Jul 2021,,Oil,8.0,45314.0,ARMER (6350),RAYDEN MCCAMEY,,,,267,91,175,1.0,183,2165,2165,2.0,ARMER,6350
3,Jul 2021,,Oil,8.0,48393.0,ARMER (6350),LANDLUBBER,,,8026.0,2013,8947,9078,0.0,1882,22367,22367,2.0,ARMER,6350
4,Jul 2021,,Oil,8.0,49425.0,ARMER (6350),6 POUNDER NE,,,8026.0,364,1581,1460,0.0,485,4933,4933,2.0,ARMER,6350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383,Jul 2021,,Gas,8.0,147187.0,WADDELL (GRAYBURG),"WADDELL, W.N.",859,,8988.0,4,24,21,1.0,7,436,436,2.0,WADDELL,GRAYBURG
384,Jul 2021,,Gas,8.0,190864.0,WADDELL (GRAYBURG),"WADDELL, W.N.",890,,8988.0,0,0,0,,0,0,0,,WADDELL,GRAYBURG
385,Jul 2021,,Gas,8.0,190926.0,WADDELL (GRAYBURG),"WADDELL, W.N.",293,,3501.0,4,25,22,1.0,7,12,12,2.0,WADDELL,GRAYBURG
386,Jul 2021,,Gas,8.0,205775.0,WADDELL (GRAYBURG),"WADDELL, W. N.",1200,,8092.0,4,25,22,1.0,7,240,240,2.0,WADDELL,GRAYBURG


### Add columns for net production to Trust

In [18]:
# NOTE(review): the 0.50 and 0.75 factors are hard-coded; presumably the
# trust's share of production and of sales — confirm and consider moving them
# to the config notebook.
oil_net_to_trust = production['rrc_oil_volume'] * 0.50
production['Net Oil Volume to Trust (RRC)'] = oil_net_to_trust
production['Net Oil Sales Volume to Trust (RRC)'] = oil_net_to_trust * 0.75

In [19]:
# NOTE(review): the 0.50 and 0.75 factors are hard-coded; presumably the
# trust's share of production and of sales — confirm and consider moving them
# to the config notebook.
gas_net_to_trust = production['rrc_gas_volume'] * 0.50
production['Net Gas Volume to Trust (RRC)'] = gas_net_to_trust
production['Net Gas Sales Volume to Trust (RRC)'] = gas_net_to_trust * 0.75

In [20]:
# Show the frame with the four net-to-trust columns appended
# (truncated by display.max_rows).
production

Unnamed: 0,Date,Multiple Reports,Lease Type,District,RRC Identifier,Field Name,Lease Name,Gas Well ID,Lease Total,Commingle Permit No.,On Hand Beginning of Month,Production,rrc_oil_volume,Code,On Hand End of Month,Formation Production,rrc_gas_volume,Code.1,Normalized_Field_Name,Normalized_Reservoir_Name,Net Oil Volume to Trust (RRC),Net Oil Sales Volume to Trust (RRC),Net Gas Volume to Trust (RRC),Net Gas Sales Volume to Trust (RRC)
0,Jul 2021,,Oil,8.0,32907.0,ARMER (TUBB),"MCCAMEY, G. B. ""A"" (NCT-B)",,,7024.0,185,37,0,,222,0,0,,ARMER,TUBB,0.0,0.000,0.0,0.000
1,Jul 2021,,Oil,8.0,20550.0,ARMER (6350),"MCCAMEY, G. B., -A- /NCT-A/",,,7024.0,39,0,0,,39,0,0,,ARMER,6350,0.0,0.000,0.0,0.000
2,Jul 2021,,Oil,8.0,45314.0,ARMER (6350),RAYDEN MCCAMEY,,,,267,91,175,1.0,183,2165,2165,2.0,ARMER,6350,87.5,65.625,1082.5,811.875
3,Jul 2021,,Oil,8.0,48393.0,ARMER (6350),LANDLUBBER,,,8026.0,2013,8947,9078,0.0,1882,22367,22367,2.0,ARMER,6350,4539.0,3404.250,11183.5,8387.625
4,Jul 2021,,Oil,8.0,49425.0,ARMER (6350),6 POUNDER NE,,,8026.0,364,1581,1460,0.0,485,4933,4933,2.0,ARMER,6350,730.0,547.500,2466.5,1849.875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383,Jul 2021,,Gas,8.0,147187.0,WADDELL (GRAYBURG),"WADDELL, W.N.",859,,8988.0,4,24,21,1.0,7,436,436,2.0,WADDELL,GRAYBURG,10.5,7.875,218.0,163.500
384,Jul 2021,,Gas,8.0,190864.0,WADDELL (GRAYBURG),"WADDELL, W.N.",890,,8988.0,0,0,0,,0,0,0,,WADDELL,GRAYBURG,0.0,0.000,0.0,0.000
385,Jul 2021,,Gas,8.0,190926.0,WADDELL (GRAYBURG),"WADDELL, W.N.",293,,3501.0,4,25,22,1.0,7,12,12,2.0,WADDELL,GRAYBURG,11.0,8.250,6.0,4.500
386,Jul 2021,,Gas,8.0,205775.0,WADDELL (GRAYBURG),"WADDELL, W. N.",1200,,8092.0,4,25,22,1.0,7,240,240,2.0,WADDELL,GRAYBURG,11.0,8.250,120.0,90.000


### Write to File

In [21]:
# The processed file keeps the name of the raw input file; the row index is
# not written.
processed_path = f'{processed_data_folder}/rrc_prod/{file_name}'
production.to_csv(processed_path, index=False)

## Aggregated EDA

In [22]:
# Total oil volume per normalized field, largest 15 first.
oil_by_field = production.groupby('Normalized_Field_Name')[['rrc_oil_volume']].sum()
oil_by_field.sort_values('rrc_oil_volume', ascending=False).head(15)

Unnamed: 0_level_0,rrc_oil_volume
Normalized_Field_Name,Unnamed: 1_level_1
SAND HILLS,232428
MONAHANS,69427
ARMER,40953
DUNE,17928
UNIVERSITY WADDELL,10130
MARSTON RANCH,8893
RUNNING W,6196
WADDELL,6027
CORDONA LAKE,2713
LEA,1048


In [23]:
# Total oil volume per (reservoir, field) pair, listed in index order.
# No sort by value is applied: sort_index fixes the final order of every row,
# because each (reservoir, field) key appears exactly once after the groupby.
(
    production
    .groupby(['Normalized_Reservoir_Name', 'Normalized_Field_Name'])
    .agg({'rrc_oil_volume': 'sum'})
    .sort_index()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,rrc_oil_volume
Normalized_Reservoir_Name,Normalized_Field_Name,Unnamed: 2_level_1
6350,ARMER,40953
7900,EDWARDS -04-,0
CLEAR FORK,LEA,277
CLEAR FORK,MONAHANS,0
CLEAR FORK,SAND HILLS,47882
CLEAR FORK 4070,SAND HILLS,422
"CLEAR FORK, LOWER",MCKEE,0
"CLEAR FORK, MID.",MCKEE,0
CLEARFORK,MARSTON RANCH,8893
CLEARFORK,MONAHANS,64436
