In [30]:
import pandas as pd
import datetime as dt
import os
import seaborn as sns

pd.options.display.max_columns=1000
pd.options.display.max_rows = 1000

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

%run configs.ipynb

In [2]:
def get_file_names(root_folder):
    
    file_names = list(filter(lambda x: 'DS_Store' not in x, os.listdir(root_folder)))
    return file_names

# Read and Concatenate RRC Prod

In [3]:
rrc_prod_processed_files = get_file_names(f'{processed_data_folder}/rrc_prod')

rrc_prod_processed_files

['oct_21.csv',
 'july_21.csv',
 'june_21.csv',
 'feb_21.csv',
 'may_21.csv',
 'september_21.csv',
 'jan_21.csv',
 'august_21.csv',
 'march_21.csv',
 'nov_21.csv',
 'april_21.csv']

In [4]:
rrc_prod_processed_files

['oct_21.csv',
 'july_21.csv',
 'june_21.csv',
 'feb_21.csv',
 'may_21.csv',
 'september_21.csv',
 'jan_21.csv',
 'august_21.csv',
 'march_21.csv',
 'nov_21.csv',
 'april_21.csv']

In [5]:
rrc_prod_dfs = [pd.read_csv(f'{processed_data_folder}/rrc_prod/{file}') for file in rrc_prod_processed_files]

In [6]:
rrc_prod_pres = pd.concat(rrc_prod_dfs)

### Transform Dates to Datetime objects so read in properly at viz tool

In [9]:
rrc_prod_pres['Date'] = pd.to_datetime(rrc_prod_pres['Date'])

In [11]:
rrc_prod_pres.groupby('Date').sum()[['rrc_oil_volume', 'Net Oil Volume to Trust (RRC)' ]]

Unnamed: 0_level_0,rrc_oil_volume,Net Oil Volume to Trust (RRC)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-01,359609,179804.5
2021-02-01,332355,166177.5
2021-03-01,442912,221456.0
2021-04-01,431732,215866.0
2021-05-01,444124,222062.0
2021-06-01,357811,178905.5
2021-07-01,395910,197955.0
2021-08-01,384725,192362.5
2021-09-01,375220,187610.0
2021-10-01,361442,180721.0


# PBT News Prod

In [12]:
pbt_news_prod_files = get_file_names(f'{processed_data_folder}/pbt_news_prod/')

In [13]:
pbt_dfs = [pd.read_csv(f'{processed_data_folder}/pbt_news_prod/{file}') for file in pbt_news_prod_files]

In [14]:
pbt_news_pres = pd.concat(pbt_dfs)

In [15]:
pbt_news_pres["Date"].value_counts()

Feb 2021     2
Sept 2021    2
Jul 2021     2
May 2021     2
Aug 2021     2
Jun 2021     2
Oct 2021     2
Mar 2021     2
Apr 2021     2
Jan 2021     2
Name: Date, dtype: int64

In [16]:
pbt_news_pres["Date"].nunique()

10

### Transform Dates to Datetime objects so read in properly at viz tool

In [17]:
pbt_news_pres['Date'] = pd.to_datetime(pbt_news_pres['Date'])

In [18]:
pbt_news_pres

Unnamed: 0,Date,Property,Gross Oil Volume (PBT News),Net Oil Volume (PBT News),Average Oil Price,Average Gas Price,Gross Gas Volume (PBT News),Net Gas Volume (PBT News)
0,2021-02-01,Waddell Ranch,50757,38068,58.09,2.28,190570,142928
1,2021-02-01,Texas Royalties,19947,17556,52.8,5.89,12461,10948
0,2021-09-01,Waddell Ranch,120643,90482,70.09,6.36,265108,198831
1,2021-09-01,Texas Royalties,17123,15191,68.94,7.36,10650,9442
0,2021-07-01,Waddell Ranch,102735,77051,71.32,3.45,249359,187019
1,2021-07-01,Texas Royalties,17690,15772,71.11,8.09,11870,10570
0,2021-05-01,Waddell Ranch,94554,70916,63.97,2.61,194689,146017
1,2021-05-01,Texas Royalties,19114,16949,61.94,6.62,10523,9333
0,2021-08-01,Waddell Ranch,108522,81392,66.66,3.44,257857,193393
1,2021-08-01,Texas Royalties,17941,15927,66.39,8.01,9871,8757


### Filtering Data to Waddell Ranch Only

In [19]:
pbt_news_pres = pbt_news_pres.loc[pbt_news_pres['Property']=='Waddell Ranch']

# EDA Before Writing to File

In [29]:
rrc_prod_pres['Date'].value_counts()

rrc_prod_pres.groupby('Date').agg({'RRC Identifier': 'nunique'})

2021-03-01    466
2021-04-01    466
2021-02-01    464
2021-05-01    464
2021-01-01    464
2021-08-01    464
2021-06-01    460
2021-11-01    460
2021-10-01    452
2021-09-01    437
2021-07-01    385
Name: Date, dtype: int64

Unnamed: 0_level_0,RRC Identifier
Date,Unnamed: 1_level_1
2021-01-01,408
2021-02-01,408
2021-03-01,410
2021-04-01,411
2021-05-01,409
2021-06-01,407
2021-07-01,332
2021-08-01,409
2021-09-01,384
2021-10-01,407


In [23]:
rrc_prod_pres.head()
rrc_prod_pres['District'].value_counts()
rrc_prod_pres.info()

Unnamed: 0,Date,Multiple Reports,Lease Type,District,RRC Identifier,Field Name,Lease Name,Gas Well ID,Lease Total,Commingle Permit No.,On Hand Beginning of Month,Production,rrc_oil_volume,Code,On Hand End of Month,Formation Production,rrc_gas_volume,Code.1,Normalized_Field_Name,Normalized_Reservoir_Name,Net Oil Volume to Trust (RRC),Net Oil Sales Volume to Trust (RRC),Net Gas Volume to Trust (RRC),Net Gas Sales Volume to Trust (RRC)
0,2021-10-01,,Oil,8.0,32907.0,ARMER (TUBB),"MCCAMEY, G. B. ""A"" (NCT-B)",,,7024.0,140,26,0,,166,0,0,,ARMER,TUBB,0.0,0.0,0.0,0.0
1,2021-10-01,,Oil,8.0,20550.0,ARMER (6350),"MCCAMEY, G. B., -A- /NCT-A/",,,7024.0,18,0,0,,18,0,0,,ARMER,6350,0.0,0.0,0.0,0.0
2,2021-10-01,,Oil,8.0,45314.0,ARMER (6350),RAYDEN MCCAMEY,,,,261,87,183,1.0,165,1699,1699,2.0,ARMER,6350,91.5,68.625,849.5,637.125
3,2021-10-01,Y,Oil,8.0,48393.0,ARMER (6350),LANDLUBBER,,,8026.0,1721,7857,8126,0.0,1452,19723,19723,2.0,ARMER,6350,4063.0,3047.25,9861.5,7396.125
4,2021-10-01,,Oil,8.0,49425.0,ARMER (6350),6 POUNDER NE,,,8026.0,507,870,1184,0.0,193,3832,3832,2.0,ARMER,6350,592.0,444.0,1916.0,1437.0


8.0    4982
Name: District, dtype: int64

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4982 entries, 0 to 465
Data columns (total 24 columns):
 #   Column                               Non-Null Count  Dtype         
---  ------                               --------------  -----         
 0   Date                                 4982 non-null   datetime64[ns]
 1   Multiple Reports                     717 non-null    object        
 2   Lease Type                           4391 non-null   object        
 3   District                             4982 non-null   float64       
 4   RRC Identifier                       4982 non-null   float64       
 5   Field Name                           4982 non-null   object        
 6   Lease Name                           4982 non-null   object        
 7   Gas Well ID                          2507 non-null   object        
 8   Lease Total                          148 non-null    object        
 9   Commingle Permit No.                 2990 non-null   float64       
 10  On Hand Begin

In [34]:
count_date_well = rrc_prod_pres.groupby(['Date', 'RRC Identifier']).agg({'rrc_oil_volume': 'count'})

In [40]:
count_date_well[count_date_well['rrc_oil_volume']>1]

Unnamed: 0_level_0,Unnamed: 1_level_0,rrc_oil_volume
Date,RRC Identifier,Unnamed: 2_level_1
2021-01-01,1224.0,5
2021-01-01,5263.0,7
2021-01-01,5283.0,3
2021-01-01,5294.0,8
2021-01-01,7155.0,5
2021-01-01,12207.0,4
2021-01-01,12508.0,4
2021-01-01,15266.0,9
2021-01-01,18745.0,8
2021-01-01,20914.0,4


# Write Tables to File

In [20]:
pbt_news_pres.to_csv(f'{presentation_data_folder}/pbt_news_prod/pbt_news_prod.csv', index = False)

In [21]:
rrc_prod_pres.to_csv(f'{presentation_data_folder}/rrc_prod_blackbeard/rrc_prod.csv', index = False)