# Imports

In [0]:
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

# Widgets and Parameters

In [0]:
# Expose a "scenario" text widget (default value "all") and read it back
# lower-cased; the next cell turns this value into a SQL filter.
dbutils.widgets.text("scenario", "all")
scenario_run = dbutils.widgets.get("scenario").lower()

In [0]:
# condition preparation
# Builds the extra WHERE fragment appended to the scenario query.
# NOTE(review): "all" selects only the scenario named 'default', not every
# scenario — confirm that this is the intended meaning.
if scenario_run == "all":
    scenario_condition = "and scenario_name = 'default'"
else:
    # The f-prefix is required: without it the literal text "{scenario_run}"
    # is sent to SQL and no scenario ever matches.
    # NOTE(review): the widget value is interpolated into SQL unescaped.
    scenario_condition = f"and scenario_name = '{scenario_run}'"

In [0]:
# Fetch the scenario rows selected by `scenario_condition` and collect them
# to the driver; the model cells below iterate over these rows.
# NOTE(review): this reads raw.drillout_model_scenarios, while the %sql cell
# further down reads produced.drillout_model_scenarios — confirm which table
# is intended.
scenarios= spark.sql(
    f"""
          select * 
          from raw.drillout_model_scenarios 
          where 1=1 
          {scenario_condition}
          """
).collect()

In [0]:
# Constants declaration
# Forwarded to the rig model as "cutoff_perc_for_misc_operators".
cutoff_perc_for_misc_operators = 5
# Forwarded to the drilling-scheduling and production-estimation models.
buffer_days_for_rig_movement = 10
# Reference date from which every cutoff / forecast window below is derived.
# NOTE(review): pinned to 2022-01-01 rather than taken from the run date — confirm.
current_date = datetime.strptime("2022-01-01", "%Y-%m-%d")

In [0]:
# current_date = datetime.strptime(current_date, "%Y-%m-%d").date()

In [0]:
# NOTE(review): nothing visible in this notebook reads `scenario_id`; the model
# cells below pass the literal 'testing' instead — confirm whether this is needed.
scenario_id = 'testing2'

# Cycle Time Model

In [0]:
# Launch the cycle-time notebook once for every selected scenario.
for scenario in scenarios:
    # Point in time `cycle_times_min_cutoff_years` years before current_date;
    # it is snapped to the first day of its month below.
    training_cutoff = current_date - relativedelta(
        months=12 * scenario.cycle_times_min_cutoff_years
    )
    params = {
        "basin_of_interest": scenario.basin,
        "cutoff_date_for_training_data": training_cutoff.replace(day=1).date(),
        "flow_unit_of_interest": scenario.flow_unit,
        # NOTE(review): pinned to 'testing' instead of scenario.scenario_id — confirm.
        "scenario_id": 'testing',
        "current_date": current_date.date(),
    }
    dbutils.notebook.run("./01-CYCLE-TIME-MODEL", 0, params)

# Rigs Model

In [0]:
# Launch the rig-model notebook once for every selected scenario.
for scenario in scenarios:
    params = {}
    # NOTE(review): the rig count is fixed at 57 although the scenario table
    # exposes a number_of_rigs column — confirm which value should be used.
    params["user_input_for_num_of_rigs"] = 57
    params["basin_of_interest"] = scenario.basin
    # First day of the month `active_rigs_min_cutoff_months` months back.
    params["desired_active_rig_date"] = (
        (current_date - relativedelta(months=scenario.active_rigs_min_cutoff_months))
        .replace(day=1)
        .date()
    )
    # First day of the month `rigs_min_cutoff_years` years back.
    params["cutoff_date_to_train_rig_model"] = (
        (current_date - relativedelta(months=12 * scenario.rigs_min_cutoff_years))
        .replace(day=1)
        .date()
    )
    params["cutoff_perc_for_misc_operators"] = cutoff_perc_for_misc_operators
    # NOTE(review): pinned to 'testing' instead of scenario.scenario_id — confirm.
    params["scenario_id"] = 'testing'
    params['current_date'] = current_date.date()
    # Must sit inside the loop: outside it the notebook ran only once, with the
    # last scenario's params, and raised NameError when no scenario matched.
    dbutils.notebook.run("./02-RIG-MODEL", 0, arguments=params)

# Drilling Scheduling Model

In [0]:
# Launch the drilling-scheduling notebook once for every selected scenario.
for scenario in scenarios:
    # Point in time `forecast_years` years after current_date; snapped to the
    # first day of its month below.
    forecast_horizon = current_date + relativedelta(
        months=12 * int(scenario.forecast_years)
    )
    params = {
        "flow_unit_of_interest": scenario.flow_unit,
        "inventory_drilling_end_time": forecast_horizon.replace(day=1).date(),
        "buffer_days_for_rig_movement": buffer_days_for_rig_movement,
        # NOTE(review): pinned to 'testing' instead of scenario.scenario_id — confirm.
        "scenario_id": 'testing',
    }
    dbutils.notebook.run("./03-DRILLING-SCHEDULING-MODEL", 0, params)

# Production Estimation Model

In [0]:
# Launch the production-estimation notebook once for every selected scenario.
for scenario in scenarios:
    # Each window edge is snapped to the first day of its month.
    history_start = (
        current_date
        - relativedelta(months=12 * scenario.cycle_times_min_cutoff_years)
    ).replace(day=1).date()
    forecast_end = (
        current_date + relativedelta(months=12 * int(scenario.forecast_years))
    ).replace(day=1).date()
    # The same "active rigs" cutoff feeds several parameters of the notebook.
    active_rig_cutoff = (
        current_date - relativedelta(months=scenario.active_rigs_min_cutoff_months)
    ).replace(day=1).date()

    params = {
        "basin_of_interest": scenario.basin,
        "desired_minimum_date_for_producing_wells": history_start,
        "desired_maximum_date_for_producing_wells": forecast_end,
        "desired_wip_wells_date": active_rig_cutoff,
        "cutoff_first_prod_date_for_wip_wells": active_rig_cutoff,
        "desired_first_prod_date_for_wip_wells": active_rig_cutoff,
        "buffer_days_for_rig_movement": buffer_days_for_rig_movement,
        # This one is passed as a "YYYY-MM" string rather than a date.
        "cutoff_date_for_new_wells": active_rig_cutoff.strftime("%Y-%m"),
        "current_date": current_date.date(),
        # NOTE(review): pinned to 'testing' instead of scenario.scenario_id — confirm.
        "scenario_id": 'testing',
    }
    dbutils.notebook.run("./04-PRODUCTION-ESTIMATION-MODEL", 0, params)

# Downloading Final Data

In [0]:
def Stacked_plot_production_data_for_all_wells(start_date, end_date, dataframe, type_curve=None, operator=None, reservoir=None, fluids=("gas",)):
    """Show stacked area charts of monthly production split by well status.

    Rows are kept when ``production_date`` lies in ``[start_date, end_date]``
    and, for every optional filter that is truthy, when the matching column
    equals it. Volumes are summed per ``production_month_year`` for each
    ``WellStatus`` value; months that compare ``>= "2040-01"`` as strings are
    dropped before plotting, regardless of ``end_date``.

    NOTE(review): a later cell in this notebook defines a function with the
    same name (keyed on ``forecastType``), which replaces this one once run.

    Args:
        start_date: Inclusive lower bound compared against ``production_date``.
        end_date: Inclusive upper bound compared against ``production_date``.
        dataframe: pandas DataFrame with the columns ``production_date``,
            ``production_month_year``, ``WellStatus``, ``Oil_MBO``,
            ``Gas_BCFD`` and ``Water_MBO`` (plus the filter columns when the
            corresponding filter is used).
        type_curve: Optional value matched against ``typeCurveArea``.
        operator: Optional value matched against ``OperatorGold``.
        reservoir: Optional value matched against ``ReservoirGoldConsolidated``.
        fluids: Charts to display, any of ``"gas"``, ``"oil"``, ``"water"``
            (a single name or an iterable of names). Defaults to gas only,
            which is the only chart that was displayed before; the oil and
            water figures used to be built on every call and then discarded.

    Raises:
        ValueError: If ``fluids`` contains an unknown name.
    """
    # fluid name -> (column prefix, chart title, y-axis label)
    chart_specs = {
        'gas': ('Gas_BCFD', 'Stacked Plot of Gas for all wells status', 'amount of gas (BCFD)'),
        'oil': ('Oil_MBO', 'Stacked Plot of Oil for all wells status', 'amount of oil (MBO)'),
        'water': ('Water_MBO', 'Stacked Plot of Water for all wells status', 'amount of water (MBO)'),
    }
    if isinstance(fluids, str):
        fluids = (fluids,)
    unknown = [name for name in fluids if name not in chart_specs]
    if unknown:
        raise ValueError(f"unknown fluids {unknown}; expected any of {sorted(chart_specs)}")

    volume_columns = ['Oil_MBO', 'Gas_BCFD', 'Water_MBO']
    # (WellStatus value, legend label)
    status_labels = [
        ('new_wells', 'New Wells'),
        ('wip_wells', 'Wip Wells'),
        ('inventory_wells', 'Inventory Wells'),
        ('producing_wells', 'PDP Wells'),
    ]
    # Legend label -> colour; the insertion order is also the stacking order.
    label_colours = {
        'PDP Wells': '#FFA500',
        'New Wells': '#33CC33',
        'Wip Wells': '#0000FF',
        'Inventory Wells': '#FF00FF',
    }

    dataframe = dataframe[(dataframe['production_date'] >= start_date) & (dataframe['production_date'] <= end_date)]

    if type_curve:
        dataframe = dataframe[dataframe.typeCurveArea == type_curve]

    if operator:
        dataframe = dataframe[dataframe.OperatorGold == operator]

    if reservoir:
        dataframe = dataframe[dataframe.ReservoirGoldConsolidated == reservoir]

    # One frame of monthly totals per status, with its value columns already
    # carrying the status label so the merges below never collide on names.
    monthly_totals = []
    for status, label in status_labels:
        totals = (
            dataframe[dataframe.WellStatus == status]
            .groupby('production_month_year', as_index=False)[volume_columns]
            .sum()
        )
        monthly_totals.append(
            totals.rename(columns={column: f'{column}_{label}' for column in volume_columns})
        )

    all_dates = pd.concat([totals['production_month_year'] for totals in monthly_totals]).unique()
    merged_df = pd.DataFrame({'production_month_year': all_dates})
    for totals in monthly_totals:
        merged_df = merged_df.merge(totals, on='production_month_year', how='outer')

    # A month missing for a status means no production there: plot it as 0.
    merged_df = merged_df.fillna(0)
    merged_df['production_month_year'] = merged_df['production_month_year'].astype(str)
    merged_df = merged_df[merged_df['production_month_year'] < "2040-01"]

    for name in fluids:
        prefix, title, axis_label = chart_specs[name]
        series_colours = {f'{prefix}_{label}': colour for label, colour in label_colours.items()}
        fig = px.area(merged_df, x='production_month_year',
                      y=list(series_colours),
                      title=title,
                      labels={'value': axis_label},
                      template='plotly_dark',
                      color_discrete_map=series_colours,
                      )
        fig.update_layout(xaxis_title='Time')
        fig.show()




In [0]:
def Stacked_plot_wells_coming_online_for_all_wells(start_date, end_date, dataframe, type_curve=None, operator=None, reservoir=None):
    """Show a stacked area chart of wells coming online per month, by well status.

    Rows are kept when ``production_date`` lies in ``[start_date, end_date]``
    and, for every optional filter that is truthy, when the matching column
    equals it. For each ``WellStatus`` value the distinct ``entityID`` values
    with ``producing_month == 1`` are counted per ``production_month_year``.

    NOTE(review): a later cell in this notebook defines a function with the
    same name (keyed on ``forecastType``), which replaces this one once run.

    Args:
        start_date: Inclusive lower bound compared against ``production_date``.
        end_date: Inclusive upper bound compared against ``production_date``.
        dataframe: pandas DataFrame with the columns ``production_date``,
            ``production_month_year``, ``WellStatus``, ``producing_month`` and
            ``entityID`` (plus the filter columns when used).
        type_curve: Optional value matched against ``typeCurveArea``.
        operator: Optional value matched against ``OperatorGold``.
        reservoir: Optional value matched against ``ReservoirGoldConsolidated``.
    """
    # (WellStatus value, legend label), in merge order.
    status_labels = (
        ('new_wells', 'New Wells'),
        ('wip_wells', 'Wip Wells'),
        ('inventory_wells', 'Inventory Wells'),
        ('producing_wells', 'PDP Wells'),
    )

    within_window = (dataframe['production_date'] >= start_date) & (dataframe['production_date'] <= end_date)
    wells = dataframe[within_window]
    if type_curve:
        wells = wells[wells.typeCurveArea == type_curve]
    if operator:
        wells = wells[wells.OperatorGold == operator]
    if reservoir:
        wells = wells[wells.ReservoirGoldConsolidated == reservoir]

    # Distinct wells in their first producing month, per month and status.
    counts_by_status = []
    for status, _ in status_labels:
        first_month_rows = wells[(wells.WellStatus == status) & (wells.producing_month == 1)]
        counts_by_status.append(
            first_month_rows.groupby('production_month_year', as_index=False)["entityID"].nunique()
        )

    month_keys = pd.concat([counts['production_month_year'] for counts in counts_by_status]).unique()
    stacked = pd.DataFrame({'production_month_year': month_keys})
    for (_, label), counts in zip(status_labels, counts_by_status):
        stacked = stacked.merge(counts, on='production_month_year', how='outer', suffixes=('', f'_{label}'))

    # The first merged frame (new wells) never collides, so it keeps the bare name.
    stacked = stacked.rename(columns={'entityID': 'entityID_New Wells'}).fillna(0)
    stacked['production_month_year'] = stacked['production_month_year'].astype(str)

    # Series name -> colour; the insertion order is also the stacking order.
    series_colours = {
        'entityID_PDP Wells': '#FFA500',
        'entityID_New Wells': '#33CC33',
        'entityID_Wip Wells': '#0000FF',
        'entityID_Inventory Wells': '#FF00FF',
    }
    fig = px.area(stacked, x='production_month_year',
                  y=list(series_colours),
                  title='Stacked Plot for wells coming online for all wells status',
                  labels={'value': 'number of wells'},
                  template='plotly_dark',
                  color_discrete_map=series_colours,
                  )
    fig.update_layout(xaxis_title='Time')
    fig.show()


In [0]:
import pandas as pd
import plotly.express as px
# NOTE(review): `final_df` is first assigned in a later cell, so this call
# fails when the notebook is run top-to-bottom on a fresh kernel.
Stacked_plot_production_data_for_all_wells('2019-01-01', '2040-12-31', final_df, reservoir="HAYNESVILLE")

In [0]:
# NOTE(review): `final_df` is first assigned in a later cell, so this call
# fails when the notebook is run top-to-bottom on a fresh kernel.
Stacked_plot_wells_coming_online_for_all_wells('2019-01-01', '2040-12-31', final_df, reservoir="HAYNESVILLE")

In [0]:
%sql
select * from produced.drillout_model_scenarios

scenario_id,scenario_name,basin,flow_unit,number_of_rigs,forecast_years,cycle_times_min_cutoff_years,rigs_min_cutoff_years,active_rigs_min_cutoff_months
2,testing,GULF COAST EAST,HAYNESVILLE,35,15,5,2,6
1,default,GULF COAST EAST,HAYNESVILLE,30,15,5,2,6


In [0]:
import pandas as pd
import plotly.express as px


# Load the production-estimation output for a single scenario into pandas.
# NOTE(review): the scenario id is hard-coded to '2' rather than taken from
# the "scenario" widget — confirm.
final_df = spark.sql("select * from produced.production_estimation_model where scenario_id = '2'").toPandas()  # user input number of rigs = 35

# final_df = spark.sql("select * from produced.production_estimation_model where scenario_id = '1'").toPandas()  # user input number of rigs = 30


In [0]:
# Print, for each forecast type, how many distinct entities it contains.
for well_type in final_df.forecastType.unique():
    entity_ids = final_df[final_df.forecastType == well_type]['entityID']
    print(well_type, entity_ids.nunique())

inventory_wells 3741
producing_wells 5318
wip_wells 57
drilled_completed 333
new_wells 7


In [0]:
def Stacked_plot_production_data_for_all_wells(start_date, end_date, dataframe, type_curve=None, operator=None, reservoir=None):
    """Show a stacked area chart of monthly gas production split by forecast type.

    Rows are kept when ``production_date`` lies in ``[start_date, end_date]``
    and, for every optional filter that is truthy, when the matching column
    equals it. Oil, gas and water volumes are summed per
    ``production_month_year`` for each ``forecastType`` value; only the gas
    series are plotted. Months that compare ``>= "2040-01"`` as strings are
    dropped before plotting, regardless of ``end_date``.

    Args:
        start_date: Inclusive lower bound compared against ``production_date``.
        end_date: Inclusive upper bound compared against ``production_date``.
        dataframe: pandas DataFrame with the columns ``production_date``,
            ``production_month_year``, ``forecastType``, ``Oil_MBO``,
            ``Gas_BCFD`` and ``Water_MBO`` (plus the filter columns when used).
        type_curve: Optional value matched against ``typeCurveArea``.
        operator: Optional value matched against ``OperatorGold``.
        reservoir: Optional value matched against ``ReservoirGoldConsolidated``.
    """
    volume_columns = ['Oil_MBO', 'Gas_BCFD', 'Water_MBO']
    # (forecastType value, legend label), in merge order.
    status_labels = (
        ('new_wells', 'New Wells'),
        ('wip_wells', 'Wip Wells'),
        ('inventory_wells', 'Inventory Wells'),
        ('producing_wells', 'PDP Wells'),
        ('drilled_completed', 'Drilled Completed Wells'),
    )

    within_window = (dataframe['production_date'] >= start_date) & (dataframe['production_date'] <= end_date)
    wells = dataframe[within_window]
    if type_curve:
        wells = wells[wells.typeCurveArea == type_curve]
    if operator:
        wells = wells[wells.OperatorGold == operator]
    if reservoir:
        wells = wells[wells.ReservoirGoldConsolidated == reservoir]

    # Monthly volume totals, one frame per forecast type.
    totals_by_status = [
        wells[wells.forecastType == status]
        .groupby('production_month_year', as_index=False)[volume_columns]
        .sum()
        for status, _ in status_labels
    ]

    month_keys = pd.concat([totals['production_month_year'] for totals in totals_by_status]).unique()
    stacked = pd.DataFrame({'production_month_year': month_keys})
    for (_, label), totals in zip(status_labels, totals_by_status):
        stacked = stacked.merge(totals, on='production_month_year', how='outer', suffixes=('', f'_{label}'))

    # The first merged frame (new wells) never collides, so it keeps bare names.
    stacked = stacked.rename(columns={column: f'{column}_New Wells' for column in volume_columns})
    stacked = stacked.fillna(0)
    stacked['production_month_year'] = stacked['production_month_year'].astype(str)
    stacked = stacked[stacked['production_month_year'] < "2040-01"]

    # Series name -> colour; the insertion order is also the stacking order.
    series_colours = {
        'Gas_BCFD_PDP Wells': '#FFA500',
        'Gas_BCFD_New Wells': '#33CC33',
        'Gas_BCFD_Wip Wells': '#0000FF',
        'Gas_BCFD_Inventory Wells': '#FF00FF',
        'Gas_BCFD_Drilled Completed Wells': '#FF0000',
    }
    fig_gas = px.area(stacked, x='production_month_year',
                      y=list(series_colours),
                      title='Stacked Plot of Gas for all wells status',
                      labels={'value': 'amount of gas (BCFD)'},
                      template='plotly_dark',
                      color_discrete_map=series_colours,
                      )
    fig_gas.update_layout(xaxis_title='Time')

    fig_gas.show()





In [0]:

def Stacked_plot_wells_coming_online_for_all_wells(start_date, end_date, dataframe, type_curve=None, operator=None, reservoir=None):
    """Show a stacked area chart of wells coming online per month, by forecast type.

    Rows are kept when ``production_date`` lies in ``[start_date, end_date]``
    and, for every optional filter that is truthy, when the matching column
    equals it. For each ``forecastType`` value the distinct ``entityID``
    values with ``producing_month == 1`` are counted per
    ``production_month_year``.

    Args:
        start_date: Inclusive lower bound compared against ``production_date``.
        end_date: Inclusive upper bound compared against ``production_date``.
        dataframe: pandas DataFrame with the columns ``production_date``,
            ``production_month_year``, ``forecastType``, ``producing_month``
            and ``entityID`` (plus the filter columns when used).
        type_curve: Optional value matched against ``typeCurveArea``.
        operator: Optional value matched against ``OperatorGold``.
        reservoir: Optional value matched against ``ReservoirGoldConsolidated``.
    """
    # (forecastType value, legend label), in merge order.
    status_labels = (
        ('new_wells', 'New Wells'),
        ('wip_wells', 'Wip Wells'),
        ('inventory_wells', 'Inventory Wells'),
        ('producing_wells', 'PDP Wells'),
        ('drilled_completed', 'Drilled Completed Wells'),
    )

    within_window = (dataframe['production_date'] >= start_date) & (dataframe['production_date'] <= end_date)
    wells = dataframe[within_window]
    if type_curve:
        wells = wells[wells.typeCurveArea == type_curve]
    if operator:
        wells = wells[wells.OperatorGold == operator]
    if reservoir:
        wells = wells[wells.ReservoirGoldConsolidated == reservoir]

    # Distinct wells in their first producing month, per month and forecast type.
    counts_by_status = []
    for status, _ in status_labels:
        first_month_rows = wells[(wells.forecastType == status) & (wells.producing_month == 1)]
        counts_by_status.append(
            first_month_rows.groupby('production_month_year', as_index=False)["entityID"].nunique()
        )

    month_keys = pd.concat([counts['production_month_year'] for counts in counts_by_status]).unique()
    stacked = pd.DataFrame({'production_month_year': month_keys})
    for (_, label), counts in zip(status_labels, counts_by_status):
        stacked = stacked.merge(counts, on='production_month_year', how='outer', suffixes=('', f'_{label}'))

    # The first merged frame (new wells) never collides, so it keeps the bare name.
    stacked = stacked.rename(columns={'entityID': 'entityID_New Wells'}).fillna(0)
    stacked['production_month_year'] = stacked['production_month_year'].astype(str)

    # Series name -> colour; the insertion order is also the stacking order.
    series_colours = {
        'entityID_PDP Wells': '#FFA500',
        'entityID_New Wells': '#33CC33',
        'entityID_Wip Wells': '#0000FF',
        'entityID_Inventory Wells': '#FF00FF',
        'entityID_Drilled Completed Wells': '#FF0000',
    }
    fig = px.area(stacked, x='production_month_year',
                  y=list(series_colours),
                  title='Stacked Plot for wells coming online for all wells status',
                  labels={'value': 'number of wells'},
                  template='plotly_dark',
                  color_discrete_map=series_colours,
                  )
    fig.update_layout(xaxis_title='Time')
    fig.show()


In [0]:
# Gas production chart for the HAYNESVILLE reservoir, using the scenario data loaded in `final_df`.
Stacked_plot_production_data_for_all_wells('2019-01-01', '2040-12-31', final_df, reservoir="HAYNESVILLE")

In [0]:
# Wells-coming-online chart for the HAYNESVILLE reservoir, using the scenario data loaded in `final_df`.
Stacked_plot_wells_coming_online_for_all_wells('2019-01-01', '2040-12-31', final_df, reservoir="HAYNESVILLE")