# Imports

In [0]:
import pandas as pd
import pyspark.sql.functions as F
from pyspark.sql.types import DateType, TimestampType
from pyspark.sql.functions import col
from datetime import date

# Input Parameters 

In [0]:
# flow_unit_of_interest = dbutils.widgets.get("flow_unit_of_interest")
# basin_of_interest = dbutils.widgets.get("basin_of_interest")
# end_time = dbutils.widgets.get("inventory_drilling_end_time")
# buffer_days_for_rig_movement = int(dbutils.widgets.get("buffer_days_for_rig_movement"))
# scenario_id = dbutils.widgets.get("scenario_id")

In [0]:
flow_unit_of_interest = "HAYNESVILLE"
basin_of_interest = 'GULF COAST EAST'
end_time = "2040-04-01"
buffer_days_for_rig_movement = 10
scenario_id = "1"

# Inventory Data Download

In [0]:
class InventoryDownloader:
    def __init__(
        self,
        inventory_data_table: str,
        inventory_economics_data_table: str,
        analog_well_table,
        inventory_col: list,  # Added list for columns
        inventory_econo: list,  # Added list for economics columns
        flowUnit_of_interest: str,
        basin_of_interest,
    ):
        """
        Initializes the InventoryDownloader object with necessary parameters.

        Parameters:
        - inventory_data_table (str): Table name for inventory data.
        - inventory_economics_data_table (str): Table name for inventory economics data.
        - inventory_col (list): Columns to select for inventory data.
        - inventory_econo (list): Columns to select for inventory economics data.
        - flowUnit_of_interest (str): Basin of interest.
        """
        self.inventory_data_table = inventory_data_table
        self.inventory_economics_data_table = inventory_economics_data_table
        self.analog_well_table = analog_well_table
        self.inventory_col = inventory_col
        self.inventory_econo = inventory_econo
        self.flowUnit_of_interest = flowUnit_of_interest
        self.basin_of_interest = basin_of_interest

    def download_inventory_data(self, welltype="UNDEV") -> pd.DataFrame:
        """
        Downloads inventory data from PySpark and returns it as a Pandas DataFrame.

        Parameters:
        - spark: PySpark SparkSession.
        - welltype (str): Well type for filtering (default is "UNDEV").

        Returns:
        - pd.DataFrame: Inventory data as Pandas DataFrame.
        """
        inventory_data = spark.table(self.inventory_data_table).select(
            *self.inventory_col
        )
        inventory_data_df = inventory_data.filter(
            (col("wellType") == welltype)
            & (col("flowUnit") == self.flowUnit_of_interest)
        ).toPandas()

        return inventory_data_df

    def download_permit_wells(self):
        permit_wells_df = spark.sql(
            f"""
        SELECT
        *
        EXCEPT(LateralLength_FT, fu_median_ll, tca_median_ll),
        COALESCE(LateralLength_FT, tca_median_ll, fu_median_ll) as LateralLength_FT
        FROM(
            SELECT
            API10, BasinQuantum, OperatorGold, FlowUnit_Analog, LateralLength_FT, WellStatus, typeCurveArea, ReservoirGoldConsolidated,
            PERCENTILE_CONT(0.5) WITHIN GROUP (
                ORDER BY
                LateralLength_FT
            ) OVER () AS fu_median_ll,
            PERCENTILE_CONT(0.5) WITHIN GROUP (
                ORDER BY
                LateralLength_FT
            ) OVER(PARTITION BY typeCurveArea) AS tca_median_ll
            FROM
            {self.analog_well_table}
            WHERE
            recentWell = "true"
            AND flowUnit_Analog = '{self.flowUnit_of_interest}'
            AND BasinQuantum = '{self.basin_of_interest}'
            AND WellStatus in ("PERMITTED", "PERMIT PENDING")
        )
        """
        ).toPandas()

        permit_wells_df = permit_wells_df.loc[
            permit_wells_df.groupby("API10")["LateralLength_FT"].idxmax()
        ]
        return permit_wells_df

    def download_economics_data(self, entities_list, scenario="BaseScenario"):
        """
        Downloads inventory economics data from PySpark and returns it as a Pandas DataFrame.

        Parameters:
        - spark: PySpark SparkSession.
        - scenario (str): Scenario type for filtering (default is 'BaseScenario').

        Returns:
        - pd.DataFrame: Inventory economics data as Pandas DataFrame.
        """
        economics_undev_wells_data = spark.sql(
            f"select entityID, scenarioType, internalRateOfReturn from {self.inventory_economics_data_table} where scenarioType = '{scenario}' and entityID IN {entities_list}"
        ).toPandas()

        economics_undev_wells_data = economics_undev_wells_data.dropna(
            subset=["internalRateOfReturn"], axis=0
        )

        return economics_undev_wells_data

    def download_final_data(self, merge_how="left"):
        """
        Downloads final inventory data by merging inventory and economics data.

        Parameters:
        - spark: PySpark SparkSession.
        - merge_how (str): Merge method (default is 'left').

        Returns:
        - pd.DataFrame: Final inventory data as Pandas DataFrame.
        """
        df = self.download_inventory_data()
        df.rename(
            {"operator": "OperatorGold", "reservoir": "ReservoirGoldConsolidated"},
            axis=1,
            inplace=True,
        )

        # permit_wells = self.download_permit_wells()
        # permit_wells.rename({'API10': "entityID", "FlowUnit_Analog":"flowUnit", "WellStatus":"wellType"}, axis=1, inplace=True)

        # df = pd.concat([df, permit_wells])

        entities = tuple(df["entityID"].unique())
        economics_metrix_data = self.download_economics_data(entities)

        df = pd.merge(df, economics_metrix_data, on=["entityID"], how=merge_how)

        df = df.drop_duplicates()
        df.reset_index(inplace=True, drop=True)
        df = df.dropna(subset=["internalRateOfReturn"], axis=0)
        max_values = df.groupby("entityID")["internalRateOfReturn"].idxmax()
        df = df.loc[max_values]

        return df

In [0]:
inventory_table = "produced.qbi_well_oneline_merged"
invenotry_economic_table = "produced.qbi_economics_scenarios_undev"
analog_well_table = "produced.analog_well_selection"
inventory_col = (
    "entityID",
    "basinQuantum",
    "operator",
    "flowUnit",
    "LateralLength_ft",
    "wellType",
    "typeCurveArea",
    "reservoir",
)
inventory_econo = (
    "basinQuantum",
    "entityID",
    "internalRateOfReturn",
    "scenarioType",
    "typeCurveArea",
)
inventory_download = InventoryDownloader(
    inventory_table,
    invenotry_economic_table,
    analog_well_table,
    inventory_col,
    inventory_econo,
    flow_unit_of_interest,
    basin_of_interest,
)
inventory_df = inventory_download.download_final_data()

In [0]:
# permitted_wells_api = tuple(inventory_df[inventory_df.wellType == 'PERMITTED']['entityID'].unique())

In [0]:
# %sql
# select count(distinct(entityID)) from produced.economi where entityID in (select api10 from produced.analog_well_selection where WellStatus = 'PERMITTED') and Scenario_Type = 'BaseScenario'

In [0]:
# %sql
# select count(distinct(api10)), wellstatus from produced.analog_well_selection group by WellStatus

In [0]:
# inventory_table = "produced.qbi_well_oneline_merged"
# invenotry_economic_table = 'produced.qbi_economics_scenarios_undev'
# basin = 'GULF COAST EAST'
# inventory_col = ("entityID", "basinQuantum", "operator", 'flowUnit', 'LateralLength_ft', 'wellType', "typeCurveArea", "reservoir")
# inventory_econo = ("basinQuantum", "entityID", 'internalRateOfReturn', 'scenarioType', 'typeCurveArea')
# inventory_download = InventoryDownloader(inventory_table, invenotry_economic_table, inventory_col, inventory_econo, flow_unit_of_interest)
# inventory_df = inventory_download.download_final_data(spark)

# Addition of Permit Pending and Permitted

In [0]:
# class permitwellsdownloader:
#     def __init__(self,
#                  analog_well_table: str,
#                  permit_well_economics_data_table: str,
#                  inventory_col: list,  # Added list for columns
#                  inventory_econo: list,  # Added list for economics columns
#                  flowUnit_of_interest: str,
#                  basin_of_interest):
#         """
#         Initializes the InventoryDownloader object with necessary parameters.
# we
#         Parameters:
#         - analog_well_table (str): Table name for permit wells data.
#         - inventory_economics_data_table (str): Table name for inventory economics data.
#         - inventory_col (list): Columns to select for inventory data.
#         - inventory_econo (list): Columns to select for inventory economics data.
#         - flowUnit_of_interest (str): Basin of interest.
#         """
#         self.analog_well_table = analog_well_table
#         self.permit_well_economics_data_table = permit_well_economics_data_table
#         self.inventory_col = inventory_col
#         self.inventory_econo = inventory_econo
#         self.flowUnit_of_interest = flowUnit_of_interest


#     def well_download(self, status = ("PERMITTED", "PERMIT PENDING")):
#         query = f"""
#         SELECT
#             API10, basinQuantum, OperatorGold,
#         FROM
#             {self.rig_historical_table} AS com
#         INNER JOIN
#             {self.analog_well_table} AS ana
#         ON
#             ana.api10 = com.api10
#             AND ana.recentWell = 'true'
#             AND com.BasinQuantum = '{self.basin_of_interest}'
#             AND ana.FlowUnit_Analog = '{self.flow_unit_of_interest}'
#             AND date = '{desired_active_rig_date}'
#         """


#         df = spark.sql(query).toPandas()
#         df['LateralLength_FT'] = pd.to_numeric(df['LateralLength_FT'])
#         df = df.loc[df.groupby('api10')['LateralLength_FT'].idxmax()]
#         return df

# Cycle Time Functions

In [0]:
final_df = spark.sql(
    f"select * from produced.api_level_cycle_times where scenario_id = '{scenario_id}'"
).toPandas()

In [0]:
opr_tca_df = spark.sql(
    f"SELECT * FROM produced.operator_cycle_times where scenario_id = '{scenario_id}'"
).toPandas()

In [0]:
def get_rig_release_time(opr, tca, cycle_time_df=opr_tca_df, basin_df=final_df):

    if (
        len(
            cycle_time_df[
                (cycle_time_df.OperatorGold == opr)
                & (cycle_time_df.typeCurveArea == tca)
            ]
        )
        > 0
    ):
        return cycle_time_df[
            (cycle_time_df.OperatorGold == opr) & (cycle_time_df.typeCurveArea == tca)
        ]["time_taken_spud_to_rigrelease"].median()

    else:

        if len(basin_df[basin_df.OperatorGold == opr]) > 9:

            return basin_df[basin_df.OperatorGold == opr][
                "time_taken_spud_to_rigrelease"
            ].median()
        else:

            return basin_df["time_taken_spud_to_rigrelease"].median()

In [0]:
def get_spud_to_complete_time(opr, tca, cycle_time_df=opr_tca_df, basin_df=final_df):

    if (
        len(
            cycle_time_df[
                (cycle_time_df.OperatorGold == opr)
                & (cycle_time_df.typeCurveArea == tca)
            ]
        )
        > 0
    ):

        return cycle_time_df[
            (cycle_time_df.OperatorGold == opr) & (cycle_time_df.typeCurveArea == tca)
        ]["time_taken_spud_to_completion"].median()
    else:
        if len(basin_df[basin_df.OperatorGold == opr]) > 9:

            return basin_df[basin_df.OperatorGold == opr][
                "time_taken_spud_to_completion"
            ].median()
        else:

            return basin_df["time_taken_spud_to_completion"].median()

# Getting Rigs Data from rig model

In [0]:
rigs_df = spark.sql(
    f"SELECT * FROM produced.rig_model_table where scenario_id = '{scenario_id}' "
).toPandas()

In [0]:
rigs_df.rename({"api10": "API10", "operator": "OperatorGold"}, inplace=True, axis=1)

# Getting Miscellenous Opr

In [0]:
miscell_opr = spark.sql(
    f"select * from produced.rig_model_miscellaneous_opr where scenario_id = '{scenario_id}'"
).toPandas()

inventory_df.loc[
    inventory_df.OperatorGold.isin(miscell_opr.real_operator_name.unique()),
    "OperatorGold",
] = "Miscellaneous"

# Inventory Drilling and Scheduling

In [0]:
inventory_df["Status"] = "Undrilled"
inventory_df["inventory_spud_date"] = None
inventory_df["inventory_rig_release_date"] = None
inventory_df["inventory_completion_date"] = None
inventory_df["inventory_firstprod_date"] = None

In [0]:
def update_inventory(entity_id, single_date, inventory_df1, opr, tca, buffer_days):

    inventory_df1.loc[inventory_df1.entityID == entity_id, "Status"] = "Drilled"

    inventory_df1.loc[
        inventory_df1.entityID == entity_id, "inventory_spud_date"
    ] = single_date + pd.Timedelta(days=buffer_days)

    spud_to_rig_release = get_rig_release_time(opr, tca)

    inventory_df1.loc[
        inventory_df1.entityID == entity_id, "inventory_rig_release_date"
    ] = inventory_df1.loc[
        inventory_df1.entityID == entity_id, "inventory_spud_date"
    ] + pd.Timedelta(
        days=spud_to_rig_release
    )

    spud_to_completion = int(get_spud_to_complete_time(opr, tca))

    inventory_df1.loc[
        inventory_df1.entityID == entity_id, "inventory_completion_date"
    ] = inventory_df1.loc[
        inventory_df1.entityID == entity_id, "inventory_spud_date"
    ] + pd.Timedelta(
        days=spud_to_completion
    )

    inventory_df1.loc[
        inventory_df1.entityID == entity_id, "inventory_firstprod_date"
    ] = inventory_df1.loc[
        inventory_df1.entityID == entity_id, "inventory_spud_date"
    ] + pd.Timedelta(
        days=spud_to_completion
    )


def update_rigs(rig_id, opr, tca, entity_id, rigs_df, inventory_df1):

    inventory_rig_release_date_value = (
        inventory_df1.loc[
            inventory_df1.entityID == entity_id, "inventory_rig_release_date"
        ]
        .iloc[0]
        .date()
    )
    inventory_spud_date_value = (
        inventory_df1.loc[inventory_df1.entityID == entity_id, "inventory_spud_date"]
        .iloc[0]
        .date()
    )

    rig_df_new_row = {
        "rig_id": rig_id,
        "API10": entity_id,
        "OperatorGold": opr,
        "typeCurveArea": tca,
        "spud_date": inventory_spud_date_value,
        "rig_release_date": inventory_rig_release_date_value,
    }

    rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
    return rigs_df


def get_best_tca_same_operator(
    other_tca_with_sticks, opr, buffer_days_for_rig_movement
):
    other_tca_with_sticks["sticks_per_rig"] = None
    for tca_1 in other_tca_with_sticks.typeCurveArea.unique():
        curr_rigs_in_tca = rigs_df[
            (rigs_df.OperatorGold == opr)
            & (rigs_df.typeCurveArea == tca_1)
            & (rigs_df.spud_date <= single_date)
            & (
                single_date
                <= (
                    rigs_df.rig_release_date
                    + pd.Timedelta(days=buffer_days_for_rig_movement)
                )
            )
        ]["rig_id"].nunique()

        condition_other_tca_with_sticks = (
            other_tca_with_sticks.OperatorGold == opr
        ) & (other_tca_with_sticks.typeCurveArea == tca_1)

        other_tca_with_sticks.loc[
            condition_other_tca_with_sticks, "sticks_per_rig"
        ] = other_tca_with_sticks["entityID"] / (curr_rigs_in_tca + 1)

    best_tca = other_tca_with_sticks[
        other_tca_with_sticks["sticks_per_rig"]
        == other_tca_with_sticks["sticks_per_rig"].max()
    ]["typeCurveArea"].iloc[0]

    return best_tca


def get_best_operator_same_typecurve(
    same_tca_with_diff_opr_sticks, tca, buffer_days_for_rig_movement
):
    same_tca_with_diff_opr_sticks["sticks_per_rig"] = None

    for opr_1 in same_tca_with_diff_opr_sticks.OperatorGold.unique():
        curr_rigs_in_tca = rigs_df[
            (rigs_df.OperatorGold == opr_1)
            & (rigs_df.typeCurveArea == tca)
            & (rigs_df.spud_date <= single_date)
            & (
                single_date
                <= (
                    rigs_df.rig_release_date
                    + pd.Timedelta(days=buffer_days_for_rig_movement)
                )
            )
        ]["rig_id"].nunique()

        condition_other_tca_with_sticks = (
            same_tca_with_diff_opr_sticks.OperatorGold == opr_1
        )

        same_tca_with_diff_opr_sticks.loc[
            condition_other_tca_with_sticks, "sticks_per_rig"
        ] = same_tca_with_diff_opr_sticks["entityID"] / (curr_rigs_in_tca + 1)

    best_opr = same_tca_with_diff_opr_sticks[
        same_tca_with_diff_opr_sticks["sticks_per_rig"]
        == same_tca_with_diff_opr_sticks["sticks_per_rig"].max()
    ]["OperatorGold"].iloc[0]

    return best_opr


def get_best_inventory_entity_id(inventory_df1, opr, tca):
    max_irr_value = inventory_df1[
        (inventory_df1.OperatorGold == opr)
        & (inventory_df1.typeCurveArea == tca)
        & (inventory_df1.Status == "Undrilled")
    ]["internalRateOfReturn"].max()

    inventory_entity_id = inventory_df1[
        (inventory_df1.OperatorGold == opr)
        & (inventory_df1.typeCurveArea == tca)
        & (inventory_df1.Status == "Undrilled")
        & ((inventory_df1.internalRateOfReturn == max_irr_value))
    ]["entityID"].iloc[0]

    return inventory_entity_id

In [0]:
start_time = str(rigs_df.rig_release_date.min())
date_range = (pd.Timestamp(start_time), pd.Timestamp(end_time))
for single_date in pd.date_range(start=date_range[0], end=date_range[1], freq="D"):
    rig_release_df = rigs_df[rigs_df.rig_release_date == single_date]

    if len(rig_release_df) > 0:

        for i in range(len(rig_release_df)):

            rig_id = rig_release_df["rig_id"].iloc[i]
            opr = rig_release_df["OperatorGold"].iloc[i]
            tca = rig_release_df["typeCurveArea"].iloc[i]

            condition1 = (
                (inventory_df.OperatorGold == opr)
                & (inventory_df.typeCurveArea == tca)
                & (inventory_df.Status == "Undrilled")
            )
            total_sticks_under_opr_tca = inventory_df[condition1]

            if tca is not None and len(total_sticks_under_opr_tca) > 0:

                inventory_entity_id = get_best_inventory_entity_id(
                    inventory_df, opr, tca
                )
                update_inventory(
                    inventory_entity_id,
                    single_date,
                    inventory_df,
                    opr,
                    tca,
                    buffer_days_for_rig_movement,
                )
                rigs_df = update_rigs(
                    rig_id, opr, tca, inventory_entity_id, rigs_df, inventory_df
                )

            else:
                # if there is no inventory left in currect tca

                other_tca_with_sticks = (
                    inventory_df[
                        (inventory_df.OperatorGold == opr)
                        & (inventory_df.Status == "Undrilled")
                    ]
                    .groupby(["OperatorGold", "typeCurveArea"], as_index=False)[
                        "entityID"
                    ]
                    .count()
                )

                if len(other_tca_with_sticks) > 0:

                    best_tca = get_best_tca_same_operator(
                        other_tca_with_sticks, opr, buffer_days_for_rig_movement
                    )

                    inventory_entity_id = get_best_inventory_entity_id(
                        inventory_df, opr, best_tca
                    )
                    update_inventory(
                        inventory_entity_id,
                        single_date,
                        inventory_df,
                        opr,
                        best_tca,
                        buffer_days_for_rig_movement,
                    )

                    rigs_df = update_rigs(
                        rig_id,
                        opr,
                        best_tca,
                        inventory_entity_id,
                        rigs_df,
                        inventory_df,
                    )

                else:
                    # move rig within same tca but another operator

                    if tca is not None:
                        same_tca_with_diff_opr_sticks = (
                            inventory_df[
                                (inventory_df.typeCurveArea == tca)
                                & (inventory_df.Status == "Undrilled")
                            ]
                            .groupby(["OperatorGold", "typeCurveArea"], as_index=False)[
                                "entityID"
                            ]
                            .count()
                        )

                        if len(same_tca_with_diff_opr_sticks) > 0:
                            best_opr = get_best_operator_same_typecurve(
                                same_tca_with_diff_opr_sticks,
                                tca,
                                buffer_days_for_rig_movement,
                            )

                            inventory_entity_id = get_best_inventory_entity_id(
                                inventory_df, best_opr, tca
                            )

                            update_inventory(
                                inventory_entity_id,
                                single_date,
                                inventory_df,
                                best_opr,
                                tca,
                                buffer_days_for_rig_movement,
                            )
                            rigs_df = update_rigs(
                                rig_id,
                                best_opr,
                                tca,
                                inventory_entity_id,
                                rigs_df,
                                inventory_df,
                            )
                    else:
                        other_tca_with_diff_opr_sticks = (
                            inventory_df[inventory_df.Status == "Undrilled"]
                            .groupby("OperatorGold", as_index=False)["entityID"]
                            .count()
                        )

                        if len(other_tca_with_diff_opr_sticks) > 0:

                            max_entites = other_tca_with_diff_opr_sticks[
                                "entityID"
                            ].max()

                            best_opr = other_tca_with_diff_opr_sticks[
                                other_tca_with_diff_opr_sticks.entityID == max_entites
                            ]["OperatorGold"].iloc[0]

                            other_tca_with_sticks = (
                                inventory_df[
                                    (inventory_df.OperatorGold == best_opr)
                                    & (inventory_df.Status == "Undrilled")
                                ]
                                .groupby(
                                    ["OperatorGold", "typeCurveArea"], as_index=False
                                )["entityID"]
                                .count()
                            )

                            best_tca = get_best_tca_same_operator(
                                other_tca_with_sticks,
                                best_opr,
                                buffer_days_for_rig_movement,
                            )

                            inventory_entity_id = get_best_inventory_entity_id(
                                inventory_df, best_opr, best_tca
                            )

                            update_inventory(
                                inventory_entity_id,
                                single_date,
                                inventory_df,
                                best_opr,
                                tca,
                                buffer_days_for_rig_movement,
                            )
                            rigs_df = update_rigs(
                                rig_id,
                                best_opr,
                                tca,
                                inventory_entity_id,
                                rigs_df,
                                inventory_df,
                            )

    else:
        continue

  rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
  rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
  rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
  rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
  & (rigs_df.spud_date <= single_date)
  other_tca_with_sticks.loc[
  rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
  & (rigs_df.spud_date <= single_date)
  other_tca_with_sticks.loc[
  rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
  rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
  rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
  rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
  rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
  rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
  rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
  & (rigs_df.spud_date <= single_date)
  rigs_df = rigs_df.append(rig_df_new_row, ignore_index=True)
  rigs_df = rigs_df

In [0]:
inventory_df["inventory_spud_date"] = pd.to_datetime(
    inventory_df["inventory_spud_date"]
)
inventory_df["inventory_rig_release_date"] = pd.to_datetime(
    inventory_df["inventory_rig_release_date"]
)
inventory_df["inventory_completion_date"] = pd.to_datetime(
    inventory_df["inventory_completion_date"]
)
inventory_df["inventory_firstprod_date"] = pd.to_datetime(
    inventory_df["inventory_firstprod_date"]
)

In [0]:
rigs_df["scenario_id"] = scenario_id
inventory_df["scenario_id"] = scenario_id

# Creating Tables

In [0]:
spark.sql(
    f"""
          delete from produced.inventory_drilling_scheduling_table where scenario_id = "{scenario_id}"
          """
)
spark.sql(
    f"""
          delete from produced.rigs_future_schedule_table where scenario_id = "{scenario_id}"
          """
)

DataFrame[num_affected_rows: bigint]

In [0]:
spark.createDataFrame(inventory_df).write.format("delta").option(
    "mergeSchema", "true"
).mode("append").saveAsTable(f"produced.inventory_drilling_scheduling_table")

In [0]:
rigs_df["spud_date"] = pd.to_datetime(rigs_df["spud_date"])
rigs_df["rig_release_date"] = pd.to_datetime(rigs_df["rig_release_date"])

  rigs_df["spud_date"] = pd.to_datetime(rigs_df["spud_date"])
  rigs_df["spud_date"] = pd.to_datetime(rigs_df["spud_date"])
  rigs_df["spud_date"] = pd.to_datetime(rigs_df["spud_date"])
  rigs_df["rig_release_date"] = pd.to_datetime(rigs_df["rig_release_date"])
  rigs_df["rig_release_date"] = pd.to_datetime(rigs_df["rig_release_date"])
  rigs_df["rig_release_date"] = pd.to_datetime(rigs_df["rig_release_date"])


In [0]:
(
    spark.createDataFrame(rigs_df)
    .withColumn("date", col("date").cast(TimestampType()))
    .write.format("delta")
    .option("mergeSchema", "true")
    .mode("append")
    .saveAsTable(f"produced.rigs_future_schedule_table")
)

In [0]:
rigs_df.display()

date,rig_id,API10,reservoir_gold_consolidated,BasinQuantum,OperatorGold,typeCurveArea,FlowUnit_Analog,spud_date,rig_release_date,scenario_id
2023-11-01,1956,1701736795,HAYNESVILLE,GULF COAST EAST,Miscellaneous,HAYNESVILLE_HSVL_LA NORTH_CADDO CENTRAL,HAYNESVILLE,2023-08-29T00:00:00Z,2023-11-01T00:00:00Z,1
2023-11-01,2098,1701322289,HAYNESVILLE,GULF COAST EAST,COMSTOCK,HAYNESVILLE_HSVL_LA EAST_NORTH,HAYNESVILLE,2023-09-11T00:00:00Z,2023-11-01T00:00:00Z,1
2023-11-01,89,4234733494,HAYNESVILLE,GULF COAST EAST,AETHON,HAYNESVILLE_HSVL_TX DEEP_NORTH,HAYNESVILLE,2023-10-23T00:00:00Z,2023-12-03T00:00:00Z,1
2023-11-01,2434,4240530823,COTTON VALLEY,GULF COAST EAST,Miscellaneous,HAYNESVILLE_HSVL_TX DEEP_NORTH,HAYNESVILLE,2023-10-30T00:00:00Z,2023-12-02T00:00:00Z,1
2023-11-01,5503,4234733485,HAYNESVILLE,GULF COAST EAST,AETHON,HAYNESVILLE_HSVL_TX DEEP_NORTH,HAYNESVILLE,2023-08-05T00:00:00Z,2023-11-01T00:00:00Z,1
2023-11-01,4088,1703127313,HAYNESVILLE,GULF COAST EAST,CHK,HAYNESVILLE_HSVL_LA CORE_DE SOTO EAST,HAYNESVILLE,2023-08-25T00:00:00Z,2023-11-01T00:00:00Z,1
2023-11-01,1424,4200530494,HAYNESVILLE,GULF COAST EAST,AETHON,HAYNESVILLE_HSVL_TX DEEP_ SOUTH,HAYNESVILLE,2023-08-06T00:00:00Z,2024-01-07T00:00:00Z,1
2023-11-01,3046,4220335607,HAYNESVILLE,GULF COAST EAST,ROCKCLIFF,HAYNESVILLE_HSVL_TX EAST_CORE A WEST,HAYNESVILLE,2023-08-22T00:00:00Z,2023-11-01T00:00:00Z,1
2023-11-01,2337,4236538840,HAYNESVILLE,GULF COAST EAST,ROCKCLIFF,HAYNESVILLE_HSVL_TX EAST_CORE A WEST,HAYNESVILLE,2023-06-05T00:00:00Z,2023-11-01T00:00:00Z,1
2023-11-01,2356,1703127277,HAYNESVILLE,GULF COAST EAST,COMSTOCK,HAYNESVILLE_HSVL_LA CORE_DE SOTO WEST,HAYNESVILLE,2023-10-30T00:00:00Z,2023-11-28T00:00:00Z,1
