In [None]:
import numpy as np
import pandas as pd
from config.config_loader import get_config
import AnalyticsAndDBScripts.sql_connect as sql
import AnalyticsAndDBScripts.prod_fcst_functions as fcst
import AnalyticsAndDBScripts.sql_schemas as schema

In [None]:
# Read the 'sql1_sa' entry of the 'credentials' config
sql_creds_dict = get_config('credentials', 'sql1_sa')

# Record which database the connection should target
sql_creds_dict.update(db_name='Analytics')

In [None]:
# Query text used to pull forecast rows from the dbo.vw_FORECAST view
# NOTE(review): TOP 10 without an ORDER BY returns an arbitrary 10 rows - confirm
# this limit is intended before the results are loaded downstream.
statement = """
SELECT TOP 10 * FROM dbo.vw_FORECAST
"""

In [None]:
# Connect with the configured credentials, read the query result into a
# dataframe, and release the engine afterwards
connection_kwargs = {
    'username': sql_creds_dict['username'],
    'password': sql_creds_dict['password'],
    'db_name': sql_creds_dict['db_name'],
    'server_name': sql_creds_dict['servername'],
    'port': sql_creds_dict['port'],
}
engine = sql.sql_connect(**connection_kwargs)
try:
    fcst_df = pd.read_sql(sql=statement, con=engine)
finally:
    # Runs whether or not the read succeeded
    engine.dispose()

In [None]:
# Columns of fcst_df whose null values are replaced with 0.0
fill_cols = [
    'Q1', 'Q2', 'Q3', 'Qabn',
    'Dei', 'b_factor', 'Def',
    't1', 't2',
]
fcst_df[fill_cols] = fcst_df[fill_cols].fillna(value=0.0)

In [None]:
def apply_arps(row, duration):
    '''
    Build the monthly forecast dataframe for a single forecast row.

    A row whose StartMonth is at or beyond ``duration``, or whose Q3 is at or
    below Qabn, yields one placeholder record with empty rate, De and volume.
    Every other row is passed to fcst.arps_segments and the returned arrays
    become the columns of the result.

    :param row: A row from a dataframe
    :param duration: Forecast length; compared against StartMonth and forwarded to fcst.arps_segments
    :return: Dataframe with columns WellID, Measure, ProdMonth, ProductionRate, De,
        CumulativeProduction, MonthlyVolume, ForecastID, StartDate, StartMonth
    '''
    # PHASE_INT code -> measure name
    phase_names = {1: 'OIL', 2: 'GAS', 3: 'WATER'}
    int_cols = ['WellID', 'ProdMonth', 'StartMonth']

    starts_after_horizon = row['StartMonth'] >= duration
    at_or_below_abandonment = row['Q3'] <= row['Qabn']

    if starts_after_horizon | at_or_below_abandonment:
        # Nothing to forecast: emit a single row carrying only the starting state
        out = pd.DataFrame({
            'WellID': [row['WellID']],
            'Measure': [phase_names.get(row['PHASE_INT'], 'UNKNOWN')],
            'ProdMonth': [row['StartMonth']],
            'ProductionRate': [None],
            'De': [None],
            'CumulativeProduction': [row['StartCumulative']],
            'MonthlyVolume': [None],
        })
    else:
        segments = fcst.arps_segments(
            row['WellID'],
            row['PHASE_INT'],
            row['Q1'],
            row['Q2'],
            row['Q3'],
            row['Dei'],
            row['Def'],
            round(row['b_factor'], 4),
            row['Qabn'],
            row['t1'],
            row['t2'],
            duration,
            row['StartCumulative'],
            row['StartMonth']
        )
        # One returned array per output column
        out = pd.DataFrame(
            np.stack(segments).T,
            columns=[
                'WellID', 'Measure', 'ProdMonth', 'ProductionRate',
                'De', 'CumulativeProduction', 'MonthlyVolume',
            ],
        )
        # Discard entries that have no production month
        out = out.dropna(subset=['ProdMonth'])
        out['Measure'] = out['Measure'].map(phase_names)

    # Identifying columns shared by both branches, appended after the value columns
    out['ForecastID'] = row['ForecastID']
    out['StartDate'] = row['StartDate']
    out['StartMonth'] = row['StartMonth']
    out[int_cols] = out[int_cols].astype('int64')
    return out

In [None]:
# Forecast duration in months
duration = 360

# Run apply_arps on every row of fcst_df and stack the per-row results into one dataframe
monthly_df = pd.concat(
    (apply_arps(fcst_row, duration) for _idx, fcst_row in fcst_df.iterrows()),
    ignore_index=True,
)

In [None]:
# Add a month offset column (ProdMonth - StartMonth) intended to help calculate a Date column
# in SQL, then select and reorder the columns of monthly_df.
# NOTE(review): 'AdjustedMonth' is not listed in col_order, so the selection below drops it
# again together with 'StartDate' and 'StartMonth' - confirm whether the staging load is
# meant to receive it.
monthly_df['AdjustedMonth'] = monthly_df['ProdMonth'] - monthly_df['StartMonth']
col_order = ['ForecastID', 'WellID', 'Measure', 'ProdMonth', 'ProductionRate', 'De', 'CumulativeProduction', 'MonthlyVolume']
# Keep only the listed columns, in this order
monthly_df = monthly_df[col_order]

In [None]:
# Convert NaN to None for proper database insertion.
# Columns that contain nulls are cast to object first: on a float column
# DataFrame.where(mask, None) leaves NaN in place (pandas >= 1.3), so None would
# never reach the database layer. Columns without nulls keep their dtype.
monthly_df = monthly_df.astype(
    {col: object for col in monthly_df.columns[monthly_df.isna().any()]}
).where(monthly_df.notna(), None)

In [None]:
def split_dataframe(df, chunk_size):
    '''
    Divide a dataframe into consecutive chunks of at most chunk_size rows.

    :param df: Dataframe to split
    :param chunk_size: Maximum number of rows per chunk; must be at least 1
    :return: List of dataframe slices in the original row order. The last slice
        may be shorter, and an empty dataframe gives an empty list.
    :raises ValueError: If chunk_size is less than 1
    '''
    # A negative step makes range() empty, which would silently drop every row,
    # so reject non-positive sizes explicitly.
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer, got {!r}'.format(chunk_size))
    return [df.iloc[start:start + chunk_size] for start in range(0, len(df), chunk_size)]

# Maximum number of rows per chunk of monthly_df
chunk_size = 500000
dataframes_list = split_dataframe(df=monthly_df, chunk_size=chunk_size)

In [None]:
# Load each chunk of monthly_df into the dbo.FORECAST_VOLUME_STAGE table in SQL Server,
# using the table definition in schema.forecast_volume_stage
stage_schema = schema.forecast_volume_stage
for df_chunk in dataframes_list:
    sql.load_data_to_sql(df_chunk, sql_creds_dict, stage_schema)

In [None]:
# Execute the sp_InsertFromStagingToForecastVolume stored procedure with the same SQL credentials.
# Per the original note, it moves the staged rows into dbo.FORECAST_VOLUME and drops
# dbo.FORECAST_VOLUME_STAGE.
# NOTE(review): the earlier wording named dbo.FORECAST_STAGE as the source, which does not
# match the dbo.FORECAST_VOLUME_STAGE table loaded above - confirm against the procedure.
sql.execute_stored_procedure(sql_creds_dict, 'sp_InsertFromStagingToForecastVolume')