# In [None]:
from flask import Blueprint, request, jsonify
import pandas as pd
import numpy as np
import json
from pymongo import DESCENDING
from utils.fetchFileDetails import file_details
from modules.modelBuilding import get_user_modelling_ads
from utils.db import (
    model_run_result,
    modelling_ads_result_collection,
    final_model_res_collection,

    model_run_result,
    modelling_ads_result_collection,
)

# Flask blueprint on which this module registers its "/simulationOutput" route.
simulationOutput = Blueprint("simulationOutput", __name__)


def load_data(mod_path, contri_path, coeff_path, var_map_path):
    """Read the four CSV inputs into DataFrames.

    Args:
        mod_path: Path of the CSV returned first (``df_mod``).
        contri_path: Path of the CSV returned second (``df_contri``).
        coeff_path: Path of the CSV returned third (``coef_data``).
        var_map_path: Path of the CSV returned fourth (``var_map``).

    Returns:
        A 4-tuple ``(df_mod, df_contri, coef_data, var_map)``; the files are
        read in that order with ``pandas.read_csv`` defaults.
    """
    csv_paths = (mod_path, contri_path, coeff_path, var_map_path)
    return tuple(pd.read_csv(csv_path) for csv_path in csv_paths)

def preprocess_dates(df_mod, df_contri):
    """Normalise the date columns of both frames to ``DD-MM-YYYY`` strings.

    ``df_contri['Time Variable']`` is parsed with pandas' default rules,
    formatted as ``DD-MM-YYYY`` and renamed to ``Date``.  ``df_mod['Date']``
    is parsed with a day-first / month-first guess (see
    ``unify_date_format``) and formatted the same way.  Values that cannot be
    parsed become missing.

    Args:
        df_mod: Frame with a ``Date`` column.
        df_contri: Frame with a ``Time Variable`` column.

    Returns:
        ``(df_mod, df_contri)`` as new frames; the inputs are left untouched.

    Raises:
        KeyError: If ``df_mod`` has no ``Date`` column or ``df_contri`` has no
            ``Time Variable`` column.
    """
    target_format = '%d-%m-%Y'

    def to_int(date_str):
        # Non-numeric or missing tokens count as 0 so they never vote
        # for "day first".
        try:
            return int(date_str)
        except (ValueError, TypeError):
            return 0

    def unify_date_format(df, date_column='Date'):
        # Guess the field order from the first token of every date: a value
        # in 13..31 can only be a day, so the column must be day-first.
        leading = (
            df[date_column]
            .astype(str)
            .str.split(r'[\/\.\-]', regex=True)
            .str[0]
            .map(to_int)
        )
        candidates = [value for value in leading if value <= 31]
        # No usable token (empty frame, or year-first dates such as
        # 2023-01-15) falls back to month-first instead of crashing on max([]).
        dayfirst = bool(max(candidates, default=0) > 12)

        df[date_column] = pd.to_datetime(
            df[date_column], format="mixed", errors="coerce", dayfirst=dayfirst
        ).dt.strftime(target_format)
        return df

    # Work on copies so the caller's frames are not modified as a side effect.
    df_mod = unify_date_format(df_mod.copy(), 'Date')

    df_contri = df_contri.copy()
    df_contri['Time Variable'] = pd.to_datetime(
        df_contri['Time Variable'], errors='coerce'
    ).dt.strftime(target_format)
    # The column is now unambiguously DD-MM-YYYY.  Re-running the heuristic on
    # it would read it month-first whenever every day is <= 12 and swap day
    # and month, so it is deliberately not re-parsed.
    df_contri = df_contri.rename(columns={'Time Variable': 'Date'})

    return df_mod, df_contri

def merge_and_process(df_mod, df_contri):
    """Join the two frames and derive calendar and holiday columns.

    ``df_contri`` is left-joined with ``df_mod`` on ``Mother_SKU`` and
    ``Date``.  For columns present in both inputs the ``df_mod`` value
    replaces the ``df_contri`` one.  Missing values are filled with 0,
    ``Date`` is parsed day-first, and ``Year``, ``Month`` (abbreviated name)
    and ``Quarter`` (``Q1``..``Q4``) are added.

    Every column whose name contains ``Holiday`` is folded into a single
    ``Holiday`` column: ``'Non-Holiday'`` where the flag equals 0, otherwise
    the name of the source column.  With several holiday columns each input
    row appears once per holiday column in the result.

    Args:
        df_mod: Frame with ``Mother_SKU`` and ``Date`` columns.
        df_contri: Frame with ``Mother_SKU`` and ``Date`` columns.

    Returns:
        The merged frame with the original holiday columns replaced by one
        ``Holiday`` column placed last.
    """
    merged_df = pd.merge(df_contri, df_mod, on=['Mother_SKU', 'Date'], how='left', suffixes=('', '_mod'))

    # Shared (non-key) columns arrive twice; keep the df_mod version under
    # the original name and drop the suffixed duplicate.
    for col in df_contri.columns:
        mod_col = col + '_mod'
        if mod_col in merged_df.columns:
            merged_df[col] = merged_df.pop(mod_col)

    merged_df = merged_df.fillna(0)

    merged_df['Date'] = pd.to_datetime(merged_df['Date'], dayfirst=True)
    merged_df['Year'] = merged_df['Date'].dt.year
    merged_df['Month'] = merged_df['Date'].dt.strftime('%b')
    merged_df['Quarter'] = merged_df['Date'].dt.quarter.map(lambda quarter: f'Q{quarter}')

    holiday_columns = [col for col in merged_df.columns if 'Holiday' in str(col)]
    if not holiday_columns:
        # Melting with no value columns would return an empty frame and
        # silently discard every row; label all rows as non-holiday instead.
        raw_df = merged_df.copy()
        raw_df['Holiday'] = 'Non-Holiday'
        return raw_df

    id_columns = [col for col in merged_df.columns if col not in holiday_columns]
    melt_df = pd.melt(
        merged_df,
        id_vars=id_columns,
        value_vars=holiday_columns,
        var_name='Holiday_Type',
        value_name='Holiday',
    )
    # Vectorised equivalent of a row-wise "flag == 0 ? 'Non-Holiday' : column name".
    holiday_label = np.where(melt_df['Holiday'] == 0, 'Non-Holiday', melt_df['Holiday_Type'])
    raw_df = melt_df.drop(columns=['Holiday_Type', 'Holiday'])
    raw_df['Holiday'] = holiday_label

    return raw_df

def actual_dod(data, reg_price):
    """Build a one-row frame from the payload's ``Original_Quarter_Table``.

    Only the first key of ``data`` is used; it becomes ``Mother_SKU``.  The
    ``Q1``..``Q4`` values of table entry 2 are divided by 100 (presumably
    percentages -- confirm against the frontend payload) and the ``Q1`` value
    of table entry 0 is stored under the ``reg_price`` column name.

    Args:
        data: Mapping ``{mother_sku: {'Original_Quarter_Table': [...]}}``.
        reg_price: Name to give the regular-price column.

    Returns:
        DataFrame with columns ``Mother_SKU``, ``Q1``..``Q4`` and ``reg_price``.
    """
    mosku = tuple(data.keys())[0]
    quarter_table = data[mosku]['Original_Quarter_Table']

    record = {'Mother_SKU': mosku}
    for quarter in ('Q1', 'Q2', 'Q3', 'Q4'):
        record[quarter] = quarter_table[2][quarter] / 100
    record[reg_price] = quarter_table[0]['Q1']

    return pd.DataFrame([record])

def input_dod(data, reg_price):
    """Build a one-row frame from the payload's ``Quarter_Table``.

    Only the first key of ``data`` is used; it becomes ``Mother_SKU``.  The
    ``Q1``..``Q4`` values of table entry 2 are divided by 100 (presumably
    percentages -- confirm against the frontend payload) and the ``Q1`` value
    of table entry 0 is stored under the ``reg_price`` column name.

    Args:
        data: Mapping ``{mother_sku: {'Quarter_Table': [...]}}``.
        reg_price: Name to give the regular-price column.

    Returns:
        DataFrame with columns ``Mother_SKU``, ``Q1``..``Q4`` and ``reg_price``.
    """
    mosku = tuple(data.keys())[0]
    quarter_table = data[mosku]['Quarter_Table']

    record = {'Mother_SKU': mosku}
    for quarter in ('Q1', 'Q2', 'Q3', 'Q4'):
        record[quarter] = quarter_table[2][quarter] / 100
    record[reg_price] = quarter_table[0]['Q1']

    return pd.DataFrame([record])

def multi_data_input(data, var_map):
    """Turn the payload's ``Promo_Table`` into per-quarter 0/1 flag columns.

    Only the first key of ``data`` is used as ``Mother_SKU``.  Each promo
    entry contributes its first four ``HistoricallyExisted`` values as
    ``Q1``..``Q4``; ``'Yes'`` maps to 1 and ``'No'`` to 0 (anything else
    becomes missing).  The result has one row per quarter and one column per
    ``PromotionType``, renamed through ``var_map`` (``original`` ->
    ``replacement``).

    Args:
        data: Mapping ``{mother_sku: {'Promo_Table': [...]}}``.
        var_map: Frame with ``original`` and ``replacement`` columns.

    Returns:
        DataFrame with ``Mother_SKU``, ``Quarter`` and the promotion columns.
    """
    mosku = tuple(data.keys())[0]
    quarter_labels = ('Q1', 'Q2', 'Q3', 'Q4')

    promo_rows = []
    for promo in data[mosku]['Promo_Table']:
        row = {'Mother_SKU': mosku, 'PromotionType': promo['PromotionType']}
        for position, label in enumerate(quarter_labels):
            row[label] = promo['HistoricallyExisted'][position]
        promo_rows.append(row)

    # Long format: one row per (promotion, quarter) with a Yes/No value.
    long_df = pd.DataFrame(promo_rows).melt(
        id_vars=['Mother_SKU', 'PromotionType'],
        var_name='Quarter',
        value_name='Value',
    )
    long_df['Value'] = long_df['Value'].map({'Yes': 1, 'No': 0})

    # Wide format: promotions become columns, quarters stay as rows.
    wide_df = long_df.pivot_table(
        index=['Mother_SKU', 'Quarter'],
        columns='PromotionType',
        values='Value',
        aggfunc='first',
    ).reset_index()
    wide_df.columns.name = None

    column_renames = dict(zip(var_map['original'], var_map['replacement']))
    return wide_df.rename(columns=column_renames)


def _percent_of_base(frame, future_col, base_col):
    """Return ``sum(future_col) / sum(base_col) * 100`` over the rows of *frame*."""
    return (frame[future_col].sum() / frame[base_col].sum()) * 100


def simulation(raw_df, coef_data, input_DOD, act_DOD, multi_data, reg_price, dod, unitcost):
    """Simulate volume and financial KPIs for a future price/DOD scenario.

    Steps, as implemented below:

    1. Each coefficient column of ``coef_data`` that also exists in
       ``raw_df`` is multiplied row by row with the raw value; the row sum
       is ``XB`` and ``base = log(Predicted) - XB``.
    2. Scenario multipliers are ``input_DOD / act_DOD`` (regular price on a
       log scale), joined with the promotion flags in ``multi_data`` and
       applied to the per-variable contributions.  The simulated volume is
       ``exp(base + simulated XB)``, summed per SKU and quarter.
    3. Revenue, margin and investment are computed per quarter for the base
       case (predicted volume, historical price and DOD) and the future case
       (simulated volume, scenario price and DOD).
    4. Future values are expressed as a percentage of the base (base = 100),
       overall and per quarter.

    The JSON result is also written to ``NEWDATA.json`` in the current
    working directory.  The input frames are not modified.

    Args:
        raw_df: Frame with ``Mother_SKU``, ``Date``, ``Quarter``,
            ``Predicted``, the ``unitcost`` column and the model variables.
        coef_data: Coefficients per ``Mother_SKU``; its
            ``Regular Price Variable`` / ``DOD Variable`` columns are renamed
            to ``reg_price`` / ``dod``.
        input_DOD: One row per SKU with scenario ``Q1``..``Q4`` DOD values
            and the scenario regular price under ``reg_price``.
        act_DOD: Same layout as ``input_DOD`` with the historical values.
        multi_data: Per ``Mother_SKU``/``Quarter`` multipliers for the
            promotion variables.
        reg_price: Name of the regular-price column.
        dod: Name of the DOD column.
        unitcost: Name of the unit-cost column in ``raw_df``.

    Returns:
        A JSON string with the keys ``Overall_Metrics``, ``Graph1`` and
        ``Graph2``.
    """
    index_cols = ['Mother_SKU', 'Date', 'Quarter']
    sku_quarter = ['Mother_SKU', 'Quarter']

    # Round the scenario inputs; the "*1" copies keep the un-logged price for
    # the financial calculations further down.
    input_DOD = round(input_DOD, 2)
    input_DOD1 = input_DOD.copy()
    input_DOD[reg_price] = np.log(input_DOD[reg_price])
    input_DOD.set_index(['Mother_SKU'], inplace=True)

    act_DOD = round(act_DOD, 2)
    act_DOD1 = act_DOD.copy()
    act_DOD[reg_price] = np.log(act_DOD[reg_price])
    act_DOD.set_index(['Mother_SKU'], inplace=True)

    # Prepare raw data and coefficient data (on new objects, so the caller's
    # frames keep their columns and index).
    Z = raw_df[index_cols]
    unit_cost = raw_df.groupby(['Mother_SKU'])[unitcost].mean().reset_index()
    raw_df = raw_df.set_index(index_cols)

    coef_data = coef_data.rename(columns={'Regular Price Variable': reg_price, 'DOD Variable': dod})
    merged_coef = Z.merge(coef_data, on=['Mother_SKU'], how='left')
    merged_coef.set_index(index_cols, inplace=True)
    merged_coef = merged_coef.reindex(index=raw_df.index)

    # Historical contribution of every variable: coefficient * raw value.
    Historical_pre = pd.DataFrame()
    common_columns = merged_coef.columns.intersection(raw_df.columns)
    for column_name in common_columns:
        if pd.api.types.is_numeric_dtype(merged_coef[column_name]):
            Historical_pre[column_name] = merged_coef[column_name] * raw_df[column_name]

    # Other "*_DOD" variables keep their historical contribution (multiplier 1).
    # The scenario DOD / price columns are excluded: they receive their own
    # multiplier below, and a placeholder of the same name would make the
    # merge suffix both columns and drop the scenario effect.
    dod_columns = [
        col for col in Historical_pre.columns
        if str(col).endswith("_DOD") and col not in (dod, reg_price)
    ]
    if dod_columns:
        dod_df = pd.DataFrame(1, index=multi_data.index, columns=dod_columns)
        multi_data = pd.concat([multi_data, dod_df], axis=1)

    Pre = pd.DataFrame()
    Pre['XB'] = Historical_pre.sum(axis=1)
    # Everything the variables do not explain of log(Predicted).
    Pre['base'] = np.log(raw_df['Predicted']) - Pre['XB']

    # Scenario multipliers: future / historical, per SKU and quarter.
    multi_dod_reg = (input_DOD / act_DOD).reset_index()
    multipliers = multi_dod_reg.melt(id_vars=['Mother_SKU', reg_price], var_name='Quarter', value_name=dod)
    multipliers = multipliers.sort_values(by=sku_quarter)
    multipliers = multipliers[['Mother_SKU', 'Quarter', dod, reg_price]].reset_index(drop=True)

    merged_multi1 = multi_data.merge(multipliers, on=sku_quarter, how='left')
    merged_multi = Z.merge(merged_multi1, on=sku_quarter, how='left')
    merged_multi.set_index(index_cols, inplace=True)

    # Columns without a multiplier become NaN here and are skipped by sum().
    Simulated_con = Historical_pre * merged_multi
    Simulated = pd.DataFrame()
    Simulated['Base'] = Pre['base']
    Simulated['XB'] = Simulated_con.sum(axis=1)

    Simulated_Vol = pd.DataFrame()
    Simulated_Vol['Volume'] = np.exp(Simulated['Base'] + Simulated['XB'])
    Simulated_Vol = Simulated_Vol.reset_index()[['Mother_SKU', 'Quarter', 'Volume']]
    Simulated_Vol = Simulated_Vol.groupby(sku_quarter)['Volume'].sum().reset_index()

    # Calculate financial metrics
    pre_vol = raw_df['Predicted'].reset_index()[['Mother_SKU', 'Quarter', 'Predicted']]
    pre_vol = pre_vol.groupby(sku_quarter)['Predicted'].sum().reset_index()

    # Name the two price columns explicitly instead of relying on merge
    # suffixes, so base figures use the historical price and future figures
    # the scenario price.
    fut_long = (
        input_DOD1.melt(id_vars=['Mother_SKU', reg_price], var_name='Quarter', value_name='Fut_DOD')
        .rename(columns={reg_price: 'Fut_reg_price'})
        .sort_values(by=sku_quarter)
        .reset_index(drop=True)
    )
    his_long = (
        act_DOD1.melt(id_vars=['Mother_SKU', reg_price], var_name='Quarter', value_name='His_DOD')
        .rename(columns={reg_price: 'His_reg_price'})
        .sort_values(by=sku_quarter)
        .reset_index(drop=True)
    )

    calculation = fut_long.merge(his_long, on=sku_quarter, how='left')
    calculation = calculation.merge(unit_cost, on=['Mother_SKU'], how='left')
    calculation = calculation.merge(pre_vol, on=sku_quarter, how='left')
    calculation = calculation.merge(Simulated_Vol, on=sku_quarter, how='left')

    calculation['Avg_selling_price'] = calculation['His_reg_price'] * (1 - calculation['His_DOD'])
    calculation['Fut_selling_price'] = calculation['Fut_reg_price'] * (1 - calculation['Fut_DOD'])
    calculation['Base_Revenue'] = calculation['Predicted'] * calculation['Avg_selling_price']
    calculation['Future_Revenue'] = calculation['Volume'] * calculation['Fut_selling_price']
    calculation['Base_Margin'] = calculation['Predicted'] * (calculation['Avg_selling_price'] - calculation[unitcost])
    calculation['Future_Margin'] = calculation['Volume'] * (calculation['Fut_selling_price'] - calculation[unitcost])
    calculation['Base Investment'] = calculation['Predicted'] * (calculation['His_reg_price'] - calculation['Avg_selling_price'])
    calculation['Future Investment'] = calculation['Volume'] * (calculation['Fut_reg_price'] - calculation['Fut_selling_price'])

    Final_calculation = calculation[
        ['Mother_SKU', 'Predicted', 'Volume', 'Base_Revenue', 'Future_Revenue',
         'Base_Margin', 'Future_Margin', 'Base Investment', 'Future Investment']
    ].rename(columns={'Predicted': 'PREDICTED_Volume', 'Volume': 'Future_Volume'})

    sim_result = Final_calculation.groupby(['Mother_SKU']).sum().reset_index()
    sim_result = round(sim_result, 2)

    # Scale future parameters keeping Base as 100%, and collect the overall KPIs.
    kpi_columns = [
        ("Revenue", 'Future_Revenue', 'Base_Revenue'),
        ("Gross Margin", 'Future_Margin', 'Base_Margin'),
        ("Volume", 'Future_Volume', 'PREDICTED_Volume'),
        ("Investment", 'Future Investment', 'Base Investment'),
    ]
    overall_metrics = []
    for kpi_label, future_col, base_col in kpi_columns:
        sim_result[future_col] = (sim_result[future_col] / sim_result[base_col]) * 100
        future_total = sim_result[future_col].sum()
        overall_metrics.append({
            "KPIs": kpi_label,
            "Baseline": 100,
            "Future_State_Scenario": future_total,
            "Change": round(future_total - 100),
        })

    # Collect individual quarter results
    individual_quarters = calculation[
        ['Mother_SKU', 'Quarter', 'Base_Revenue', 'Future_Revenue', 'Base_Margin',
         'Future_Margin', 'Predicted', 'Volume', 'Base Investment', 'Future Investment']
    ].copy()
    individual_quarters.columns = [
        'Mother_SKU', 'Quarter', 'Base_Revenue', 'Future_Revenue', 'Base_Margin',
        'Future_Margin', 'PREDICTED_Volume', 'Future_Volume', 'Base_Investment', 'Future_Investment',
    ]
    individual_quarters = round(individual_quarters, 2)

    # Prepare data for Graph1 and Graph2: one entry per quarter, base fixed at 100.
    graph1_data = {"Base_Revenue": [], "Future_Revenue": [], "Base_Volume": [], "Future_Volume": []}
    graph2_data = {"Base_Margin": [], "Future_Margin": [], "Base_Investment": [], "Future_Investment": []}

    for quarter in sorted(individual_quarters['Quarter'].unique()):
        quarter_rows = individual_quarters[individual_quarters['Quarter'] == quarter]

        graph1_data["Base_Revenue"].append(100)
        graph1_data["Future_Revenue"].append(_percent_of_base(quarter_rows, 'Future_Revenue', 'Base_Revenue'))
        graph1_data["Base_Volume"].append(100)
        graph1_data["Future_Volume"].append(_percent_of_base(quarter_rows, 'Future_Volume', 'PREDICTED_Volume'))

        graph2_data["Base_Margin"].append(100)
        graph2_data["Future_Margin"].append(_percent_of_base(quarter_rows, 'Future_Margin', 'Base_Margin'))
        graph2_data["Base_Investment"].append(100)
        graph2_data["Future_Investment"].append(_percent_of_base(quarter_rows, 'Future_Investment', 'Base_Investment'))

    result = {
        "Overall_Metrics": overall_metrics,
        "Graph1": graph1_data,
        "Graph2": graph2_data,
    }

    json_result = json.dumps(result)
    # NOTE(review): this dump is overwritten on every call and lands in the
    # process working directory -- confirm it is still needed.
    with open("NEWDATA.json", "w") as dump_file:
        dump_file.write(json_result)

    return json_result



@simulationOutput.route("/simulationOutput", methods=["POST"])
def main():
    """Run the simulation for the session named in the POSTed JSON body.

    The body supplies ``email``, ``projectName``, ``sessionName`` and
    ``simulationInput``.  The most recently modified model run of that
    session selects the modelling data set; the final-model record supplies
    the contribution and coefficient files.

    Returns:
        The JSON string produced by ``simulation``; a JSON ``error`` object
        with status 400 when ``simulationInput`` is missing; or a JSON
        ``error`` object when ``file_details`` yields no column mapping.

    Raises:
        Exception: If the session has no model run or no final model.
    """
    jdata = request.json
    email = jdata.get("email")
    project_name = jdata.get("projectName")
    session_name = jdata.get("sessionName")
    data = jdata.get("simulationInput")

    # Fail early with a readable message rather than deep inside the helpers.
    if not data:
        return jsonify({"error": "simulationInput is required"}), 400

    session_filter = {
        "email": email,
        "projectName": project_name,
        "sessionName": session_name,
    }

    # Latest model run of this session, or None when there is none.
    recent_runs = model_run_result.find(session_filter).sort("modifiedAt", DESCENDING).limit(1)
    recent_model = next(iter(recent_runs), None)
    if recent_model is None:
        raise Exception("Model run not found")

    user_modelling_ads = get_user_modelling_ads(
        email, project_name, session_name, recent_model["modellingBuild"]
    )

    user_final_model = final_model_res_collection.find_one(session_filter)
    if not user_final_model:
        raise Exception("Final Model not found")

    file_path, file_name, col_map = file_details(email, project_name, session_name)

    if not col_map:
        return jsonify({"error": "Failed to fetch configuration from MongoDB"})

    mod_path = user_modelling_ads["filePath"]["modelling_ads_path"]
    contri_path = user_final_model["finalFilePath"]["optimal_contributions_path"]
    coeff_path = user_final_model["finalFilePath"]["coefficients_path"]
    var_map_path = user_modelling_ads["filePath"]["variable_mapping_path"]

    reg_price = col_map["Regular Price Variable"][0]
    dod = col_map["DOD Variable"][0]
    unitcost = col_map["Unit Cost"][0]

    df_mod, df_contri, coef_data, var_map = load_data(mod_path, contri_path, coeff_path, var_map_path)
    df_mod, df_contri = preprocess_dates(df_mod, df_contri)
    raw_df = merge_and_process(df_mod, df_contri)

    # The next three frames are built from the payload received from the frontend.
    act_DOD = actual_dod(data, reg_price)
    input_DOD = input_dod(data, reg_price)
    multi_data = multi_data_input(data, var_map)

    result = simulation(raw_df, coef_data, input_DOD, act_DOD, multi_data, reg_price, dod, unitcost)

    # NOTE(review): `result` is already a JSON string, so Flask sends it with
    # its default text/html content type -- confirm the client before
    # switching to an application/json response.
    return result
