# In [None]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from fbprophet import Prophet

# Load the merged dataset
data = pd.read_csv('merged_data_updated.csv')

# Feature selection and preprocessing
# Year, Month and the sales target are never used as model inputs.
features = data.drop(columns=['Year', 'Month', 'Sales(In ThousandDollars)'])
# The raw 'ProductCategory' column is filtered by category *name* further down,
# so it holds text labels that the regressor cannot consume; the model relies
# on the one-hot 'ProductCategory_<name>' columns instead. errors='ignore'
# keeps this a no-op when the raw label column is absent.
features = features.drop(columns=['ProductCategory'], errors='ignore')
target = data['Sales(In ThousandDollars)']

# Train-test split
# NOTE(review): this is a shuffled split; if rows are monthly observations the
# hold-out MAE may be optimistic compared to a chronological split - confirm.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Gradient Boosting Regressor
# Seeded like the split so the reported MAE is reproducible between runs.
gbr = GradientBoostingRegressor(random_state=42)
gbr.fit(X_train, y_train)
gbr_predictions = gbr.predict(X_test)
gbr_mae = mean_absolute_error(y_test, gbr_predictions)
print(f'Gradient Boosting Regressor MAE: {gbr_mae}')

# Prepare data for Prophet
# Build a first-of-month timestamp for every row from its Year/Month pair.
date_parts = data[['Year', 'Month']].copy()
date_parts['DAY'] = 1
data['date'] = pd.to_datetime(date_parts)

# Prophet expects the timestamp in 'ds' and the observed value in 'y'.
prophet_data = data[
    ['date', 'Sales(In ThousandDollars)', 'ProductCategory']
].rename(columns={'date': 'ds', 'Sales(In ThousandDollars)': 'y'})

# Fit one independent Prophet model per product category
categories = ['MenClothing', 'WomenClothing', 'OtherClothing']
prophet_models = {}

category_labels = prophet_data['ProductCategory']
for category in categories:
    # Keep only the rows that belong to the current category.
    category_data = prophet_data.loc[category_labels == category]
    model = Prophet()
    model.fit(category_data)
    prophet_models[category] = model

# Load submission file
submission = pd.read_csv('submission.csv')

# Predict and fill in the submission file.
# Every row is scored by the per-category Prophet model and, when a matching
# feature row exists in `data`, by the gradient boosting model as well; the
# two forecasts are then averaged.
prophet_only_rows = []  # submission indices scored without a GBR forecast

for idx, row in submission.iterrows():
    category = row['ProductCategory']
    # iterrows() does not preserve column dtypes, so Year/Month can come back
    # as floats; cast before building the date or comparing against `data`.
    year, month = int(row['Year']), int(row['Month'])
    date = pd.Timestamp(year=year, month=month, day=1)

    # Gradient Boosting Prediction
    # Select exactly the columns the regressor was fitted on, in fit order.
    # Dropping only Year/Month/Sales is not enough: `data` has gained a 'date'
    # column since training, which the model has never seen.
    row_mask = (
        (data['Year'] == year)
        & (data['Month'] == month)
        & (data[f'ProductCategory_{category}'] == True)  # noqa: E712
    )
    feature_rows = data.loc[row_mask, features.columns]

    # Prophet Prediction
    # A one-row frame holding the target date is all predict() needs.
    model = prophet_models[category]
    future = pd.DataFrame({'ds': [date]})
    forecast = model.predict(future)
    prophet_prediction = forecast['yhat'].iloc[0]

    # Combine predictions
    if feature_rows.empty:
        # No features for this month/category: gbr.predict() would raise on an
        # empty frame, so fall back to the Prophet forecast alone.
        combined_prediction = prophet_prediction
        prophet_only_rows.append(idx)
    else:
        # .mean() collapses the prediction array to a scalar and also covers
        # the case where several feature rows match the same month/category.
        gbr_prediction = gbr.predict(feature_rows).mean()
        combined_prediction = (gbr_prediction + prophet_prediction) / 2
    submission.loc[idx, 'Sales(In ThousandDollars)'] = combined_prediction

if prophet_only_rows:
    print(
        f'No feature row found for {len(prophet_only_rows)} submission row(s); '
        'used the Prophet forecast only for those rows.'
    )

# Save the updated submission file
submission.to_csv('submission_updated.csv', index=False)