In [1]:
import pandas as pd
import pickle


In [2]:
def prepare_forecasting_features_year(df):
    """Build 52-row lag features per (Store, Dept) series for the yearly model.

    For ``Weekly_Sales`` and every exogenous column listed below, two columns
    are added:

    * ``lag_yearly_<col>``: the value of ``<col>`` 52 rows earlier within the
      same (Store, Dept) group.  This is a one-year lag only if each group is
      sorted chronologically with exactly one row per week; the function does
      not sort or check this itself.
    * ``<col>_historical_week_avg``: the mean of ``lag_yearly_<col>`` over all
      rows sharing the same (Store, Dept, week).

    The raw exogenous columns are then removed, and rows without a
    ``lag_yearly_Weekly_Sales`` value (fewer than 52 preceding rows in the
    group, or a missing value at the lagged position) are discarded.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Store``, ``Dept``, ``week``, ``Weekly_Sales`` and the
        exogenous columns named below.  The input is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the remaining original columns plus the
        engineered lag / average columns.
    """
    # Raw covariates: lagged first, then dropped from the result.
    exogenous_columns = ['Temperature', 'MarkDown1', 'Fuel_Price',
                         'MarkDown2', 'MarkDown3', 'MarkDown4',
                         'MarkDown5', 'CPI', 'Unemployment']
    series_keys = ['Store', 'Dept']
    lag_rows = 52

    frame = df.copy()
    # The target is lagged as well, but unlike the covariates it is kept.
    for name in ['Weekly_Sales', *exogenous_columns]:
        lag_name = f'lag_yearly_{name}'
        frame[lag_name] = frame.groupby(series_keys)[name].shift(lag_rows)
        frame[f'{name}_historical_week_avg'] = (
            frame.groupby([*series_keys, 'week'])[lag_name].transform('mean')
        )

    return (
        frame
        .drop(columns=exogenous_columns)
        .dropna(subset=['lag_yearly_Weekly_Sales'])
    )

In [3]:
# Raw input table; later cells rely on it having Date, Store and Dept columns.
df = pd.read_csv('../data/combined_data.csv')

# Restore the fitted estimator and its preprocessor from disk.
# NOTE: pickle.load can execute arbitrary code, so only point these paths at
# artefacts produced by this project.
with open('../src/models/year_prediction_xgboost.pkl', "rb") as model_handle:
    model = pickle.load(model_handle)

with open('../src/models/year_prediction_preprocessor.pkl', "rb") as preprocessor_handle:
    preprocessor = pickle.load(preprocessor_handle)

In [4]:
# NOTE: this cell rebinds `df` to the engineered frame, so it must be run
# exactly once after the load cell; a second run would look for raw columns
# that prepare_forecasting_features_year has already dropped.
df["Date"] = pd.to_datetime(df["Date"])

# Use the chronologically last *row* of every (Store, Dept) series as the
# template for the future row.  tail(1) keeps that row intact, whereas
# GroupBy.last() returns the last non-null value of each column separately
# and could therefore stitch together values from different weeks.
last_rows = df.sort_values("Date").groupby(["Store", "Dept"]).tail(1).copy()

# Re-date the template one calendar year ahead; every other column (including
# any calendar-derived ones such as `week`) is carried over unchanged.
last_rows["Date"] = last_rows["Date"] + pd.DateOffset(years=1)

# Append the future rows and restore per-series chronological order, which
# the row-based shift inside prepare_forecasting_features_year relies on.
df = pd.concat([df, last_rows], ignore_index=True)
df = df.sort_values(["Store", "Dept", "Date"]).reset_index(drop=True)

# NOTE(review): the future row sits directly after the last observed row, so
# its shift(52) lag comes from 52 rows back in the series (51 rows before the
# last observation), even though its Date is a full year ahead. Confirm this
# matches how the model was trained.
df = prepare_forecasting_features_year(df)

In [5]:
# Keep only the rows dated on the forecast day. The date is hard-coded: it
# matches the appended future rows only for series whose last observation is
# exactly one year earlier.
is_forecast_row = df['Date'] == '2013-10-26'
next_year = df.loc[is_forecast_row]

In [6]:
# Run the forecast rows through the unpickled preprocessor, then feed the
# transformed matrix to the unpickled model.
# NOTE(review): presumably this is the same preprocessor that was fitted at
# training time — confirm the two pickles belong together.
input_data_transformed = preprocessor.transform(next_year)
predictions = model.predict(input_data_transformed)


In [7]:
# Attach the model output to a fresh copy of the forecast rows (assign returns
# a new frame, so the slice taken from `df` is never written to).
next_year = next_year.assign(predicted_weekly_sales=predictions)

# Slim report: one row per (Store, Dept), indexed by the forecast date.
predictions_df = (
    next_year[['Date', 'Store', 'Dept', 'predicted_weekly_sales']]
    .set_index('Date')
)



In [8]:
# Show the forecast table (Date index; Store, Dept, predicted_weekly_sales).
predictions_df

Unnamed: 0_level_0,Store,Dept,predicted_weekly_sales
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-10-26,1,1,38077.875000
2013-10-26,1,2,44304.589844
2013-10-26,1,3,9490.942383
2013-10-26,1,4,38394.382812
2013-10-26,1,5,24931.373047
...,...,...,...
2013-10-26,45,93,2213.137207
2013-10-26,45,94,4466.775879
2013-10-26,45,95,50632.375000
2013-10-26,45,97,6101.546875
