In [None]:
!pip install --upgrade statsmodels

In [None]:
import pandas as pd
import numpy as np 
from matplotlib import pyplot as plt
import seaborn as sns
import tqdm
import copy
import itertools
import multiprocessing



from statsmodels.tsa.seasonal import seasonal_decompose
# holt winters 
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import statsmodels as sm

In [None]:
# Passed as `verbose` to the final `evaluate` call of each series: when True,
# the hold-out forecast is plotted and its sMAPE is printed.
VERBOSE_TRAIN = True

In [None]:
# Training data: `row_id` becomes the index, `date` is parsed to datetime.
df = pd.read_csv(
    '../input/tabular-playground-series-jan-2022/train.csv',
    index_col='row_id',
    parse_dates=['date'],
)
# Test data: `row_id` stays a regular column, `date` is parsed to datetime.
test_df = pd.read_csv(
    '../input/tabular-playground-series-jan-2022/test.csv',
    parse_dates=['date'],
)

In [None]:
def smape_error(actual: np.ndarray, predicted: np.ndarray):
    """Return the mean symmetric absolute percentage error as a fraction.

    Each term is ``2 * |actual - predicted| / (|actual| + |predicted|)``.
    A ``1e-10`` offset is added to the denominator so that a pair of zeros
    does not divide by zero.
    """
    abs_diff = np.abs(actual - predicted)
    scale = (np.abs(actual) + np.abs(predicted)) + 1e-10
    ratios = 2.0 * abs_diff / scale
    return np.mean(ratios)

def evaluate(data, trend_model, seasonal_model, seasonal_periods=7, validation_len=90, verbose=False, method='SLSQP', **kwargs):
    """Fit a Holt-Winters model and score it on the last ``validation_len`` rows.

    Two models are fitted with identical settings: one on every row except the
    last ``validation_len`` (used for scoring) and one on all of ``data``
    (returned for forecasting beyond the data).

    Parameters
    ----------
    data : DataFrame with a ``num_sold`` column; it is passed to
        ``ExponentialSmoothing`` with ``freq='D'``.
    trend_model, seasonal_model : forwarded as ``trend`` / ``seasonal``.
    seasonal_periods : forwarded to ``ExponentialSmoothing``.
    validation_len : number of trailing rows held out for scoring.
    verbose : when True, plot train / hold-out / prediction and print the
        sMAPE; a rejected configuration prints the reason instead.
    method : optimizer name forwarded to ``.fit``.
    **kwargs : extra ``ExponentialSmoothing`` constructor arguments.

    Returns
    -------
    ``(smape, fitted_model, full_fitted_model, ret_params)``. ``ret_params``
    contains every argument (except ``data`` and ``verbose``) needed to call
    ``evaluate`` again with the same configuration. If anything fails (fit
    error, non-converged optimizer, NaN forecasts, plotting error) the result
    is ``(inf, None, None, None)`` so the configuration is ignored by a
    minimum search.
    """
    min_date = data.index.min()
    train_part = data.iloc[:-validation_len]
    holdout_part = data.iloc[-validation_len:]

    def fit_on(series):
        # Same model specification for the train-only and the full fit.
        return ExponentialSmoothing(
            series,
            trend=trend_model,
            seasonal=seasonal_model,
            seasonal_periods=seasonal_periods, freq='D', **kwargs
        ).fit(method=method)

    try:
        # The fit is inside the try block so that a configuration which
        # cannot be fitted is scored as `inf` instead of aborting the caller.
        fitted_model = fit_on(train_part['num_sold'])
        # Explicit checks instead of `assert`, which is removed under `python -O`.
        if not fitted_model.mle_retvals.success:
            raise RuntimeError("optimizer did not converge on the training split")

        full_fitted_model = fit_on(data['num_sold'])
        # Reject models whose 400-step forecast contains NaN values.
        if full_fitted_model.forecast(400).isna().any():
            raise RuntimeError("full-data model produced NaN forecasts")

        test_predictions = fitted_model.forecast(validation_len)
        if verbose:
            train_part[min_date:]['num_sold'].plot(legend=True,label='TRAIN')
            holdout_part[min_date:]['num_sold'].plot(legend=True,label='TEST',figsize=(6,4))
            test_predictions.plot(legend=True,label='PREDICTION')
            plt.title(f"Trend: {trend_model}, seasonal: {seasonal_model}, periods: {seasonal_periods}")
            plt.show()

        smape = smape_error(holdout_part['num_sold'],test_predictions)
        if verbose:
            print(f"sMAPE on test = {smape}")
    except Exception as exc:
        if verbose:
            print(f"Model rejected (trend={trend_model}, seasonal={seasonal_model}): {exc!r}")
        # Infinite score so the configuration loses any minimum search; the
        # model and parameter slots are None because nothing usable was fitted.
        return float('inf'), None, None, None

    # Record everything required to reproduce this fit, not only the
    # trend/seasonal choice; otherwise re-using the parameters would fall
    # back to this function's defaults for the remaining arguments.
    ret_params = dict(
        trend_model=trend_model,
        seasonal_model=seasonal_model,
        seasonal_periods=seasonal_periods,
        validation_len=validation_len,
        method=method,
    )
    ret_params.update(kwargs)
    return smape, fitted_model, full_fitted_model, ret_params
    

In [None]:
# Seasonal cycle length handed to the model as `seasonal_periods`.
SEASON = 365

# Fixed keyword arguments added to every candidate of the grid search.
model_preset = dict(
    seasonal_periods=SEASON,
    validation_len=366,
)

# Search grid: the Cartesian product of these value lists is evaluated.
# Key order matters because combinations are zipped back onto the keys.
PARAMS = dict(
    trend_model=["add", "mul"],
    seasonal_model=["add", "mul"],
    damped_trend=[True, False],
    use_boxcox=[True, False],
)

def optimize(data, params=None):
    """Pick model settings for one series and return the final `evaluate` result.

    Parameters
    ----------
    data : DataFrame of a single series; when ``params`` is given its first
        row must carry ``country``, ``store`` and ``product`` values.
    params : optional mapping ``(country, store, product) -> dict`` of
        `evaluate` keyword arguments. When None, every combination in
        ``PARAMS`` is evaluated and the one with the lowest sMAPE is used.

    Returns
    -------
    The tuple returned by ``evaluate`` for the selected settings, run with
    ``verbose=VERBOSE_TRAIN``.

    Raises
    ------
    KeyError if ``params`` is given but has no entry for this series.
    """
    if params is None:
        scores = {}
        for combo in itertools.product(*PARAMS.values()):
            candidate = dict(zip(PARAMS, combo))
            candidate.update(model_preset)
            scores[combo] = evaluate(data, **candidate)[0]

        # On ties `min` keeps the first combination in grid order.
        best_combo = min(scores, key=scores.get)
        model_params = dict(zip(PARAMS, best_combo))
        model_params.update(model_preset)
    else:
        first_row = data.iloc[0]
        model_key = (first_row['country'], first_row['store'], first_row['product'])
        # Start from the preset so that stored parameter sets which do not
        # contain `seasonal_periods` / `validation_len` are refitted with the
        # same settings as the grid search instead of `evaluate`'s defaults.
        # Values present in the stored set take precedence.
        model_params = {**model_preset, **params[model_key]}
    return evaluate(data, verbose=VERBOSE_TRAIN, **model_params)

In [None]:
def train_single_HW_model(df, country, store, product, params=None):
    """Extract one (country, store, product) series, index it by `date`, and run `optimize` on it."""
    is_country = df.country == country
    is_store = df.store == store
    is_product = df['product'] == product
    series_df = df[is_country & is_store & is_product].set_index('date')
    return optimize(series_df, params)

In [None]:
from joblib import Parallel, delayed

def train_HW_models(df, params=None, n_jobs=4):
    """Train one Holt-Winters model per (country, store, product) combination in parallel.

    Parameters
    ----------
    df : DataFrame with ``country``, ``store`` and ``product`` columns (plus
        whatever ``train_single_HW_model`` needs).
    params : optional per-series parameter mapping forwarded unchanged to
        ``train_single_HW_model``.
    n_jobs : number of joblib workers (default 4).

    Returns
    -------
    ``(models, final_models, best_params)``: three dicts keyed by
    ``(country, store, product)`` holding, respectively, elements 1, 2 and 3
    of each ``train_single_HW_model`` result.
    """
    # Build the key list once; it is used both to dispatch the jobs and to
    # label their results, so the two can never get out of step.
    keys = list(itertools.product(df.country.unique(), df.store.unique(), df['product'].unique()))
    results = Parallel(n_jobs=n_jobs)(
        delayed(train_single_HW_model)(df, country, store, product, params)
        for country, store, product in keys
    )
    models = {key: res[1] for key, res in zip(keys, results)}
    final_models = {key: res[2] for key, res in zip(keys, results)}
    best_params = {key: res[3] for key, res in zip(keys, results)}
    return models, final_models, best_params


In [None]:
# Grid-search and fit every series; `params=None` requests the full search.
models, final_models, best_params = train_HW_models(df=df, params=None)

In [None]:
# Display the parameter dict selected for each (country, store, product) key.
best_params

In [None]:
def predict(df, models, horizon=400):
    """Return a copy of ``df`` with ``num_sold`` filled from model forecasts.

    Parameters
    ----------
    df : DataFrame with ``country``, ``store``, ``product`` and ``date``
        columns. It is not modified.
    models : mapping ``(country, store, product) -> fitted model``; each model
        must provide ``forecast(steps)`` returning a Series indexed by date.
    horizon : number of steps requested from every model (default 400).

    Returns
    -------
    A copy of ``df`` whose ``num_sold`` column holds, for each row, the
    forecast of that row's series at that row's date.

    Raises
    ------
    KeyError if a row's series has no model or its date is not covered by the
    forecast.
    """
    forecasts = {key: model.forecast(horizon) for key, model in models.items()}
    result = df.copy()
    # Iterate the four columns in parallel rather than using a row-wise
    # DataFrame.apply: same per-row lookup, far less overhead, and an empty
    # frame simply yields an empty column.
    result['num_sold'] = [
        forecasts[(country, store, product)].loc[date]
        for country, store, product, date in zip(
            result['country'], result['store'], result['product'], result['date']
        )
    ]
    return result

In [None]:
# Score the train-only models on the whole hold-out window. `df` holds many
# series, so `.tail(366)` on the full frame would select only 366 rows, not
# 366 days; select every row whose date is among the last `validation_len`
# distinct dates instead.
# NOTE(review): assumes every series has one row per date, so that these dates
# match the rows `evaluate` held out per series - confirm.
validation_dates = np.sort(df['date'].unique())[-model_preset['validation_len']:]
holdout_df = df[df['date'].isin(validation_dates)]
smape_error(holdout_df['num_sold'], predict(holdout_df, models=models)['num_sold'])


In [None]:
# Forecast the test rows with the models fitted on the complete training data.
preds = predict(df=test_df, models=final_models)

In [None]:
# Every test row must have received a forecast.
assert not preds['num_sold'].isna().any()

In [None]:
preds.to_csv('submission.csv',columns=['row_id','num_sold'], index=False)
# Eyeball check
!head submission.csv