In [None]:
import numpy as np
import pandas as pd 
import json
import plotly.express as px
from statsmodels.tsa.seasonal import seasonal_decompose

# Map

In [None]:
# Load the training data and total the units sold per country.
df = pd.read_csv("../input/tabular-playground-series-jan-2022/train.csv")
dfAgg = df[["country", "num_sold"]].groupby(["country"]).sum().reset_index()

# The choropleth below matches `locations` against ISO alpha-3 codes, so the
# country names are translated by name instead of by row position; names that
# are not in the mapping are left untouched.
# NOTE(review): assumes the CSV spells the countries "Finland", "Norway" and
# "Sweden" - confirm against the data.
isoAlpha3=["FIN", "NOR", "SWE"]
countryToIso = dict(zip(["Finland", "Norway", "Sweden"], isoAlpha3))
dfAgg["country"] = dfAgg["country"].replace(countryToIso)

fig = px.choropleth(
    dfAgg, 
    locations="country", 
    color='num_sold',
    color_continuous_scale="Viridis",
    # NOTE(review): the upper bound used to be 450000, i.e. below the lower
    # bound; assumed to be a dropped zero - confirm the intended colour range.
    range_color=(3000000, 4500000),
    scope="europe",
    labels={'num_sold':'Sold Items'}
)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

# Time Series by Country and Store

In [None]:
def rowToColProd(row, product):
    """Return the row's ``num_sold`` if its ``product`` equals *product*, else 0."""
    return row.num_sold if row["product"] == product else 0

In [None]:
# Spread the sales into one column per product (0 where the row belongs to a
# different product) so that summing per (date, country, store) yields the
# total and the per-product sales side by side.
dfAggStore = df.drop(columns=["row_id"]).set_index("date")

for i in dfAggStore["product"].unique():
    # Vectorised replacement for the row-wise apply: keep num_sold where the
    # product matches, 0 elsewhere.
    dfAggStore[f"{i}"] = dfAggStore["num_sold"].where(dfAggStore["product"] == i, 0)

# numeric_only=True keeps the text column "product" out of the aggregation;
# pandas >= 2.0 would otherwise concatenate its strings into the result.
dfAggStore = (
    dfAggStore
    .groupby(["date", "country", "store"])
    .sum(numeric_only=True)
    .reset_index()
    .set_index("date")
)

# One figure per country found in the data, with one facet per store.
for country in df.country.unique():
    viz = dfAggStore.loc[dfAggStore.country == country].drop(columns=["country"])
    fig = px.line(viz, facet_col="store", facet_col_wrap=2, title=country)
    fig.update_layout(template="simple_white")
    fig.show()

In [None]:
def SaisonalComponents(DF,periods, by: str, noTrend=False):
    """Subtract the seasonal (and optionally the trend) component from each value column.

    Every column after the first two is treated as a value column. For each
    value column and each combination of ``country`` and ``by`` value, the
    series is decomposed with an additive ``seasonal_decompose`` and the
    seasonal component is subtracted from the observed values; when
    ``noTrend`` is truthy the trend component is subtracted as well.

    Parameters
    ----------
    DF : pandas.DataFrame
        Must provide "country", the ``by`` column and "date" as sort keys.
        Presumably "country" and ``by`` are the first two columns, since those
        two positions are skipped - verify against the caller.
    periods : int
        Forwarded to ``seasonal_decompose`` as ``period``.
    by : str
        Name of the second grouping column.
    noTrend : bool, default False
        If truthy, also remove the trend and prefix the new columns with
        "Residual_" instead of "Season_".

    Returns
    -------
    pandas.DataFrame
        A sorted copy of ``DF`` with one additional column per value column.
        The input frame is not modified.
    """
    DF = DF.copy()
    DF.sort_values(by=["country", by, "date"], inplace=True)

    # Evaluate the flag once so the column prefix and the computation can
    # never disagree for truthy non-bool values.
    removeTrend = bool(noTrend)
    prefix = "Residual_" if removeTrend else "Season_"

    # Snapshot the value columns up front; new columns are appended below.
    valueColumns = list(DF.columns[2:])
    for column in valueColumns:
        target = "{1}{0}".format(column, prefix)
        # Float placeholder: the adjusted values are floats (and may hold NaN
        # where the trend is undefined), so avoid an int column being upcast.
        DF[target] = 0.0
        for c in DF.country.unique():
            for s in DF[by].unique():
                mask = (DF.country == c) & (DF[by] == s)
                observed = DF.loc[mask, column]
                parts = seasonal_decompose(observed, model='additive', period=periods)
                adjusted = observed - parts.seasonal
                if removeTrend:
                    adjusted = adjusted - parts.trend
                DF.loc[mask, target] = adjusted
    return DF

# Seasonally Adjusted Time Series by Country and Store

In [None]:
# Season-adjust every store-level series (period of 365 observations) and keep
# only the adjusted columns by dropping the raw totals and per-product sales.
noSeasonStore = SaisonalComponents(dfAggStore, 365, "store").drop(
    columns=["num_sold", "Kaggle Mug", "Kaggle Hat", "Kaggle Sticker"]
)

In [None]:
# Draw one season-filtered line chart per country (first three distinct
# countries in df), faceted by store.
for l in range(3):
    country = df.country.unique()[l]
    isCountry = noSeasonStore.country == country
    viz = noSeasonStore.loc[isCountry].drop(columns=["country"])
    fig = px.line(
        viz,
        facet_col="store",
        facet_col_wrap=2,
        title=country + " Season Filtered",
    ).update_layout(template="simple_white")
    fig.show()

# Time Series by Country and Product

In [None]:
def rowToCol(row, store):
    """Return the row's ``num_sold`` if its ``store`` equals *store*, else 0."""
    sameStore = row["store"] == store
    if not sameStore:
        return 0
    return row.num_sold

In [None]:
# Spread the sales into one column per store (0 where the row belongs to a
# different store) so that summing per (date, country, product) yields the
# total and the per-store sales side by side.
dfAggProd = df.drop(columns=["row_id"]).set_index("date")

for i in dfAggProd["store"].unique():
    # Vectorised replacement for the row-wise apply: keep num_sold where the
    # store matches, 0 elsewhere.
    dfAggProd[f"{i}"] = dfAggProd["num_sold"].where(dfAggProd["store"] == i, 0)

# numeric_only=True keeps the text column "store" out of the aggregation;
# pandas >= 2.0 would otherwise concatenate its strings into the result.
dfAggProd = (
    dfAggProd
    .groupby(["date", "country", "product"])
    .sum(numeric_only=True)
    .reset_index()
    .set_index("date")
)

# One figure per country found in the data, with one facet per product.
for country in df.country.unique():
    viz = dfAggProd.loc[dfAggProd.country == country].drop(columns=["country"])
    fig = px.line(viz, facet_col="product", facet_col_wrap=2, title=country)
    fig.update_layout(template="simple_white")
    fig.show()

In [None]:
# Season-adjust every product-level series (period of 365 observations) and
# keep only the adjusted columns by dropping the raw totals and per-store sales.
noSeasonProd = SaisonalComponents(dfAggProd, 365, "product").drop(
    columns=["num_sold", "KaggleMart", "KaggleRama"]
)

# Seasonally Adjusted Time Series by Country and Product

In [None]:
# Draw one season-filtered line chart per country (first three distinct
# countries in df), faceted by product.
for l in range(3):
    country = df.country.unique()[l]
    isCountry = noSeasonProd.country == country
    viz = noSeasonProd.loc[isCountry].drop(columns=["country"])
    fig = px.line(
        viz,
        facet_col="product",
        facet_col_wrap=2,
        title=country + " Season Filtered",
    ).update_layout(template="simple_white")
    fig.show()

# Correlations by Country and Product (Season and Trend Removed)

In [None]:
# Remove both season and trend from the store-level series and keep only the
# resulting residual columns (plus the country/store keys).
noSeasonStore = SaisonalComponents(dfAggStore,365, "store", noTrend=True)
noSeasonStore.drop(columns=["num_sold", "Kaggle Mug", "Kaggle Hat", "Kaggle Sticker"], inplace=True)

# One correlation heatmap per country found in the data.
for country in df.country.unique():
    corr = (
        noSeasonStore.loc[noSeasonStore.country == country]
        .drop(columns=["country"])
        # The text column "store" is still present; restrict to numeric
        # columns explicitly because DataFrame.corr raises on non-numeric
        # data in pandas >= 2.0.
        .select_dtypes(include="number")
        .corr()
    )
    # Keep only the strictly lower triangle: the diagonal and the mirrored
    # upper half are blanked out (NaN) so each pair is shown once.
    viz = corr.where(np.tril(np.ones(corr.shape, dtype=bool), k=-1))

    fig =  px.imshow(
        viz,
        color_continuous_scale='Tealrose',
        range_color=(0.8,1)
    )

    fig.update_layout(
        height=600, width=600,
        title="Correlations "+country+" by Product",
        template="simple_white"
    )

    fig.show()

# Correlations by Country and Store (Season and Trend Removed)

In [None]:
# Remove both season and trend from the product-level series and keep only the
# resulting residual columns (plus the country/product keys).
noSeasonProd = SaisonalComponents(dfAggProd,365, "product", noTrend=True)
noSeasonProd.drop(columns=["num_sold", "KaggleMart", "KaggleRama"], inplace=True)

# One correlation heatmap per country found in the data.
for country in df.country.unique():
    corr = (
        noSeasonProd.loc[noSeasonProd.country == country]
        .drop(columns=["country"])
        # The text column "product" is still present; restrict to numeric
        # columns explicitly because DataFrame.corr raises on non-numeric
        # data in pandas >= 2.0.
        .select_dtypes(include="number")
        .corr()
    )
    # Keep only the strictly lower triangle: the diagonal and the mirrored
    # upper half are blanked out (NaN) so each pair is shown once.
    viz = corr.where(np.tril(np.ones(corr.shape, dtype=bool), k=-1))

    fig =  px.imshow(
        viz,
        color_continuous_scale='Tealrose',
        range_color=(0.8,1)
    )

    fig.update_layout(
        height=600, width=600,
        title="Correlations "+country+" by Store",
        template="simple_white"
    )

    fig.show()

# Sales by Country, Store, and Product

In [None]:
# Bucket each row's sales to the nearest hundred; the bucket is used both as
# the last dimension and as the colour of the parallel-categories diagram.
df["sold items"] = df.num_sold.div(100).round().astype("int").mul(100)

fig = px.parallel_categories(
    data_frame=df.sort_values(by="sold items"),
    dimensions=["country", "store", "product", "sold items"],
    color="sold items",
    color_continuous_scale=px.colors.sequential.Inferno,
    labels={
        "sold items": "Sold Items",
        "country": "Country",
        "store": "Store",
        "product": "Product",
    },
    title="Colored by Sold Items (rounded to the nearest hundred)",
).update_layout(height=1200, width=800)
fig.show()