# <p style="padding: 20px; font-family: 'JetBrains Mono'; font-weight: bold; font-size: 100%; color: #2F486B; letter-spacing: 2px; text-align: center; border-radius: 8px; border: 3px solid #2F486B; background-color: #FFFCFA">Playground Series S3E19</p>

In [1]:
# %load ../general_settings.py
import os
import shutil
import subprocess
import warnings
from collections import defaultdict, namedtuple
from copy import copy
from functools import partial
from itertools import chain, combinations, product
from pathlib import Path
from time import strftime

ON_KAGGLE = os.getenv("KAGGLE_KERNEL_RUN_TYPE") is not None
if ON_KAGGLE:
    warnings.filterwarnings("ignore")

import joblib
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import scipy.stats as stats
import seaborn as sns
import shap
from colorama import Fore, Style
from IPython.core.display import HTML, display_html
from plotly.subplots import make_subplots
from scipy.cluster.hierarchy import linkage
from scipy.spatial.distance import squareform

# Colorama settings.
CLR = (Style.BRIGHT + Fore.BLACK) if ON_KAGGLE else (Style.BRIGHT + Fore.WHITE)
RED = Style.BRIGHT + Fore.RED
BLUE = Style.BRIGHT + Fore.BLUE
CYAN = Style.BRIGHT + Fore.CYAN
RESET = Style.RESET_ALL

FONT_COLOR = "#2F486B"
BACKGROUND_COLOR = "#FFFCFA"

CELL_HOVER = {  # for row hover use <tr> instead of <td>
    "selector": "td:hover",
    "props": "background-color: #FFFCFA",
}
TEXT_HIGHLIGHT = {
    "selector": "td",
    "props": "color: #2F486B; font-weight: bold",
}
INDEX_NAMES = {
    "selector": ".index_name",
    "props": "font-style: italic; background-color: #C94040; color: #FFFCFA;",
}
HEADERS = {
    "selector": "th:not(.index_name)",
    "props": "font-style: italic; background-color: #C94040; color: #FFFCFA;",
}
DF_STYLE = (INDEX_NAMES, HEADERS, TEXT_HIGHLIGHT)
DF_CMAP = sns.light_palette("#D4D0A9", as_cmap=True)

# Utility functions.
def download_dataset_from_kaggle(user, dataset, directory):
    command = "kaggle datasets download -d "
    filepath = directory / (dataset + ".zip")
    if not filepath.is_file():
        subprocess.run((command + user + "/" + dataset).split())
        filepath.parent.mkdir(parents=True, exist_ok=True)
        shutil.unpack_archive(dataset + ".zip", "data")
        shutil.move(dataset + ".zip", "data")


def download_competition_from_kaggle(competition):
    command = "kaggle competitions download -c "
    filepath = Path("data/" + competition + ".zip")
    if not filepath.is_file():
        subprocess.run((command + competition).split())
        Path("data").mkdir(parents=True, exist_ok=True)
        shutil.unpack_archive(competition + ".zip", "data")
        shutil.move(competition + ".zip", "data")


# Html `code` block highlight. Must be included at the end of all imports!
HTML(
    """
<style>
code {
    background: rgba(123, 171, 237, 0.25) !important;
    border-radius: 4px !important;
}
</style>
"""
)


In [2]:
def smape(y_true, y_pred, **kwargs):
    """Competition evaluation metric - Symmetric Mean Absolute Percentage Error.
    https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error"""

    eps = kwargs.get("eps", 1e-6)
    s = np.sum(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred) + eps))
    return 100 / len(y_true) * s


A = np.array([100])
F = np.array([110])
print(smape(A, F))


9.52380947845805


<blockquote style="
    margin-right: auto; 
    margin-left: auto; 
    background-color: #FFFCFA;
    padding: 20px;
    border-radius: 8px;
    border: 3px solid #2F486B;
">
<p style="
    font-size: 20px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    border-bottom: 2px solid #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    <b>Competition Description</b> 📜
</p>

<p style="
    font-size: 16px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    For this challenge, you will be predicting a full year worth of sales for various fictitious learning modules from different fictitious Kaggle-branded stores in different (real!) countries. This dataset is completely synthetic, but contains many effects you see in real-world data, e.g., weekend and holiday effect, seasonality, etc. You are given the task of predicting sales during for year 2022.
</p>
    
<p style="
    font-size: 20px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    border-bottom: 2px solid #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    <b>Task</b> 💡
</p>

<p style="
    font-size: 16px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    <b>This is a time-series forecasting competition. Your task is to forecast a full year (2022) worth of sales - <code>Num_Sold</code>.<br>
    The competition evaluation metric is <a href="https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error" style="color: #C94040;"><b>SMAPE</b></a> (symmetric mean absolute percentage error).</b>

</p>
    
<p style="
    font-size: 20px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    border-bottom: 2px solid #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    <b>This Notebook Covers</b> 📔
</p>

<ul style="
    font-size: 16px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    <li>We will see...</li>
    <li></li>
</ul>


<p style="
    font-size: 20px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    border-bottom: 2px solid #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    <b>See More Here</b> 📈
</p>

<p style="
    font-size: 16px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    <a href="https://www.kaggle.com/competitions/playground-series-s3e19/overview" style="color: #C94040;"><b>Playground Series - Season 3, Episode 19</b></a>
</p>

</blockquote>

# <p style="padding: 20px; font-family: 'JetBrains Mono'; font-weight: bold; font-size: 100%; color: #2F486B; letter-spacing: 2px; text-align: center; border-radius: 8px; border: 3px solid #2F486B; background-color: #FFFCFA">Look at Dataset &amp; Features</p>

<blockquote style="
    margin-right: auto; 
    margin-left: auto; 
    background-color: #FFFCFA;
    padding: 20px;
    border-radius: 8px;
    border: 3px solid #2F486B;
"> 
<p style="
    font-size: 20px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    border-bottom: 2px solid #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    <b>Let's get started</b> 📊
</p>

<p style="
    font-size: 16px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    First, let's load this dataset and see what we have there.
</p>

</blockquote>

In [3]:
competition = "playground-series-s3e19"

if not ON_KAGGLE:
    download_competition_from_kaggle(competition)
    train_path = "data/train.csv"
    test_path = "data/test.csv"
else:
    train_path = f"/kaggle/input/{competition}/train.csv"
    test_path = f"/kaggle/input/{competition}/test.csv"

train = pd.read_csv(train_path, index_col="id", parse_dates=["date"]).rename(columns=str.title)
test = pd.read_csv(test_path, index_col="id", parse_dates=["date"]).rename(columns=str.title)


In [4]:
train.head(10).style.set_table_styles(DF_STYLE)


Unnamed: 0_level_0,Date,Country,Store,Product,Num_Sold
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2017-01-01 00:00:00,Argentina,Kaggle Learn,Using LLMs to Improve Your Coding,63
1,2017-01-01 00:00:00,Argentina,Kaggle Learn,Using LLMs to Train More LLMs,66
2,2017-01-01 00:00:00,Argentina,Kaggle Learn,Using LLMs to Win Friends and Influence People,9
3,2017-01-01 00:00:00,Argentina,Kaggle Learn,Using LLMs to Win More Kaggle Competitions,59
4,2017-01-01 00:00:00,Argentina,Kaggle Learn,Using LLMs to Write Better,49
5,2017-01-01 00:00:00,Argentina,Kaggle Store,Using LLMs to Improve Your Coding,88
6,2017-01-01 00:00:00,Argentina,Kaggle Store,Using LLMs to Train More LLMs,98
7,2017-01-01 00:00:00,Argentina,Kaggle Store,Using LLMs to Win Friends and Influence People,14
8,2017-01-01 00:00:00,Argentina,Kaggle Store,Using LLMs to Win More Kaggle Competitions,83
9,2017-01-01 00:00:00,Argentina,Kaggle Store,Using LLMs to Write Better,69


In [5]:
train.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 136950 entries, 0 to 136949
Data columns (total 5 columns):
 #   Column    Non-Null Count   Dtype         
---  ------    --------------   -----         
 0   Date      136950 non-null  datetime64[ns]
 1   Country   136950 non-null  object        
 2   Store     136950 non-null  object        
 3   Product   136950 non-null  object        
 4   Num_Sold  136950 non-null  int64         
dtypes: datetime64[ns](1), int64(1), object(3)
memory usage: 6.3+ MB


In [6]:
test.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 27375 entries, 136950 to 164324
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Date     27375 non-null  datetime64[ns]
 1   Country  27375 non-null  object        
 2   Store    27375 non-null  object        
 3   Product  27375 non-null  object        
dtypes: datetime64[ns](1), object(3)
memory usage: 1.0+ MB


<blockquote style="
    margin-right: auto; 
    margin-left: auto; 
    background-color: #FFFCFA;
    padding: 20px;
    border-radius: 8px;
    border: 3px solid #2F486B;
"> 

<p style="
    font-size: 20px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    border-bottom: 2px solid #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    <b>Features Description</b> 📔
</p>

<p style="
    font-size: 16px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    There is no great philosophy in the features. These are:
</p>

<ul style="
    font-size: 16px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    <li><code>Date</code> - Date of recording of observation.</li>
    <li><code>Country</code> - Real country the observation was recorded.</li>
    <li><code>Store</code> - Fictional store the observation was recorded.</li>
    <li><code>Product</code> - Fictional product related to the observation.</li>
    <li><code>Num_Sold</code> - A number of sold products.</li>
</ul>

<p style="
    font-size: 16px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    As you can see, we have there four features, including three categorical and one time-like. The target we need to forecast is a discrete number. Moreover, there are $136950$ observations in the training dataset and $27375$ in test one, which makes this dataset rather medium-sized (perhaps large).
</p>

</blockquote>

In [7]:
print(CLR + "Training Dataset Missing Values:", RED + f"{train.isna().any().any()}")
print(CLR + "Test Dataset Missing Values:    ", RED + f"{test.isna().any().any()}")


[1m[37mTraining Dataset Missing Values: [1m[31mFalse
[1m[37mTest Dataset Missing Values:     [1m[31mFalse


In [8]:
targets = ["Num_Sold"]

print(
    CLR + "Training Dataset Duplicated Rows:",
    RED + f"{train.drop(targets, axis=1).duplicated().sum()}",
)

print(CLR + "Test Dataset Duplicated Rows:    ", RED + f"{test.duplicated().sum()}")


[1m[37mTraining Dataset Duplicated Rows: [1m[31m0
[1m[37mTest Dataset Duplicated Rows:     [1m[31m0


<blockquote style="
    margin-right: auto; 
    margin-left: auto; 
    background-color: #FFFCFA;
    padding: 20px;
    border-radius: 8px;
    border: 3px solid #2F486B;
"> 

<p style="
    font-size: 20px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    border-bottom: 2px solid #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    <b>Observations</b> 🕵
</p>

<p style="
    font-size: 16px;
    font-family: 'JetBrains Mono';
    color: #2F486B;
    margin-left: 15px;
    margin-right: 15px;
">
    There are no duplicated rows and missing values in both, training and test datasets. Therefore, we don't need to care about some kind of imputation or removing samples.
</p>

</blockquote>

# <p style="padding: 20px; font-family: 'JetBrains Mono'; font-weight: bold; font-size: 100%; color: #2F486B; letter-spacing: 2px; text-align: center; border-radius: 8px; border: 3px solid #2F486B; background-color: #FFFCFA">Distributions &amp; Pivot Tables</p>

In [9]:
train_by_country_product = (
    train.groupby(["Country", "Product"])
    .agg(
        Mean_Num_Sold=("Num_Sold", "mean"),
    )
    .reset_index()
)

train_by_country_store = (
    train.groupby(["Country", "Store"])
    .agg(
        Mean_Num_Sold=("Num_Sold", "mean"),
    )
    .reset_index()
)


In [10]:
display_html(
    train_by_country_product.style.set_table_styles(DF_STYLE)
    .background_gradient(DF_CMAP)
    .set_table_attributes("style='display:inline'")
    ._repr_html_()
    + train_by_country_store.style.set_table_styles(DF_STYLE)
    .background_gradient(DF_CMAP)
    .set_table_attributes("style='display:inline'")
    ._repr_html_(),
    raw=True,
)


Unnamed: 0,Country,Product,Mean_Num_Sold
0,Argentina,Using LLMs to Improve Your Coding,82.321833
1,Argentina,Using LLMs to Train More LLMs,81.183279
2,Argentina,Using LLMs to Win Friends and Influence People,12.414567
3,Argentina,Using LLMs to Win More Kaggle Competitions,68.792442
4,Argentina,Using LLMs to Write Better,62.561336
5,Canada,Using LLMs to Improve Your Coding,342.997444
6,Canada,Using LLMs to Train More LLMs,338.050018
7,Canada,Using LLMs to Win Friends and Influence People,52.833333
8,Canada,Using LLMs to Win More Kaggle Competitions,286.286601
9,Canada,Using LLMs to Write Better,260.06517

Unnamed: 0,Country,Store,Mean_Num_Sold
0,Argentina,Kagglazon,127.66988
1,Argentina,Kaggle Learn,22.236364
2,Argentina,Kaggle Store,34.457831
3,Canada,Kagglazon,530.374808
4,Canada,Kaggle Learn,93.551698
5,Canada,Kaggle Store,144.213034
6,Estonia,Kagglazon,270.789595
7,Estonia,Kaggle Learn,47.578204
8,Estonia,Kaggle Store,73.452464
9,Japan,Kagglazon,455.056955


In [11]:
fig = px.bar(
    train_by_country_product.sort_values("Mean_Num_Sold"),
    x="Product",
    y="Mean_Num_Sold",
    color="Country",
    color_discrete_sequence=["#B9C0CA", "#74849A", "#2F486B", "#62455C", "#C94040"],
    barmode="group",
    opacity=0.8,
)
fig.update_layout(
    font_color=FONT_COLOR,
    title="Mean Number of Products Sold",
    title_font_size=18,
    plot_bgcolor=BACKGROUND_COLOR,
    paper_bgcolor=BACKGROUND_COLOR,
    bargap=0.2,
    bargroupgap=0.1,
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    width=840,
    height=540,
)
fig.show()


In [12]:
fig = px.bar(
    train_by_country_store.sort_values("Mean_Num_Sold"),
    x="Store",
    y="Mean_Num_Sold",
    color="Country",
    color_discrete_sequence=["#B9C0CA", "#74849A", "#2F486B", "#62455C", "#C94040"],
    barmode="group",
    opacity=0.8,
)
fig.update_layout(
    font_color=FONT_COLOR,
    title="Mean Number of Products Sold",
    title_font_size=18,
    plot_bgcolor=BACKGROUND_COLOR,
    paper_bgcolor=BACKGROUND_COLOR,
    bargap=0.2,
    bargroupgap=0.1,
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    width=840,
    height=540,
)
fig.show()

In [13]:
def get_kde_estimation(data_series, sample=False, samples=1000):
    if sample:
        kde = stats.gaussian_kde(data_series.dropna().sample(samples, random_state=42))
    else:
        kde = stats.gaussian_kde(data_series.dropna())
    kde_range = np.linspace(
        data_series.min() - data_series.max() * 0.1,
        data_series.max() + data_series.max() * 0.1,
        samples,
    )
    estimated_values = kde.evaluate(kde_range)
    estimated_values_cum = np.cumsum(estimated_values)
    estimated_values_cum /= estimated_values_cum.max()
    return kde_range, estimated_values, estimated_values_cum


def get_n_rows_axes(n_features, n_cols=5, n_rows=None):
    n_rows = int(np.ceil(n_features / n_cols))
    current_col = range(1, n_cols + 1)
    current_row = range(1, n_rows + 1)
    return n_rows, list(product(current_row, current_col))


In [14]:
countries = train.Country.unique().tolist()
products = train.Product.unique().tolist()
category_combs = np.array(np.meshgrid(countries, products)).T.reshape(-1, 2)
color_combs = ["#B9C0CA", "#74849A", "#2F486B", "#62455C", "#C94040"] * len(countries)

n_cols = len(products)
n_rows, axes = get_n_rows_axes(n_cols * len(countries), n_cols=n_cols)

fig = make_subplots(
    rows=n_rows,
    cols=n_cols,
    y_title="Probability Density",
    x_title="Num_Sold",
    row_titles=countries,
    horizontal_spacing=0.06,
    vertical_spacing=0.05,
)

show_legend = True

for k, ((country, prod), color) in enumerate(zip(category_combs, color_combs)):
    if 1 + k > n_cols:
        show_legend = False
    current_row, current_col = 1 + k // n_cols, 1 + k % n_cols
    kde_range, estimated_values, _ = get_kde_estimation(
        train.query(f"Country == '{country}' & Product == '{prod}'")["Num_Sold"], sample=True
    )
    fig.add_scatter(
        x=kde_range,
        y=estimated_values,
        line=dict(dash="solid", color=color, width=1),
        fill="tozeroy",
        name=prod,
        legendgroup=prod,
        showlegend=show_legend,
        row=current_row,
        col=current_col,
    )
    fig.update_yaxes(tickfont_size=7, showgrid=False, row=current_row, col=current_col)
    fig.update_xaxes(tickfont_size=7, showgrid=False, row=current_row, col=current_col)

fig.update_annotations(font_size=14)
fig.update_layout(
    font_color=FONT_COLOR,
    title="Kernel Density Estimation<br>By Country & Product",
    title_font_size=18,
    plot_bgcolor=BACKGROUND_COLOR,
    paper_bgcolor=BACKGROUND_COLOR,
    width=840,
    height=840,
    legend=dict(
        orientation="v",
        yanchor="bottom",
        xanchor="right",
        y=1.05,
        x=1,
    ),
)
fig.show()


In [15]:
stores = train.Store.unique().tolist()
category_combs = np.array(np.meshgrid(countries, stores)).T.reshape(-1, 2)
color_combs = ["#B9C0CA", "#2F486B", "#C94040"] * len(countries)

n_cols = len(stores)
n_rows, axes = get_n_rows_axes(n_cols * len(countries), n_cols=n_cols)

fig = make_subplots(
    rows=n_rows,
    cols=n_cols,
    y_title="Probability Density",
    x_title="Num_Sold",
    row_titles=countries,
    horizontal_spacing=0.06,
    vertical_spacing=0.05,
)

show_legend = True

for k, ((country, store), color) in enumerate(zip(category_combs, color_combs)):
    if 1 + k > n_cols:
        show_legend = False
    current_row, current_col = 1 + k // n_cols, 1 + k % n_cols
    kde_range, estimated_values, _ = get_kde_estimation(
        train.query(f"Country == '{country}' & Store == '{store}'")["Num_Sold"], sample=True
    )
    fig.add_scatter(
        x=kde_range,
        y=estimated_values,
        line=dict(dash="solid", color=color, width=1),
        fill="tozeroy",
        name=store,
        legendgroup=store,
        showlegend=show_legend,
        row=current_row,
        col=current_col,
    )
    fig.update_yaxes(tickfont_size=7, showgrid=False, row=current_row, col=current_col)
    fig.update_xaxes(tickfont_size=7, showgrid=False, row=current_row, col=current_col)

fig.update_annotations(font_size=14)
fig.update_layout(
    font_color=FONT_COLOR,
    title="Kernel Density Estimation<br>By Country & Store",
    title_font_size=18,
    plot_bgcolor=BACKGROUND_COLOR,
    paper_bgcolor=BACKGROUND_COLOR,
    width=840,
    height=840,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        xanchor="right",
        y=1.05,
        x=1,
    ),
)
fig.show()


In [61]:
category_combs = np.array(np.meshgrid(products, stores)).T.reshape(-1, 2)
color_combs = ["#B9C0CA", "#2F486B", "#C94040"] * len(countries)

n_cols = len(stores)
n_rows, axes = get_n_rows_axes(n_cols * len(countries), n_cols=n_cols)

subtitles = np.empty(n_rows * n_cols, dtype=object)
subtitles[1::3] = products

fig = make_subplots(
    rows=n_rows,
    cols=n_cols,
    y_title="Probability Density",
    x_title="Num_Sold",
    subplot_titles=subtitles,
    horizontal_spacing=0.06,
    vertical_spacing=0.08,
)

show_legend = True
title_text = ""

for k, ((prod, store), color) in enumerate(zip(category_combs, color_combs)):
    if 1 + k > n_cols:
        show_legend = False
    current_row, current_col = 1 + k // n_cols, 1 + k % n_cols
    kde_range, estimated_values, _ = get_kde_estimation(
        train.query(f"Product == '{prod}' & Store == '{store}'")["Num_Sold"], sample=True
    )
    fig.add_scatter(
        x=kde_range,
        y=estimated_values,
        line=dict(dash="solid", color=color, width=1),
        fill="tozeroy",
        name=store,
        legendgroup=store,
        showlegend=show_legend,
        row=current_row,
        col=current_col,
    )
    fig.update_yaxes(tickfont_size=7, showgrid=False, row=current_row, col=current_col)
    fig.update_xaxes(tickfont_size=7, showgrid=False, row=current_row, col=current_col)

fig.update_annotations(font_size=14)
fig.update_layout(
    font_color=FONT_COLOR,
    title="Kernel Density Estimation<br>By Product & Store",
    title_font_size=18,
    plot_bgcolor=BACKGROUND_COLOR,
    paper_bgcolor=BACKGROUND_COLOR,
    width=840,
    height=940,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        xanchor="right",
        y=1.05,
        x=1,
    ),
)
fig.show()


# <p style="padding: 20px; font-family: 'JetBrains Mono'; font-weight: bold; font-size: 100%; color: #2F486B; letter-spacing: 2px; text-align: center; border-radius: 8px; border: 3px solid #2F486B; background-color: #FFFCFA">Quick Analysis of Time Series</p>

In [18]:
category_combs = np.array(np.meshgrid(countries, products)).T.reshape(-1, 2)
color_combs = ["#B9C0CA", "#2F486B", "#C94040"] * len(countries)

n_cols = len(stores)
n_rows, axes = get_n_rows_axes(n_cols * len(countries), n_cols=n_cols)

fig = make_subplots(
    rows=n_rows,
    cols=n_cols,
    y_title="Probability Density",
    x_title="Num_Sold",
    row_titles=countries,
    horizontal_spacing=0.06,
    vertical_spacing=0.05,
)

show_legend = True

In [19]:
category_combs

array([['Argentina', 'Using LLMs to Improve Your Coding'],
       ['Argentina', 'Using LLMs to Train More LLMs'],
       ['Argentina', 'Using LLMs to Win Friends and Influence People'],
       ['Argentina', 'Using LLMs to Win More Kaggle Competitions'],
       ['Argentina', 'Using LLMs to Write Better'],
       ['Canada', 'Using LLMs to Improve Your Coding'],
       ['Canada', 'Using LLMs to Train More LLMs'],
       ['Canada', 'Using LLMs to Win Friends and Influence People'],
       ['Canada', 'Using LLMs to Win More Kaggle Competitions'],
       ['Canada', 'Using LLMs to Write Better'],
       ['Estonia', 'Using LLMs to Improve Your Coding'],
       ['Estonia', 'Using LLMs to Train More LLMs'],
       ['Estonia', 'Using LLMs to Win Friends and Influence People'],
       ['Estonia', 'Using LLMs to Win More Kaggle Competitions'],
       ['Estonia', 'Using LLMs to Write Better'],
       ['Japan', 'Using LLMs to Improve Your Coding'],
       ['Japan', 'Using LLMs to Train More LLMs'],
 

In [20]:


fig = make_subplots(
    rows=4,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.1,
    x_title="Date",
    y_title="Temperature (\u2103)",
    subplot_titles=["Observed Values", "Trend", "Seasonality", "Residuals"],
)

observed = go.Scatter(
    x=decomposition.observed.index,
    y=decomposition.observed,
    name="Observed Temperature",
    line=dict(width=0.7, color="#2A357D"),
)
trend = go.Scatter(
    x=decomposition.trend.index,
    y=decomposition.trend,
    name="Trend",
    line=dict(color="#2A357D"),
)
seasonal = go.Scatter(
    x=decomposition.seasonal.index,
    y=decomposition.seasonal,
    name="Seasonality",
    line=dict(color="#2A357D"),
)
residuals = go.Scatter(
    x=decomposition.resid.index,
    y=decomposition.resid,
    name="Residuals",
    mode="markers",
    marker_size=1,
    line=dict(color="#2A357D"),
)

NameError: name 'decomposition' is not defined