# This notebook tries to predict the number of stickers sold.

### Loading Dataset.

Let's start by loading the data for exploration.

In [None]:
# Necessary libraries.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Directory that holds the competition CSV files, relative to this notebook.
path = "../../../Data/playground-series-s5e1/"
# Read both splits from that directory in one pass.
train, test = (pd.read_csv(path + name) for name in ("train.csv", "test.csv"))

A quick peek at the data.

In [None]:
# First five rows of the training frame.
train.head(n=5)

In [None]:
# Last five rows of the training frame.
train.tail(n=5)

In [None]:
# First five rows of the test frame.
test.head(n=5)

In [None]:
# Last five rows of the test frame.
test.tail(n=5)

In [None]:
# Promote the "id" column to the row index in both the train and test frames.
train, test = (frame.set_index("id") for frame in (train, test))

### Exploratory Data Analysis.

In [None]:
# Print the DataFrame.info() summary of the training frame.
train.info()

In [None]:
# Print the DataFrame.info() summary of the test frame.
test.info()

In [None]:
# Count the training rows whose "num_sold" value is missing and report that
# count as a percentage of all training rows.
# isnull() already yields a boolean mask, so it is used directly for selection.
null_num_sold = train.loc[train["num_sold"].isnull(), ["num_sold"]]
null_num_sold_count = len(null_num_sold)
percent_missing = np.round((null_num_sold_count / len(train)) * 100, 2)
print(f"{null_num_sold_count} stickers have missing values out of {len(train)} total stickers representing {percent_missing}%")

In [None]:
# Number of training rows for each distinct value of the "country" column.
train["country"].value_counts()

In [None]:
# Number of training rows for each distinct value of the "store" column.
train["store"].value_counts()

In [None]:
 # Number of training rows for each distinct value of the "product" column.
train["product"].value_counts()

### Visualization.

We will make a bar plot, a histogram, and a line plot for various aspects of the dataset.

In [None]:
# Data for the bar plot: total "num_sold" for every (country, store) pair.
# Long form first (one row per pair) ...
stores_country = (
    train.groupby(["country", "store"])[["num_sold"]]
    .sum()
    .reset_index()
)
# ... then wide form: one row per country, one column per store.
wide_stores_country = stores_country.pivot(
    index="country",
    columns="store",
    values="num_sold",
)

In [None]:
# Grouped bar chart of wide_stores_country: one cluster of bars per row
# (country) and one bar per column (store) inside each cluster.
fig, ax = plt.subplots(layout='constrained')

x = np.arange(len(wide_stores_country))  # left-most bar position of each cluster
width = 0.25  # width of a single bar
store_names = wide_stores_country.columns

for position, store_name in enumerate(store_names):
    # Shift each store's bars right by one bar width so they sit side by side.
    offset = position * width
    rects = ax.bar(x + offset, wide_stores_country[store_name], width=width, label=store_name)
    ax.bar_label(rects, rotation=45)

# Axes.title is a Text attribute, not a callable; set_title() sets the title.
ax.set_title("Sticker sale")
# Centre each country label under its cluster (equals x + width for three stores).
ax.set_xticks(x + width * (len(store_names) - 1) / 2, wide_stores_country.index)
ax.legend(loc="upper right", ncols=3)
# NOTE(review): fixed upper limit; bars taller than 15500 will be clipped -
# confirm this matches the magnitude of the summed num_sold values.
ax.set_ylim(0, 15500)
plt.show()