# EDA
* Time series graph
* Box plot
* Pie chart

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import seaborn as sns

In [None]:
# Load the training CSV into a DataFrame that the exploration cells below
# inspect (the path points at the Kaggle input directory of the
# "tabular-playground-series-jan-2022" dataset).
train_df = pd.read_csv('../input/tabular-playground-series-jan-2022/train.csv')

In [None]:
# Bare expression: in a notebook cell this renders the DataFrame as output.
train_df

In [None]:
# Print a summary of the DataFrame (columns, dtypes, non-null counts).
train_df.info()

In [None]:
# Collect the distinct values of every categorical column and expose them
# under a plural key ('countries', 'stores', 'products').
features = ['country', 'store', 'product']
key_dict = ['countries', 'stores', 'products']
value_dict = [train_df[column].unique() for column in features]
feat_dict = {plural: uniques for plural, uniques in zip(key_dict, value_dict)}
print(feat_dict)

## Time series graph

In [None]:
def plot_graph(country, store, product, year_marks=(2015, 2016, 2017, 2018)):
    """Draw a bar chart of ``num_sold`` over time for one combination.

    The training CSV is read from disk, its ``date`` column is converted to
    datetimes, and the rows matching all three arguments are plotted as bars
    against the date. A dashed red vertical line is drawn at 1 January of
    every year in ``year_marks``. The figure title reports the mean, median,
    maximum and minimum of the selected ``num_sold`` values.

    Args:
        country: Value to match in the ``country`` column.
        store: Value to match in the ``store`` column.
        product: Value to match in the ``product`` column.
        year_marks: Years whose 1 January gets a vertical marker line.
    """
    train_df = pd.read_csv('../input/tabular-playground-series-jan-2022/train.csv')
    # Convert date type from 'str' to 'datetime' so the x axis is a time axis.
    train_df['date'] = pd.to_datetime(train_df['date'])

    matches = (
        (train_df['country'] == country)
        & (train_df['store'] == store)
        & (train_df['product'] == product)
    )
    df = train_df[matches]
    sales = df['num_sold']

    fig = plt.figure(figsize=(50, 10))
    plt.bar(df['date'], sales)

    # axvline's ymin/ymax are fractions of the axes height (0 to 1), not data
    # values; the defaults already span the full height of the plot.
    for year in year_marks:
        plt.axvline(datetime.date(year, 1, 1), color="red", linestyle="--")

    mean = sales.mean()
    median = sales.median()
    df_max = sales.max()
    df_min = sales.min()

    fig.suptitle(f'The sales of {product} in {store}, {country} \n MEAN:{mean:.1f}, MEDIAN:{median}, MAX:{df_max}, MIN:{df_min}', fontsize = 30)
    plt.show()

In [None]:
# Draw the time-series figure for every country/store/product combination,
# iterating countries outermost and products innermost.
combinations = [
    (country, store, product)
    for country in feat_dict['countries']
    for store in feat_dict['stores']
    for product in feat_dict['products']
]
for country, store, product in combinations:
    plot_graph(country, store, product)

## Box plot

In [None]:
def box_plot(country, store, product):
    """Draw a horizontal box plot of ``num_sold`` for one combination.

    The training CSV is read from disk and filtered to the rows matching all
    three arguments. The figure title reports the mean, minimum, 25th
    percentile, median, 75th percentile and maximum of the selected
    ``num_sold`` values.

    Args:
        country: Value to match in the ``country`` column.
        store: Value to match in the ``store`` column.
        product: Value to match in the ``product`` column.
    """
    train_df = pd.read_csv('../input/tabular-playground-series-jan-2022/train.csv')
    # The date column is not used by this plot, so it is left unparsed.
    matches = (
        (train_df['country'] == country)
        & (train_df['store'] == store)
        & (train_df['product'] == product)
    )
    sales = train_df.loc[matches, 'num_sold']

    fig = plt.figure(figsize=(40, 10))
    outline = dict(color='black', linewidth=1)
    # The median and other values in the figure are calculated including outliers.
    plt.boxplot(sales,
                vert=False,
                patch_artist=True,
                widths=0.5,
                boxprops=dict(facecolor='#1E90FF80',
                              color='black', linewidth=1),
                medianprops=outline,
                whiskerprops=outline,
                capprops=outline,
                flierprops=dict(markeredgecolor='black', markeredgewidth=1))

    mean = sales.mean()
    median = sales.median()
    df_max = sales.max()
    df_min = sales.min()
    df_25 = sales.quantile(0.25)
    df_75 = sales.quantile(0.75)

    fig.suptitle(f'The sales of {product} in {store}, {country} \n MEAN:{mean:.1f}, MIN:{df_min}, 25%: {df_25}, MEDIAN:{median}, 75%: {df_75}, MAX:{df_max}', fontsize = 30)
    plt.show()

In [None]:
# Draw the box plot for every country/store/product combination,
# iterating countries outermost and products innermost.
combinations = [
    (country, store, product)
    for country in feat_dict['countries']
    for store in feat_dict['stores']
    for product in feat_dict['products']
]
for country, store, product in combinations:
    box_plot(country, store, product)

## Pie chart

In [None]:
def pie_chart(columns: str) -> None:
    """Draw a pie chart of total ``num_sold`` per distinct value of a column.

    The training CSV is read from disk, ``num_sold`` is summed for each
    distinct value found in the ``columns`` column, and the totals are shown
    as a pie chart with percentage labels. The same totals are also printed,
    one ``label: total`` line per distinct value.

    Args:
        columns: Name of the single column to group the sales by.
    """
    df = pd.read_csv('../input/tabular-playground-series-jan-2022/train.csv')
    labels = df[columns].unique()
    # Sum once per label and reuse the result for the chart and the printout.
    totals = [df.loc[df[columns] == label, 'num_sold'].sum() for label in labels]

    plt.figure(figsize=(9, 9))
    plt.pie(totals, labels=labels, autopct='%.2f')
    plt.title(f'Total sum of product sold by {columns} ', fontsize=16)
    for label, total in zip(labels, totals):
        print(f'{label}: {total}')
    # Equal aspect ratio keeps the pie circular.
    plt.axis('equal')
    plt.show()

In [None]:
# Draw one pie chart per categorical column listed in `features`.
for column in features:
    pie_chart(column)