In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import matplotlib as mpl
from cycler import cycler
import seaborn as sns

In [None]:
# Read the three competition tables, in order: article metadata, the
# transaction log, and customer metadata.
df_articles, df_tran, df_cust = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/h-and-m-personalized-fashion-recommendations/articles.csv",
        "/kaggle/input/h-and-m-personalized-fashion-recommendations/transactions_train.csv",
        "/kaggle/input/h-and-m-personalized-fashion-recommendations/customers.csv",
    )
)

# Analysis of articles

In [None]:
# Print the number of distinct values per article column
# (dropna=False so a missing value counts as one distinct value).
for column_name in df_articles.columns:
    print(column_name, df_articles[column_name].nunique(dropna=False))

## Number of articles in each index group (`index_group_name`)

In [None]:
# Report the matplotlib settings currently in effect. One print call with a
# newline separator; the "\n" suffixes leave a blank line after each group.
print(
    f"Figure figsize: {plt.rcParams.get('figure.figsize')}",
    f"Figure dpi: {plt.rcParams.get('figure.dpi')}",
    f"Savefig dpi: {plt.rcParams.get('savefig.dpi')}\n",
    f"Font size: {plt.rcParams.get('font.size')}",
    f"Legend fontsize: {plt.rcParams.get('legend.fontsize')}",
    f"Figure titlesize: {plt.rcParams.get('figure.titlesize')}\n",
    f"Axes prop_cycle: {plt.rcParams.get('axes.prop_cycle')}\n",
    sep="\n",
)

In [None]:
# Number of colours to sample for the notebook-wide colour cycle.
n = 10
# Sample n evenly spaced RGBA colours across the full coolwarm colormap.
# A colormap's float domain is [0, 1]; sampling up to 2 clips every value
# above 1 to the same end colour, so half of the cycle would be identical.
color = plt.cm.coolwarm(np.linspace(0, 1, n))

print(color)

In [None]:
# Notebook-wide matplotlib defaults: figure geometry, font sizes and the
# colour cycle (`color` is the RGBA array built in the previous cell).
mpl.rcParams.update({
    'figure.figsize': [16.0, 10.0],
    'figure.dpi': 80,
    'savefig.dpi': 100,
    'font.size': 14,
    'legend.fontsize': 'xx-large',
    'figure.titlesize': 'x-large',
    'axes.prop_cycle': cycler('color', color),
})

# Echo the values back, one per line, to confirm they were applied.
print(
    plt.rcParams.get('figure.figsize'),
    plt.rcParams.get('figure.dpi'),
    plt.rcParams.get('savefig.dpi'),
    plt.rcParams.get('font.size'),
    plt.rcParams.get('legend.fontsize'),
    plt.rcParams.get('figure.titlesize'),
    plt.rcParams.get('axes.prop_cycle'),
    sep="\n",
)

In [None]:
# sns.set_theme() re-applies seaborn's own context and palette, which would
# silently replace the font sizes and colour cycle configured in the previous
# cell. Snapshot those rcParams and hand them back through `rc=`, which
# set_theme applies last, so the darkgrid style is added without losing them.
preserved_rc = {
    key: mpl.rcParams[key]
    for key in ("font.size", "legend.fontsize", "axes.prop_cycle")
}
sns.set_theme(style="darkgrid", rc=preserved_rc)

# Number of articles in each index group.
ax = sns.countplot(data=df_articles, x="index_group_name")
plt.xticks(rotation=45);

## Number of articles of each product type

In [None]:
# Article count per product type; labels are rotated and shrunk to keep the
# category names legible.
ax = sns.countplot(data=df_articles, x="product_type_name", palette="Set1")
plt.xticks(fontsize=10, rotation=90);

## Number of articles of each graphical appearance name

In [None]:
# Article count per graphical appearance (pattern) category.
ax = sns.countplot(data=df_articles, x="graphical_appearance_name", palette="Set2")
plt.xticks(rotation=90);

## Number of articles of each colour

In [None]:
# Article count per colour group.
ax = sns.countplot(data=df_articles, x="colour_group_name", palette="Set3")
plt.xticks(rotation=90);

# Analysis of customers

In [None]:
# Print the number of distinct values (missing counted as one) per customer column.
for column_name, column_values in df_cust.items():
    print(column_name, len(column_values.unique()))

In [None]:
# Missing-value count per customer column (isna is the canonical name of isnull).
df_cust.isna().sum()

In [None]:
# Replacement used for missing entries in each customer column.
# NOTE(review): age 0 acts as a "missing" placeholder here, so downstream
# age statistics will include it unless it is filtered out.
customer_fill_values = {
    'FN': 0,
    'Active': 0,
    'age': 0,
    'club_member_status': "No info",
    'fashion_news_frequency': "None",
}
for column_name, fill_value in customer_fill_values.items():
    df_cust[column_name] = df_cust[column_name].fillna(fill_value)

# Merge the upper-case "NONE" label into the "None" label used for missing values.
df_cust['fashion_news_frequency'] = df_cust['fashion_news_frequency'].replace("NONE", "None")

## Analysis of club member status

In [None]:
# Customer count per club membership status.
ax = sns.countplot(data=df_cust, x="club_member_status", palette="Set3")
plt.xticks(rotation=45);

## Analysis of fashion news frequency

In [None]:
# Customer count per fashion-news subscription frequency.
ax = sns.countplot(data=df_cust, x="fashion_news_frequency", palette="Set3")
plt.xticks(rotation=45);

## Analysis of age

In [None]:
# Missing ages were filled with 0 earlier in this notebook, so 0 is a
# placeholder rather than a real age. Leave it out of the plot so the
# distribution is not distorted by a fake "age 0" bar. df_cust itself is
# left unchanged.
customers_with_age = df_cust.loc[df_cust["age"] > 0, ["age"]]
ax = sns.countplot(data=customers_with_age, x="age", palette="Set3")
plt.xticks(rotation=90, fontsize=12);

# Analysis of transactions

## Number of unique values in each column

In [None]:
# Print the number of distinct values per transaction column
# (dropna=False so a missing value counts as one distinct value).
for column_name in df_tran:
    print(column_name, df_tran[column_name].nunique(dropna=False))

## Analysis of sales per month

In [None]:
# Add a monthly period column derived from the transaction date, then count
# transaction rows per month.
df_tran["Month_year"] = pd.to_datetime(df_tran["t_dat"]).dt.to_period("M")
ax = sns.countplot(data=df_tran, x="Month_year", palette="Set3")
plt.xticks(rotation=90);

## Analysis of sales channel id

In [None]:
# Transaction count per sales channel.
ax = sns.countplot(data=df_tran, x="sales_channel_id", palette="Set3")
plt.xticks(rotation=45);

In [None]:
# Price of every transaction row, grouped by the Month_year column created in
# the monthly-sales cell above (one marker per row of df_tran).
ax = sns.stripplot(data=df_tran, x="Month_year", y="price", palette="Set3")
plt.xticks(rotation=90);

In [None]:
# Article attributes to attach to each transaction; article_id is the join key.
article_attribute_columns = [
    "article_id",
    "index_group_name",
    "product_type_name",
    "graphical_appearance_name",
    "colour_group_name",
]
# Left join keeps every transaction row, including any whose article_id has
# no match in df_articles.
df_tran_arti = df_tran.merge(
    df_articles[article_attribute_columns],
    on="article_id",
    how="left",
)

In [None]:
# Display the merged frame's column labels to confirm the article attributes were attached.
df_tran_arti.columns

# Checking sales of product types

In [None]:
# Number of transaction rows per product type (i.e. units sold, not catalogue size).
ax = sns.countplot(data=df_tran_arti, x="product_type_name", palette="Set3")
plt.xticks(fontsize=10, rotation=90);

In [None]:
# Parse the transaction date column into datetimes so it can be compared
# against date strings in the filter below.
df_tran_arti["t_dat"] = pd.to_datetime(df_tran_arti["t_dat"])

In [None]:
# Keep transactions dated strictly after 15 September and strictly before
# 1 October, for each of 2018, 2019 and 2020.
late_september_windows = (
    ("2018-09-15", "2018-10-01"),
    ("2019-09-15", "2019-10-01"),
    ("2020-09-15", "2020-10-01"),
)
sale_dates = df_tran_arti["t_dat"]
in_any_window = pd.Series(False, index=sale_dates.index)
for window_start, window_end in late_september_windows:
    in_any_window |= (sale_dates > window_start) & (sale_dates < window_end)

a = df_tran_arti.loc[in_any_window]

In [None]:
# Product types sold in the late-September windows; `a` is the filtered
# transaction frame built in the previous cell.
ax = sns.countplot(data=a, x="product_type_name", palette="Set3")
# Do not bind the xticks result to `a`: that would replace the filtered frame
# with a tuple and make this cell fail when it is run a second time.
plt.xticks(rotation=90, fontsize=10);