In [None]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import os
from os import listdir
import cv2
import seaborn as sns

In [None]:
import matplotlib.ticker as mtick

def plot_bar(database, col, figsize=(13,5), pct=False, label='articles'):
    """Draw a horizontal bar chart of how often each value of a column occurs.

    Parameters
    ----------
    database : pandas.DataFrame
        Frame containing the column to summarise.
    col : str
        Name of the column whose values are counted; also printed above the
        bars as the chart caption.
    figsize : tuple, default (13, 5)
        Figure size passed to ``plt.subplots``.
    pct : bool, default False
        If True the bars show each value's share of the non-null rows in
        percent; otherwise they show raw counts.
    label : str, default 'articles'
        Noun used in the x-axis label ("% of <label>" / "# of <label>").

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=figsize, facecolor='#f6f6f6')
    for loc in ['bottom', 'left']:
        ax.spines[loc].set_visible(True)
        ax.spines[loc].set_linewidth(2)
        ax.spines[loc].set_color('black')
    for loc in ['right', 'top']:
        ax.spines[loc].set_visible(False)

    counts = database[col].value_counts()
    if pct:
        counts = counts.div(counts.sum()).mul(100)

    # Name both columns explicitly: the names produced by a bare
    # value_counts().reset_index() differ between pandas versions.
    data = counts.rename_axis('category').reset_index(name='value')

    # Draw on the axes styled above instead of relying on the "current axes".
    sns.barplot(data=data, x='value', y='category', orient='h', ax=ax,
                color='#2693d7', lw=1.5, ec='black', zorder=2)

    if pct:
        ax.set_xlabel('% of ' + label, fontsize=10, weight='bold')
        ax.xaxis.set_major_formatter(mtick.PercentFormatter())
    else:
        ax.set_xlabel('# of ' + label, fontsize=10, weight='bold')

    ax.grid(zorder=0)
    # Caption placed just above the first bar (bars sit at y = 0, 1, 2, ...).
    ax.text(0, -0.75, col, color='black', fontsize=10, ha='left', va='bottom', weight='bold', style='italic')
    ax.set_ylabel('')

    plt.show()

In [None]:
def show_items_in_category(column, value, no_imgs=4, title=None):
    """Show product images for the first articles matching ``column == value``.

    Reads the module-level ``article`` DataFrame, which must provide the
    columns ``article_id`` and ``prod_name`` in addition to ``column``.

    Parameters
    ----------
    column : str
        Column of ``article`` to filter on.
    value : object
        Value that ``column`` must equal.
    no_imgs : int, default 4
        Number of image panels; at most this many articles are shown.
    title : str, optional
        Figure title.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    data = article[article[column] == value].head(no_imgs)

    # squeeze=False always yields a 2-D array, so indexing also works for no_imgs == 1.
    fig, axes = plt.subplots(1, no_imgs, figsize=(12, 4), squeeze=False)
    axes = axes[0]

    for axis in axes:
        axis.grid(False)
        axis.set_xticks([])
        axis.set_yticks([])

    rows = zip(data['article_id'], data['prod_name'])
    for axis, (prod_id, prod_name) in zip(axes, rows):
        # Image files are named by the id zero-padded to 10 characters and
        # stored in a folder named after its first three characters.
        image_id = str(prod_id).zfill(10)
        file_path = '../input/h-and-m-personalized-fashion-recommendations/images/{}/{}.jpg'.format(image_id[:3], image_id)

        try:
            img = plt.imread(file_path)
        except FileNotFoundError:
            # Keep the panel and its caption so one missing file does not abort the figure.
            axis.text(0.5, 0.5, 'image not found', ha='center', va='center', transform=axis.transAxes)
        else:
            axis.imshow(img, aspect='equal')
        axis.set_xlabel(prod_name)

    # Hide panels left over when fewer than no_imgs articles match.
    for axis in axes[len(data):]:
        axis.set_visible(False)

    fig.suptitle(title)
    plt.show()

In [None]:
# Load the sample submission file into a DataFrame.
sub = pd.read_csv("../input/h-and-m-personalized-fashion-recommendations/sample_submission.csv")

In [None]:
# Preview the first rows of the sample submission.
sub.head()

The `sub` variable above holds the contents of the "sample_submission.csv" file. It consists of 2 columns: customer_id and the predicted product purchases for the next 7 days (up to 12 items per customer). The file contains 1,371,980 rows, one for each customer.

This file shows the format expected for the predictions submitted in this competition, which are evaluated using Mean Average Precision @ 12 (MAP@12).

In [None]:
# Summarise the columns, dtypes and non-null counts of the sample submission.
sub.info()

In [None]:
# Load the article metadata; show_items_in_category() reads this frame as a global.
article = pd.read_csv("../input/h-and-m-personalized-fashion-recommendations/articles.csv")

In [None]:
# Summarise the columns, dtypes and non-null counts of the article metadata.
article.info()

The `article` variable above holds the contents of the "articles.csv" file.

5 product-related columns:
* product_code (int64) - 6-digit product code, 47 224 unique values
* prod_name (object) - name of a product, 132 unique values
* product_type_no (int64) - product type number, 131 unique values
* product_type_name (object) - name of a product type, equivalent of product_type_no
* product_group_name (object) - name of a product group, in total 19 groups
2 columns related to the pattern:
* graphical_appearance_no - code of a pattern, 30 unique values
* graphical_appearance_name - name of a pattern, 30 unique values

2 columns related to the color:
* colour_group_code - code of a color, 50 unique values
* colour_group_name - name of a color, 50 unique values

4 columns related to perceived colour (general tone):
* perceived_colour_value_id - perceived color id, 8 unique values
* perceived_colour_value_name - perceived color name, 8 unique values
* perceived_colour_master_id - perceived master color id, 20 unique values
* perceived_colour_master_name - perceived master color name, 20 unique values

2 columns related to the department:
* department_no - department number, 299 unique values
* department_name - department name, 299 unique values

4 columns related to the index, which is actually a top-level category:
* index_code - index code, 10 unique values
* index_name - index name, 10 unique values
* index_group_no - index group code, 5 unique values
* index_group_name - index group name, 5 unique values

2 columns related to the section:	
* section_no - section number, 56 unique values
* section_name - section name, 56 unique values

2 columns related to the garment group:
* garment_group_no - garment group number, 56 unique values
* garment_group_name - garment group name, 56 unique values

1 column with a detailed description of the article:
* detail_desc - 43 404 unique values

In [None]:
# Preview the first rows of the article metadata.
article.head()

In [None]:
# Percentage share of articles in each index group.
plot_bar(article, 'index_group_name', pct=True)

In [None]:
# Percentage share of articles for each perceived colour value.
plot_bar(article, 'perceived_colour_value_name', pct=True)

In [None]:
# Show up to 5 images of articles whose product_group_name is 'Garment Upper body'.
show_items_in_category('product_group_name', 'Garment Upper body', 5, 'Articles from a "Garment Upper body" category')

In [None]:
# Show up to 5 images of articles whose index_group_name is 'Sport'.
show_items_in_category('index_group_name', 'Sport', 5, 'Articles from a "Sport" category')

In [None]:
# Load the customer table into a DataFrame.
cust = pd.read_csv("../input/h-and-m-personalized-fashion-recommendations/customers.csv")

The `cust` variable above holds the contents of the "customers.csv" file.

6 customer-related columns:
* FN - binary feature (1 or NaN)
* Active - binary feature (1 or NaN)
* club_member_status - status in a club, 3 unique values
* fashion_news_frequency - frequency of sending communication to the customer, 4 unique values
* age - age of the customer
* postal_code - postal code (anonymized), 352 899 unique values


In [None]:
# Preview the first rows of the customer table.
cust.head()

In [None]:
# Percentage of missing values per customer column. The denominator must be
# the number of customer rows, so take the mean of the null mask of `cust` itself.
cust.isna().mean() * 100

In [None]:
# Keep a copy of the customer table as it was before the NaN fill below.
cust_backup = cust.copy()
# A missing 'Active' flag is treated as "not active" (0).
cust['Active'] = cust['Active'].fillna(0)

# value_counts() orders by frequency, so positional labels could end up on the
# wrong wedge. Pin the order to (0, 1) so it always matches the labels below.
active_counts = cust['Active'].value_counts().reindex([0, 1], fill_value=0)

fig, ax = plt.subplots(figsize=(5,5))
explode = (0, 0.1)  # pull the 'Active' wedge slightly out of the pie
colors = sns.color_palette('Paired')
ax.pie(active_counts, explode=explode, labels=['Not-active','Active'],
      autopct='%1.1f%%',shadow=True, startangle=90, colors=colors)
ax.axis('equal')
plt.show()

In [None]:
# Decade bin edges 10, 20, ..., 100 and their "<edge>'s" display labels.
labels_number = list(range(10, 101, 10))
labels = ["{}'s".format(edge) for edge in labels_number]

# Ten edges delimit nine intervals, so the last label ("100's") names no bin.
decade_names = labels[:-1]

# right=False makes every interval left-closed: [10, 20), [20, 30), ...
d = pd.cut(
    cust["age"],
    bins=labels_number,
    right=False,
    include_lowest=True,
    labels=decade_names,
)
cust["age_binned"] = pd.Categorical(d, categories=decade_names, ordered=True)

In [None]:
# Customers per age bin; sort=False skips the default ordering by frequency.
cust["age_binned"].value_counts(sort=False)

In [None]:
# Rows here are customers, so override plot_bar's default axis noun ('articles').
plot_bar(cust, 'age_binned', pct=True, label='customers')

In [None]:
# Load the training transactions into a DataFrame.
trans = pd.read_csv("../input/h-and-m-personalized-fashion-recommendations/transactions_train.csv")

In [None]:
# Parse the t_dat column into datetimes so the .dt accessor can be used on it.
transaction_time = pd.to_datetime(trans["t_dat"])

In [None]:
# Number of transactions per calendar year, in chronological order
# (sort_index) rather than ordered by frequency.
table = transaction_time.dt.year.value_counts().sort_index()
print(table.to_frame().T)

fig, ax = plt.subplots(figsize=(16, 4))
table.plot.bar(ax=ax, rot=90)
ax.set_xlabel('Year')
ax.set_ylabel('# of transactions')
ax.set_title('Transactions per year')
plt.show()

In [None]:
# value_counts() orders channels by frequency, not by id, so take the wedge
# labels from the counts' own index instead of hard-coding their order.
channel_counts = trans['sales_channel_id'].value_counts()

fig, ax = plt.subplots(figsize=(5,5))
explode = (0, 0.1)  # pull the second (less frequent) wedge slightly out of the pie
colors = sns.color_palette('Paired')
ax.pie(channel_counts, explode=explode, labels=channel_counts.index.astype(str).tolist(),
       autopct='%1.1f%%',shadow=True, startangle=90, colors=colors)
ax.axis('equal')
ax.set_title('Sale channel')
plt.show()

**Collaborative Filtering Method for Predicting Fashion Recommendations**

reference:
https://medium.com/@toprak.mhmt/collaborative-filtering-3ceb89080ade#:~:text=Collaborative%20filtering%20is%20based%20on,like%20or%20be%20interested%20in.