# Рекомендательная система на основе рейтинга товара

## Библиотеки

In [1]:
import pandas as pd
from utils import HM

## Загрузка данных

In [2]:
# Project helper (from `utils`) that exposes the dataset loaders used below.
hm = HM()

# Articles are loaded in their prepared variant (`is_prepared=True`); the other
# tables use the loader defaults.
# NOTE(review): what "prepared" adds is defined in utils.HM — confirm there.
articles = hm.get_articles(is_prepared=True)
customers = hm.get_customers()
sample_submission = hm.get_sample_submission()
transactions_train = hm.get_transactions_train()

## Константы

In [3]:
# Number of article ids placed into every prediction string.
count_articles = 12

## Предсказание простое

Возьмем `count_articles` самых популярных товаров и проставим их каждому пользователю

In [4]:
# The first `count_articles` rows of `articles` are taken as the top list, so
# this relies on the prepared table already being ordered by popularity.
# NOTE(review): confirm that ordering in the data-preparation step.
#
# Ids are rendered as 10-character, zero-padded strings (the format used in the
# submission file). `str.zfill(10)` replaces a blind `'0' + x` so that ids that
# are already strings / already padded are not corrupted with an extra zero.
top_articles = articles[:count_articles]['article_id'].astype(str).str.zfill(10)
top_articles_in_str = ' '.join(top_articles)

In [5]:
top_articles_in_str

'0706016001 0706016002 0372860001 0610776002 0759871002 0464297007 0372860002 0610776001 0399223001 0706016003 0720125001 0156231001'

In [6]:
sample_submission.iloc[0]['prediction']

'0706016001 0706016002 0372860001 0610776002 0759871002 0464297007 0372860002 0610776001 0399223001 0706016003 0720125001 0156231001'

### Вывод

sample_submission идеально соответствует результату рекомендательной системы на основе простого рейтинга.

А вот результат этого решения на Kaggle:

![изображение.png](attachment:ce009103-ea22-480b-bb7a-7db067dbbbab.png)

Вернемся обратно на [этап подготовки данных](./Data_preparation.ipynb) и определим пол для каждого клиента

## Предсказывание посложнее

In [58]:
def get_top_articles(df: pd.DataFrame, by: str, value: str, count: int):
    """Return the ``article_id`` values of the best-rated rows of one segment.

    Rows of ``df`` whose ``by`` column equals ``value`` are ordered by the
    ``rating`` column, highest first, and the first ``count`` of them are kept.

    Args:
        df: Table with at least the ``by``, ``rating`` and ``article_id`` columns.
        by: Name of the column that defines the segment.
        value: Segment value to select.
        count: Maximum number of rows to return.

    Returns:
        The ``article_id`` column of the selected rows, best rated first.
    """
    in_segment = df[by] == value
    ranked = df[in_segment].sort_values(by='rating', ascending=False)
    best = ranked[:count]
    return best['article_id']

In [8]:
def articles_to_str(series: pd.Series, width: int = 10):
    """Render article ids as one space-separated string of zero-padded ids.

    Each value is converted to ``str`` and left-padded with zeros to ``width``
    characters. Padding to a fixed width (instead of always prepending a single
    ``'0'``) keeps ids that are already padded strings unchanged and also pads
    ids that lost more than one leading zero when parsed as integers.

    Args:
        series: Article ids, as integers or strings.
        width: Target length of every id; values already at least this long
            are left as they are.

    Returns:
        The padded ids joined with single spaces, in the order of ``series``;
        an empty string for an empty ``series``.
    """
    return ' '.join(series.astype(str).str.zfill(width))

In [9]:
# Prepared variants of both tables; the cells below read a `gender` column
# from each of them.
# NOTE(review): this is the same call that produced `articles` earlier —
# confirm whether a separate load is still needed.
articles_with_gender = hm.get_articles(is_prepared=True)
customers_with_gender = hm.get_customers(is_prepared=True)

In [21]:
# Best-rated `count_articles` articles for each article `gender` label,
# rendered straight into the space-separated prediction string.
top_articles_mens = articles_to_str(
    get_top_articles(df=articles_with_gender, by='gender', value='M', count=count_articles)
)
top_articles_womens = articles_to_str(
    get_top_articles(df=articles_with_gender, by='gender', value='W', count=count_articles)
)
top_articles_other = articles_to_str(
    get_top_articles(df=articles_with_gender, by='gender', value='Other', count=count_articles)
)

In [22]:
# Left join: every submission row is kept, and customers without a match in
# `customers_with_gender` get missing values in the joined columns.
sample_submission_with_gender = sample_submission.merge(customers_with_gender, on='customer_id', how='left')

In [23]:
# Row labels of the submission rows, split by the customer's `gender` value.
gender_column = sample_submission_with_gender['gender']
mens_index = gender_column[gender_column == 'M'].index
womens_index = gender_column[gender_column == 'W'].index
other_index = gender_column[gender_column == 'Other'].index

In [24]:
# Overwrite the prediction of every row in a segment with that segment's
# string; rows outside all three indexes keep the prediction they were loaded with.
for segment_index, segment_prediction in (
    (mens_index, top_articles_mens),
    (womens_index, top_articles_womens),
    (other_index, top_articles_other),
):
    sample_submission_with_gender.loc[segment_index, 'prediction'] = segment_prediction

In [25]:
sample_submission_with_gender.loc[mens_index, 'prediction']

11         0685814001 0685816002 0685816001 0685813001 06...
20         0685814001 0685816002 0685816001 0685813001 06...
88         0685814001 0685816002 0685816001 0685813001 06...
99         0685814001 0685816002 0685816001 0685813001 06...
103        0685814001 0685816002 0685816001 0685813001 06...
                                 ...                        
1362213    0685814001 0685816002 0685816001 0685813001 06...
1362222    0685814001 0685816002 0685816001 0685813001 06...
1362238    0685814001 0685816002 0685816001 0685813001 06...
1362240    0685814001 0685816002 0685816001 0685813001 06...
1362275    0685814001 0685816002 0685816001 0685813001 06...
Name: prediction, Length: 53324, dtype: object

In [29]:
# Export only the two submission columns. Writing `customer_id` as a regular
# first column with `index=False` produces the same CSV as making it the index.
gender_submission = sample_submission_with_gender[['customer_id', 'prediction']]
gender_submission.to_csv('./prediction/rating_base.csv', index=False)

### Вывод

Получили результат хуже предыдущего :(

![изображение.png](attachment:d2f0b586-def9-46af-a0cd-5a9b28b277ca.png)

## Предсказывание чуть посложнее (дубль 2)

### Определим для каждого товара возрастную группу, которая чаще всех его покупает

In [41]:
# Attach each buyer's `age_group` to every transaction (left join keeps all
# transactions). Assumes `customers` carries an `age_group` column — TODO confirm,
# since `customers` was loaded without `is_prepared=True`.
# NOTE(review): in this notebook the result is only referenced by the
# commented-out `groupby` that built article_and_top_age_group.csv.
transactions_train_with_age_group = transactions_train.merge(customers[['customer_id', 'age_group']], on='customer_id', how='left')

In [48]:
# Load the cached table that maps each `article_id` to an `age_group`
# (per the section heading: the age group that buys the article most often).
article_and_top_age_group = pd.read_csv('./data_prepared/article_and_top_age_group.csv')

In [50]:
article_and_top_age_group.head()

Unnamed: 0,article_id,age_group
0,108775015,YOUNG
1,108775044,YOUNG
2,108775051,YOUNG
3,110065001,YOUNG
4,110065002,YOUNG


In [52]:
# Left join keeps every article; articles absent from the age-group table end
# up with a missing `age_group`.
articles_with_age_group = articles.merge(article_and_top_age_group, on='article_id', how='left')

### Предсказывание

In [67]:
# Reload the sample submission so this experiment starts from a fresh copy of
# the default predictions.
sample_submission = hm.get_sample_submission()

In [59]:
# Best-rated `count_articles` articles for each article `age_group` label,
# rendered straight into the space-separated prediction string.
top_articles_young = articles_to_str(
    get_top_articles(df=articles_with_age_group, by='age_group', value='YOUNG', count=count_articles)
)
top_articles_medium = articles_to_str(
    get_top_articles(df=articles_with_age_group, by='age_group', value='MEDIUM', count=count_articles)
)
top_articles_elderly = articles_to_str(
    get_top_articles(df=articles_with_age_group, by='age_group', value='ELDERLY', count=count_articles)
)
top_articles_senile = articles_to_str(
    get_top_articles(df=articles_with_age_group, by='age_group', value='SENILE', count=count_articles)
)
top_articles_long_live = articles_to_str(
    get_top_articles(df=articles_with_age_group, by='age_group', value='LONG_LIVE', count=count_articles)
)

In [62]:
# Attach each buyer's `age_group` to every transaction (left join keeps all
# transactions).
# NOTE(review): this repeats a merge that already appears earlier in the
# notebook with identical arguments — confirm whether the re-run is needed.
transactions_train_with_age_group = transactions_train.merge(customers[['customer_id', 'age_group']], on='customer_id', how='left')

In [69]:
# Left join of only the `customer_id`/`age_group` columns: every submission row
# is kept, and customers without a match get a missing `age_group`.
sample_submission_with_age_group = sample_submission.merge(customers[['customer_id', 'age_group']], on='customer_id', how='left')

In [44]:
# article_and_top_age_group = transactions_train_with_age_group.groupby('article_id')['age_group'].apply(lambda x: x.describe().top)

In [47]:
# article_and_top_age_group.to_csv('./data_prepared/article_and_top_age_group.csv')

In [71]:
# Row labels of the submission rows, split by the customer's `age_group` value.
age_group_column = sample_submission_with_age_group['age_group']
young_index = age_group_column[age_group_column == 'YOUNG'].index
medium_index = age_group_column[age_group_column == 'MEDIUM'].index
elderly_index = age_group_column[age_group_column == 'ELDERLY'].index
senile_index = age_group_column[age_group_column == 'SENILE'].index
long_live_index = age_group_column[age_group_column == 'LONG_LIVE'].index

In [72]:
# Overwrite the prediction of every row in an age group with that group's
# string; rows outside all five indexes keep the prediction they were loaded with.
for group_index, group_prediction in (
    (young_index, top_articles_young),
    (medium_index, top_articles_medium),
    (elderly_index, top_articles_elderly),
    (senile_index, top_articles_senile),
    (long_live_index, top_articles_long_live),
):
    sample_submission_with_age_group.loc[group_index, 'prediction'] = group_prediction

In [48]:
# Load the cached `article_id` -> `age_group` table.
# NOTE(review): the same file is read by an identical cell earlier in the
# notebook — confirm whether this repeated load is needed.
article_and_top_age_group = pd.read_csv('./data_prepared/article_and_top_age_group.csv')

In [74]:
# Export only the two submission columns. Writing `customer_id` as a regular
# first column with `index=False` produces the same CSV as making it the index.
# NOTE(review): this is the same path the gender-based export writes to, so
# that earlier file is overwritten.
age_group_submission = sample_submission_with_age_group[['customer_id', 'prediction']]
age_group_submission.to_csv('./prediction/rating_base.csv', index=False)

In [50]:
article_and_top_age_group.head()

Unnamed: 0,article_id,age_group
0,108775015,YOUNG
1,108775044,YOUNG
2,108775051,YOUNG
3,110065001,YOUNG
4,110065002,YOUNG


### Вывод

Результат стал ещё хуже

![изображение.png](attachment:8047aef6-1603-4fdd-8e86-6cdfbb3c4e03.png)

![изображение.png](attachment:404e2d16-e3de-4d9f-a60b-f2d5c856438a.png)

In [51]:
articles.head()

Unnamed: 0,article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,...,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc,rating,price,gender
0,706016001,706016,Jade HW Skinny Denim TRS,272,Trousers,Garment Lower body,1010016,Solid,9,Black,...,2,Divided,53,Divided Collection,1009,Trousers,High-waisted jeans in washed superstretch deni...,50287.0,0.032448,Other
1,706016002,706016,Jade HW Skinny Denim TRS,272,Trousers,Garment Lower body,1010016,Solid,71,Light Blue,...,2,Divided,53,Divided Collection,1009,Trousers,High-waisted jeans in washed superstretch deni...,35043.0,0.032426,Other
2,372860001,372860,7p Basic Shaftless,302,Socks,Socks & Tights,1010016,Solid,9,Black,...,1,Ladieswear,62,"Womens Nightwear, Socks & Tigh",1021,Socks and Tights,Fine-knit trainer socks in a soft cotton blend.,31718.0,0.012958,W
3,610776002,610776,Tilly (1),255,T-shirt,Garment Upper body,1010016,Solid,9,Black,...,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,T-shirt in lightweight jersey with a rounded h...,30199.0,0.008083,W
4,759871002,759871,Tilda tank,253,Vest top,Garment Upper body,1010016,Solid,9,Black,...,2,Divided,80,Divided Complements Other,1002,Jersey Basic,"Cropped, fitted top in cotton jersey with narr...",26329.0,0.005605,Other


In [52]:
# Left join keeps every article; articles absent from the age-group table end
# up with a missing `age_group` (visible in the last rows of the output below).
articles_with_age_group = articles.merge(article_and_top_age_group, on='article_id', how='left')

In [53]:
articles_with_age_group

Unnamed: 0,article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,...,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc,rating,price,gender,age_group
0,706016001,706016,Jade HW Skinny Denim TRS,272,Trousers,Garment Lower body,1010016,Solid,9,Black,...,Divided,53,Divided Collection,1009,Trousers,High-waisted jeans in washed superstretch deni...,50287.0,0.032448,Other,YOUNG
1,706016002,706016,Jade HW Skinny Denim TRS,272,Trousers,Garment Lower body,1010016,Solid,71,Light Blue,...,Divided,53,Divided Collection,1009,Trousers,High-waisted jeans in washed superstretch deni...,35043.0,0.032426,Other,YOUNG
2,372860001,372860,7p Basic Shaftless,302,Socks,Socks & Tights,1010016,Solid,9,Black,...,Ladieswear,62,"Womens Nightwear, Socks & Tigh",1021,Socks and Tights,Fine-knit trainer socks in a soft cotton blend.,31718.0,0.012958,W,YOUNG
3,610776002,610776,Tilly (1),255,T-shirt,Garment Upper body,1010016,Solid,9,Black,...,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,T-shirt in lightweight jersey with a rounded h...,30199.0,0.008083,W,YOUNG
4,759871002,759871,Tilda tank,253,Vest top,Garment Upper body,1010016,Solid,9,Black,...,Divided,80,Divided Complements Other,1002,Jersey Basic,"Cropped, fitted top in cotton jersey with narr...",26329.0,0.005605,Other,YOUNG
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105537,926010001,926010,SC MIDDAY Tee,255,T-shirt,Garment Upper body,1010016,Solid,10,White,...,Ladieswear,82,Special Collections,1001,Unknown,Studio Collection. Overdimensioned T-shirt in ...,0.0,0.028760,Other,
105538,896195002,896195,CHILLY LS body 2P,256,Bodysuit,Garment Upper body,1010016,Solid,81,Light Turquoise,...,Baby/Children,44,Baby Essentials & Complements,1006,Woven/Jersey/Knitted mix Baby,Baby Exclusive. Long-sleeved bodysuits in soft...,0.0,0.028760,Other,
105539,917203001,917203,BRYSON PILE jacket,252,Sweater,Garment Upper body,1010020,Contrast,73,Dark Blue,...,Menswear,21,Contemporary Casual,1005,Jersey Fancy,Jacket in soft faux shearling with a high coll...,0.0,0.028760,Other,
105540,917203003,917203,BRYSON PILE jacket,252,Sweater,Garment Upper body,1010020,Contrast,9,Black,...,Menswear,21,Contemporary Casual,1005,Jersey Fancy,Jacket in soft faux shearling with a high coll...,0.0,0.028760,Other,
