# Recommendations based on Top12-sold articles per age-cluster

This notebook generates 12 recommendations for every customer_id in the dataset, based on the 12 best-selling articles in the age cluster the customer is assigned to.

In [None]:
# Import necessary libraries:
import pandas as pd
import numpy as np

# 1.0 Load data

In [None]:
# Read the customer table and the transaction log from the data folder.
# article_id is parsed with the pandas 'string' dtype so the ids stay text
# (presumably to keep leading zeros intact -- confirm against the raw csv).
df_cust = pd.read_csv('../data/customers.csv')
df_trans = pd.read_csv(
    '../data/transactions_train.csv',
    dtype={'article_id': 'string'},
)
# Print the column / dtype / non-null summary of the customer table.
df_cust.info()

# 2.0 Generate age groups and assign to transactions

In [None]:
# Bucket every customer into a 5-year age group (left-closed intervals,
# i.e. 20 <= age < 25 -> '20-24').
# The last edge is infinite so that the final group really is open-ended:
# with a hard upper edge of 100 and right=False, customers aged 100 or older
# fell outside all bins and were silently relabelled 'unknown'.
# Note: the '>75' label covers ages 75 and above.
bins = [0, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, float('inf')]
labels = [
    '0-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45-49',
    '50-54', '55-59', '60-64', '65-69', '70-74', '>75',
]
df_cust['agegroup'] = pd.cut(df_cust['age'], bins=bins, labels=labels, right=False)
# Ages that match no bin (missing or negative) come back as NaN from pd.cut;
# give them an explicit 'unknown' category instead.
df_cust['agegroup'] = df_cust['agegroup'].cat.add_categories('unknown').fillna('unknown')
df_cust.head()

In [None]:
# Build a slim customer table by removing the customer attributes that are
# not needed for the age-group lookup.
df_age = df_cust.drop(columns=[
    'FN',
    'Active',
    'club_member_status',
    'fashion_news_frequency',
    'age',
    'postal_code',
])
# Join the age group onto every transaction via customer_id, then discard
# the transaction columns that are not needed for counting sales.
df_trans_age = df_trans.merge(df_age, on="customer_id").drop(
    columns=['t_dat', 'customer_id', 'price', 'sales_channel_id'],
)
df_trans_age.head()

# 3.0 Calculate top 12 per age group

In [None]:
# Count transactions per (article_id, agegroup) pair: one row per article,
# one column per age group. reset_index turns article_id back into a
# regular column.
sales_per_agegroup = pd.crosstab(
    index=df_trans_age['article_id'],
    columns=df_trans_age['agegroup'],
).reset_index()
sales_per_agegroup.head()

In [None]:
# Build one row per age group holding the ids of its 12 best-selling articles.
# Rows are collected in a plain list and converted to a DataFrame once:
# DataFrame.append was removed in pandas 2.0, and it copied the whole frame
# on every call.
groups = list(sales_per_agegroup.columns)
groups.remove('article_id')  # every remaining column is an age group
top12_rows = []
for element in groups:
    # Sort articles by their sales count in this age group and keep the first 12.
    sales_ordered = sales_per_agegroup.sort_values(element, ascending=False).iloc[:12]
    article_list = list(sales_ordered.article_id)
    top12_rows.append({'agegroup': element, 'articles': article_list})
# Passing columns explicitly keeps the expected schema even if there are no groups.
df_top12 = pd.DataFrame(top12_rows, columns=['agegroup', 'articles'])

print(df_top12)

# 4.0 Generate submission csv

In [None]:
# Give every customer the top-12 article list of their age group.
# Inner join on agegroup (the pandas default, spelled out here).
df_submssion = df_age.merge(df_top12, how='inner', on='agegroup')
df_submssion.head()

In [None]:
# Collapse each article list into a single space-separated string and
# remove the helper columns that are no longer needed.
df_submssion['prediction'] = df_submssion['articles'].map(' '.join)
df_submssion = df_submssion.drop(columns=['agegroup', 'articles'])
df_submssion.head()

In [None]:
# Write the per-customer predictions to disk; the row index is not written.
df_submssion.to_csv(
    path_or_buf='../data/top12-per-agegroup.csv',
    index=False,
)