Segmentez des clients d'un site de e-commerce
==========================================

![logo](https://olist.com/wp-content/uploads/2018/04/Logo-01.png)

In [None]:
import os
from pathlib import Path
from datetime import datetime

import matplotlib
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import widgets, interact
from IPython.display import display

from src.features.build_features import load_data, make_dataset
from src.visualization.visualize import piechart, distplot

# Apply seaborn's theme to all figures in this notebook, with font sizes
# scaled by 1.5 so labels stay readable on the larger figures below.
sns.set(font_scale=1.5)

## Chargement des données

In [None]:
# Load the raw data through the project's own loader
# (src.features.build_features.load_data), giving it the absolute path
# of the raw-data folder.
raw_dir = Path('../data/raw').resolve()
data = load_data(raw_dir)

In [None]:
# Build the customer table over the whole history.
# NOTE(review): the two strings are presumably the start/end dates of the
# window; 1970 -> 2020 is wide enough to keep everything — confirm against
# make_dataset.
window_start, window_end = '1970-01-01', '2020-01-01'
customers = make_dataset(data, window_start, window_end)

## Analyse des données

In [None]:
# Summary statistics of the customer table; left as the cell's last
# expression so the notebook renders the result.
customers.describe()

### Fréquence

In [None]:
def _frequency_label(frequency):
    """Label a frequency of exactly 1 as 'unique', anything else as 'régulier'."""
    if frequency == 1:
        return 'unique'
    return 'régulier'


# Share of customers in each of the two frequency groups.
piechart(customers['frequency'].apply(_frequency_label))

### Nombre d'articles par panier

In [None]:
def _basket_size_label(n_items):
    """Return 1 when the value equals 1, otherwise the string '1+'."""
    # NOTE(review): the labels mix the int 1 with the str '1+'; kept as-is
    # so the chart labels are unchanged.
    if n_items == 1:
        return 1
    return '1+'


# Share of customers whose 'item_per_c' value is exactly 1 versus any other.
piechart(customers['item_per_c'].apply(_basket_size_label))

### Récence

In [None]:
# Distribution of the 'recency' column, drawn with the project's distplot
# helper: KDE curve switched off, normalised histogram switched on.
recency_plot_options = {'kde': False, 'norm_hist': True}
distplot(customers['recency'], **recency_plot_options)
plt.show()

### Monetary

In [None]:
# Distribution of the 'monetary' column, drawn with the project's distplot
# helper: KDE curve switched off, normalised histogram switched on.
monetary_plot_options = {'kde': False, 'norm_hist': True}
distplot(customers['monetary'], **monetary_plot_options)
plt.show()

### Review score

In [None]:
# Histogram of review scores with one bar per score from 1 to 5.
# Bin edges sit on the half-integers (0.5, 1.5, ..., 5.5) so every whole
# score gets its own bar centred on its tick. With range=(0, 5) and bins=5
# the last bin is the closed interval [4, 5], which merged the counts for
# scores 4 and 5 into a single bar.
# NOTE(review): assumes review_score lies in 1..5 — confirm against
# make_dataset; values outside [0.5, 5.5] are not drawn.
scores = range(1, 6)
bin_edges = [score - 0.5 for score in scores] + [scores[-1] + 0.5]
_, ax = plt.subplots(1, figsize=(8, 6))
ax.hist(customers['review_score'], bins=bin_edges)
ax.set_xticks(list(scores))
ax.set_xlim(bin_edges[0], bin_edges[-1])
plt.show()

### Catégories

In [None]:
# Names of the per-category columns of `customers`; the cells below use
# this list to select those columns (customers[categs]).
categs = ['clothing', 'hobbies', 'high-tech', 'home', 'other']

In [None]:
# For each category column, count the customers whose value is truthy
# (non-zero), then show the split as a pie chart.
has_category = customers[categs].astype(bool)
cat_count = has_category.sum()

_, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    cat_count.values,
    labels=cat_count.index,
    autopct='%1.1f%%',
)
plt.show()

In [None]:
# Same counts as a horizontal bar chart, ordered from smallest to largest.
cat_count = cat_count.sort_values()
plt.barh(cat_count.index, cat_count.values)

In [None]:
# Column totals of the category columns, shown as shares of the overall sum.
# NOTE(review): presumably these columns hold the amount spent per
# category — confirm against make_dataset.
revenues_repartition = customers[categs].sum()

_, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    revenues_repartition.values,
    labels=revenues_repartition.index,
    autopct='%1.1f%%',
)
plt.show()

## Dataset pour segmentation

In [None]:
# Rebuild the customer table for the segmentation dataset; this replaces
# the full-history `customers` used by the analysis cells above.
# NOTE(review): the two strings are presumably the start/end dates of the
# window (calendar year 2017) — confirm against make_dataset.
segment_start, segment_end = '2017-01-01', '2018-01-01'
customers = make_dataset(data, segment_start, segment_end)

In [None]:
# Save the segmentation dataset as CSV, without the index.
# The output folder is created first so the export also works on a fresh
# checkout where ../data/processed does not exist yet (to_csv does not
# create missing directories).
# NOTE(review): index=False drops the index — confirm it does not hold the
# customer identifier.
output_path = Path('../data/processed/dataset.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
customers.to_csv(output_path, index=False)