## 021 データを読み込む

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np

In [None]:
# Load the usage log, report how many rows it has, and preview the first rows.
uselog = pd.read_csv('./input/100knoks/03/use_log.csv')
print(uselog.shape[0])
uselog.head()

In [None]:
# Load the customer master, report how many rows it has, and preview it.
customer_master = pd.read_csv('./input/100knoks/03/customer_master.csv')
print(customer_master.shape[0])
customer_master.head()

In [None]:
# Load the class master, report how many rows it has, and preview it.
class_master = pd.read_csv('./input/100knoks/03/class_master.csv')
print(class_master.shape[0])
class_master.head()

In [None]:
# Load the campaign master, report how many rows it has, and preview it.
campaign_master = pd.read_csv('./input/100knoks/03/campaign_master.csv')
print(campaign_master.shape[0])
campaign_master.head()

## 022 顧客データの整形

In [None]:
# Enrich the customer master with the class and campaign masters.
# Left joins keep every customer row even when a lookup key has no match.
customer = (
    customer_master
    .merge(class_master, on='class', how='left')
    .merge(campaign_master, on='campaign_id', how='left')
)
customer

In [None]:
# Count missing values per column after the joins.
customer.isna().sum()

## 023 顧客データの基礎集計

In [None]:
# Number of non-null customer_id values per class_name.
customer.groupby('class_name')['customer_id'].count()

In [None]:
# Number of non-null customer_id values per campaign_name.
customer.groupby('campaign_name')['customer_id'].count()

In [None]:
# Number of non-null customer_id values per gender.
customer.groupby('gender')['customer_id'].count()

In [None]:
# Number of non-null customer_id values per is_deleted value.
customer.groupby('is_deleted')['customer_id'].count()

In [None]:
# Parse start_date as datetimes, then keep the customers whose start_date
# is strictly later than 2018-04-01.
customer['start_date'] = pd.to_datetime(customer['start_date'])
customer_start = customer[customer['start_date'].gt(pd.to_datetime('2018-4-1'))]
print(customer_start)
print(customer_start.shape[0])

## 024 最新顧客データの基礎集計をしてみる

In [None]:
# Parse end_date as datetimes, then keep the customers that either have no
# end_date at all or whose end_date is on or after 2019-03-31.
customer['end_date'] = pd.to_datetime(customer['end_date'])
customer_newer = customer[
    customer['end_date'].ge(pd.to_datetime('20190331'))
    | customer['end_date'].isna()
]
print(customer_newer)
# Show which end_date values survived the filter.
print(customer_newer['end_date'].unique())

In [None]:
# Number of non-null customer_id values per class_name among the filtered customers.
customer_newer.groupby('class_name')['customer_id'].count()

In [None]:
# Number of non-null customer_id values per campaign_name among the filtered customers.
customer_newer.groupby('campaign_name')['customer_id'].count()

In [None]:
# Number of non-null customer_id values per gender among the filtered customers.
customer_newer.groupby('gender')['customer_id'].count()

## 025 利用履歴データを集計する

In [None]:
# Number of missing usedate values in the usage log.
uselog['usedate'].isna().sum()

In [None]:

# Parse usedate and derive a year-month key ("%Y%m") for every log row.
uselog['usedate'] = pd.to_datetime(uselog['usedate'])
uselog['年月'] = uselog['usedate'].dt.strftime("%Y%m")

# Count log rows per (year-month, customer). The log_id count becomes
# `count`; the usedate count carries the same information and is dropped.
uselog_months = (
    uselog
    .groupby(['年月', 'customer_id'], as_index=False)
    .count()
    .rename(columns={'log_id': 'count'})
    .drop(columns='usedate')
)
uselog_months.head()

In [None]:
# Per-customer statistics of the monthly usage count: mean, median, max, min.
# 'count' is selected *before* aggregating so that mean/median are never
# applied to the string-typed 年月 column, which can raise TypeError on
# pandas 2.x. The result has the same shape as before: indexed by
# customer_id, with columns mean / median / max / min.
uselog_customer = (
    uselog_months
    .groupby('customer_id')['count']
    .agg(['mean', 'median', 'max', 'min'])
)
uselog_customer

## 026 利用履歴から定期利用フラグを作成

定期的に利用しているかを毎週同じ曜日に利用しているかで判断

In [None]:
# Tag every log row with its weekday number, then count log rows per
# (customer, year-month, weekday). The log_id count is exposed as `count`.
uselog['weekday'] = uselog['usedate'].dt.weekday
weekday_keys = ['customer_id', '年月', 'weekday']
uselog_weekday = (
    uselog
    .groupby(weekday_keys, as_index=False)
    .count()[weekday_keys + ['log_id']]
    .rename(columns={'log_id': 'count'})
)
uselog_weekday.head()

In [None]:
# For each customer keep the largest per-month, per-weekday count.
uselog_weekday = (
    uselog_weekday
    .groupby('customer_id', as_index=False)[['count']]
    .max()
)
# routine_flg is 1 when that maximum is 4 or more, otherwise 0.
uselog_weekday['routine_flg'] = 0
uselog_weekday['routine_flg'] = uselog_weekday['routine_flg'].mask(
    uselog_weekday['count'] >= 4, 1
)
uselog_weekday.head()

## 027 顧客データと利用履歴を結合

In [None]:
# Attach the per-customer usage statistics and the routine flag to the
# customer table. Left joins keep customers that have no usage rows.
customer = customer.merge(uselog_customer, on='customer_id', how='left')
customer = customer.merge(
    uselog_weekday[['customer_id', 'routine_flg']],
    on='customer_id',
    how='left',
)
customer.head()

In [None]:
# Count missing values per column after joining the usage data.
customer.isna().sum()

## 028 会員期間を計算する

2019年4月30日までの会員期間を計算する

In [None]:
from dateutil.relativedelta import relativedelta

# Reference date for the membership length: the customer's end_date, or
# 2019-04-30 for customers that have no end_date.
customer['calc_date'] = customer['end_date'].fillna(pd.to_datetime('20190430'))

# Membership length in whole months between start_date and calc_date.
# The column is built in a single pass and assigned at once, so the result
# does not depend on the DataFrame index being exactly 0..n-1 (the previous
# loop read rows by position but wrote them by label) and avoids slow
# per-row scalar assignment.
membership_deltas = map(relativedelta, customer['calc_date'], customer['start_date'])
customer['membership_period'] = [
    delta.years * 12 + delta.months for delta in membership_deltas
]
customer.head(100)

## 029 顧客行動の各種統計量を把握する

In [None]:
# Summary statistics of the per-customer monthly usage columns.
customer.loc[:, ['mean', 'median', 'max', 'min']].describe()

In [None]:
# Number of non-null customer_id values per routine_flg value.
customer.groupby('routine_flg')['customer_id'].count()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Distribution of membership_period, drawn with seaborn.
sns.histplot(x='membership_period', data=customer)

In [None]:
# The same membership_period distribution, drawn with matplotlib directly.
plt.hist(x=customer['membership_period'])

## 030 退会ユーザと継続ユーザの違い

In [None]:
# Summary statistics for the customers whose is_deleted equals 1.
customer_end = customer[customer['is_deleted'].eq(1)]
customer_end.describe()

In [None]:
# Summary statistics for the customers whose is_deleted equals 0.
customer_stay = customer[customer['is_deleted'].eq(0)]
customer_stay.describe()

In [None]:
# Save the joined customer table as CSV without writing the index column.
customer.to_csv(path_or_buf='./input/100knoks/03/customer_join.csv', index=False)