# 特徴量作成

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
%matplotlib inline

In [377]:
# Load the per-visit feature table and the calendar table, parsing their
# date columns into datetime64 at read time.
features_temp1 = pd.read_csv(
    filepath_or_buffer='../data/features_format.csv',
    parse_dates=['visit_date'],
)
date_info = pd.read_csv(
    filepath_or_buffer='../data/date_info.csv',
    parse_dates=['calendar_date'],
)

In [378]:
# Attach the calendar attributes to every feature row by date (left join keeps
# all feature rows), then discard the now-redundant join key from the right side.
features_temp1_with_info = features_temp1.merge(
    date_info,
    how='left',
    left_on='visit_date',
    right_on='calendar_date',
).drop(columns='calendar_date')

In [379]:
# Split visit_date into its year / month / day-of-month components.
features_temp1_with_info = features_temp1_with_info.assign(
    year=lambda frame: frame['visit_date'].dt.year,
    month=lambda frame: frame['visit_date'].dt.month,
    day=lambda frame: frame['visit_date'].dt.day,
)

## 月末フラグ
- 各月のラスト3日

In [380]:
# Start with the end-of-month flag cleared on every row.
features_temp1_with_info = features_temp1_with_info.assign(end_of_month_flg=0)

In [381]:
# Flag the last three days of every month.
# The month length is taken from the date itself (dt.days_in_month) instead of
# hard-coded month lists and per-year February rules, so leap years and years
# other than 2016/2017 are handled as well. For 2016 and 2017 the flagged rows
# are the same as with the previous hard-coded tables.
features_temp1_with_info.loc[
    (
        features_temp1_with_info['visit_date'].dt.days_in_month
        - features_temp1_with_info['visit_date'].dt.day
    ) < 3,
    'end_of_month_flg',
] = 1

## 給料日フラグ
- 各月5の倍数日
- 休日の場合は前営業日

In [382]:
# Two payday columns, both cleared: a scratch copy holding the raw candidates
# and the final flag that later cells adjust.
features_temp1_with_info = features_temp1_with_info.assign(
    payday_flg_temp=0,
    payday_flg=0,
)

In [383]:
# Days of the month that are multiples of 5 are payday candidates; mark them
# in both the scratch column and the final column in one assignment.
features_temp1_with_info.loc[
    features_temp1_with_info['day'].isin([5, 10, 15, 20, 25, 30]),
    ['payday_flg_temp', 'payday_flg'],
] = 1

In [384]:
# Look-ahead flags: is the next calendar day / the day after next a payday
# candidate (day of month in 5, 10, ..., 30)?
# These are computed from visit_date itself rather than by shifting rows:
# a row shift reads whatever row happens to come next, which is the wrong day
# at the boundary between two stores and wherever a date is missing from the
# table. The candidate rule depends only on the day of the month, so the
# date-based form gives the same answer on contiguous, date-sorted rows.
features_temp1_with_info['payday_1_shift'] = (
    (features_temp1_with_info['visit_date'] + pd.Timedelta(days=1))
    .dt.day.isin([5, 10, 15, 20, 25, 30])
    .astype(int)
)
features_temp1_with_info['payday_2_shift'] = (
    (features_temp1_with_info['visit_date'] + pd.Timedelta(days=2))
    .dt.day.isin([5, 10, 15, 20, 25, 30])
    .astype(int)
)

In [385]:
# A payday candidate that lands on a weekend is not a payday itself ...
features_temp1_with_info.loc[
    features_temp1_with_info['payday_flg_temp'].eq(1)
    & features_temp1_with_info['day_of_week'].isin(['Saturday', 'Sunday']),
    'payday_flg',
] = 0
# ... instead the Friday directly before it (candidate one or two days ahead)
# is flagged.
features_temp1_with_info.loc[
    features_temp1_with_info['day_of_week'].eq('Friday')
    & features_temp1_with_info[['payday_1_shift', 'payday_2_shift']]
    .isin([1])
    .any(axis=1),
    'payday_flg',
] = 1

In [386]:
# A payday candidate that is a holiday (holiday_flg == 1) on a weekday is
# cleared; weekend candidates are handled separately.
features_temp1_with_info.loc[
    (~features_temp1_with_info['day_of_week'].isin(['Saturday', 'Sunday']))
    & (features_temp1_with_info['holiday_flg'] == 1)
    & (features_temp1_with_info['payday_flg_temp'] == 1),
    'payday_flg',
] = 0
# Hand-picked dates forced to payday. Presumably these are the previous
# business days for the holiday candidates cleared above -- the list only
# contains 2016 dates; TODO confirm whether later holidays need entries.
# The strings are converted to datetimes explicitly: passing raw strings to
# isin() on a datetime64 column relies on implicit parsing that pandas 2.2
# deprecates.
features_temp1_with_info.loc[
    features_temp1_with_info['visit_date'].isin(
        pd.to_datetime(['2016-05-02', '2016-08-12', '2016-10-07', '2016-12-29'])
    ),
    'payday_flg',
] = 1

In [387]:
# The scratch columns used to derive payday_flg are no longer needed.
features_temp1_with_info.drop(
    ['payday_flg_temp', 'payday_1_shift', 'payday_2_shift'],
    axis='columns',
    inplace=True,
)

## 祝前日フラグ
- 金曜日・土曜日と祝日の前日

In [388]:
# Start with the flag cleared on every row.
features_temp1_with_info['happy_day_flg'] = 0
# holiday_flg of the NEXT CALENDAR DAY for each row.
# Looked up by date instead of by shifting rows: a row shift reads whatever
# row comes next, which is the wrong day at the boundary between two stores
# and wherever a date is missing from the table. The lookup table is built
# from this frame (one holiday_flg per distinct visit_date); a next day that
# does not occur in the frame yields NaN, which the later `== 1` test treats
# as "not a holiday".
features_temp1_with_info['holiday_1_shift'] = (
    features_temp1_with_info['visit_date'] + pd.Timedelta(days=1)
).map(
    features_temp1_with_info
    .drop_duplicates('visit_date')
    .set_index('visit_date')['holiday_flg']
)

In [389]:
# Fridays and Saturdays are always flagged.
features_temp1_with_info['happy_day_flg'] = features_temp1_with_info['happy_day_flg'].mask(
    features_temp1_with_info['day_of_week'].isin(['Friday', 'Saturday']),
    1,
)

In [390]:
# Rows whose look-ahead holiday flag is 1 are flagged as well.
features_temp1_with_info['happy_day_flg'] = features_temp1_with_info['happy_day_flg'].mask(
    features_temp1_with_info['holiday_1_shift'].eq(1),
    1,
)

In [391]:
# Remove the look-ahead scratch column now that happy_day_flg is final.
del features_temp1_with_info['holiday_1_shift']

## 定休日フラグ
- 各店舗における2016年1月1日から2017年3月14日までの来客数が0の曜日

In [392]:
# Total visitors per (store, weekday) over all rows in the frame, returned as
# a flat table whose value column is named grouped_visitors.
visitors_by_dayofweek = (
    features_temp1_with_info
    .groupby(['air_store_id', 'day_of_week'])['visitors']
    .sum()
    .reset_index(name='grouped_visitors')
)

In [393]:
# Bring the per-(store, weekday) visitor total back onto every row.
features_temp1_with_info = features_temp1_with_info.merge(
    visitors_by_dayofweek,
    how='left',
    on=['air_store_id', 'day_of_week'],
)

In [394]:
# A (store, weekday) pair whose visitor total is exactly 0 is treated as that
# store's regular closing day: 1 for those rows, 0 otherwise.
features_temp1_with_info['reg_holiday_flg'] = (
    features_temp1_with_info['grouped_visitors'].eq(0).astype('int64')
)
# The helper total is only needed to derive the flag.
del features_temp1_with_info['grouped_visitors']

## csv出力
- ここまでのデータをcsvへ書き出す

In [396]:
# Persist the frame with all day-related flags. The row index is written too
# (pandas default), so the file starts with an unnamed index column.
features_temp1_with_info.to_csv(
    path_or_buf='../data/features_related_days.csv',
)