# 特徴量作成

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the base feature table and the calendar table; the date columns are
# parsed to datetime64 on read so they can be joined on and decomposed later.
features_temp1 = pd.read_csv(
    '../data/features_format.csv',
    parse_dates=['visit_date'],
)
date_info = pd.read_csv(
    '../data/date_info.csv',
    parse_dates=['calendar_date'],
)

In [3]:
# Left-join the calendar columns onto every feature row by date, then discard
# the right-hand join key, which duplicates visit_date after the merge.
features_temp1_with_info = features_temp1.merge(
    date_info,
    how='left',
    left_on='visit_date',
    right_on='calendar_date',
).drop(columns='calendar_date')

In [4]:
# Split visit_date into separate year / month / day columns (added in that order).
visit_date_parts = features_temp1_with_info['visit_date'].dt
for part_name in ('year', 'month', 'day'):
    features_temp1_with_info[part_name] = getattr(visit_date_parts, part_name)

## 月末フラグ
- 各月の最後の3日間

In [5]:
# Start with the end-of-month flag cleared on every row.
features_temp1_with_info = features_temp1_with_info.assign(end_of_month_flg=0)

In [6]:
# Flag the last three calendar days of each month.
# The month length is derived from visit_date itself rather than from
# hard-coded month lists, so February is handled for every year (the previous
# version only covered February 2016 and February 2017).  For 2016-2017 the
# flagged days are unchanged: 29-31 / 28-30 / 27-29 (Feb 2016) / 26-28 (Feb 2017).
visit_date_parts = features_temp1_with_info['visit_date'].dt
in_last_three_days = visit_date_parts.day > visit_date_parts.days_in_month - 3
features_temp1_with_info.loc[in_last_three_days, 'end_of_month_flg'] = 1

## 給料日フラグ
- 毎月の5の倍数の日（5・10・15・20・25・30日）
- 休日の場合は前営業日

In [7]:
# Create both payday columns cleared: payday_flg_temp holds the raw
# day-of-month candidates, payday_flg the adjusted result.
for flag_column in ('payday_flg_temp', 'payday_flg'):
    features_temp1_with_info[flag_column] = 0

In [8]:
# Every day-of-month that is a multiple of 5 is a payday candidate; mark it in
# both the raw (temp) column and the column that is adjusted afterwards.
on_multiple_of_five = features_temp1_with_info['day'].isin([5, 10, 15, 20, 25, 30])
for flag_column in ('payday_flg_temp', 'payday_flg'):
    features_temp1_with_info.loc[on_multiple_of_five, flag_column] = 1

In [9]:
# Look-ahead columns: is the calendar day 1 / 2 days after visit_date a payday
# candidate (day-of-month 5, 10, ..., 30 - the same rule that fills
# payday_flg_temp)?
#
# This is computed from the date rather than with a row-wise shift(-1)/(-2):
# the frame contains rows for several stores (air_store_id), so the following
# row is not guaranteed to be the following calendar day - e.g. the last row of
# one store would otherwise read the flag from another store's first date.
payday_days_of_month = [5, 10, 15, 20, 25, 30]
for days_ahead, lookahead_column in ((1, 'payday_1_shift'), (2, 'payday_2_shift')):
    later_date = features_temp1_with_info['visit_date'] + pd.Timedelta(days=days_ahead)
    features_temp1_with_info[lookahead_column] = (
        later_date.dt.day.isin(payday_days_of_month).astype('int64')
    )

In [10]:
# A payday candidate that falls on a Saturday or Sunday is not a payday ...
is_candidate = features_temp1_with_info['payday_flg_temp'] == 1
on_weekend = features_temp1_with_info['day_of_week'].isin(['Saturday', 'Sunday'])
features_temp1_with_info.loc[is_candidate & on_weekend, 'payday_flg'] = 0

# ... instead the Friday is flagged when either look-ahead column
# (payday_1_shift / payday_2_shift) equals 1.
on_friday = features_temp1_with_info['day_of_week'] == 'Friday'
candidate_ahead = (
    features_temp1_with_info['payday_1_shift'].isin([1])
    | features_temp1_with_info['payday_2_shift'].isin([1])
)
features_temp1_with_info.loc[on_friday & candidate_ahead, 'payday_flg'] = 1

In [11]:
# A payday candidate on a weekday that is flagged as a holiday is not a payday.
on_weekday = ~features_temp1_with_info['day_of_week'].isin(['Saturday', 'Sunday'])
is_holiday = features_temp1_with_info['holiday_flg'] == 1
is_candidate = features_temp1_with_info['payday_flg_temp'] == 1
features_temp1_with_info.loc[on_weekday & is_holiday & is_candidate, 'payday_flg'] = 0

# Hand-maintained list of dates that are flagged as paydays instead.
# NOTE(review): presumably the business day preceding each weekday-holiday
# payday; only 2016 dates are listed - confirm whether later dates are needed.
# The literals are converted to datetime64 explicitly: matching a datetime
# column against plain strings with isin() relies on implicit casting, which
# recent pandas versions deprecate.
substitute_paydays = pd.to_datetime(
    ['2016-05-02', '2016-08-12', '2016-10-07', '2016-12-29']
)
on_substitute_day = features_temp1_with_info['visit_date'].isin(substitute_paydays)
features_temp1_with_info.loc[on_substitute_day, 'payday_flg'] = 1

In [12]:
# The intermediate payday columns are no longer needed once payday_flg is final.
features_temp1_with_info = features_temp1_with_info.drop(
    columns=['payday_flg_temp', 'payday_1_shift', 'payday_2_shift'],
)

## 祝前日フラグ
- 金曜日・土曜日と祝日の前日

In [17]:
# Start with the flag cleared on every row.
features_temp1_with_info['happy_day_flg'] = 0

# holiday_1_shift = holiday_flg of the calendar day after visit_date.
# It is looked up by date rather than taken from the next row (shift(-1)):
# the frame contains rows for several stores (air_store_id), so the next row
# is not guaranteed to be the next calendar day.  Dates whose following day
# does not occur anywhere in the frame get NaN, which never equals 1.
# Assumes holiday_flg is identical for all rows sharing a visit_date - TODO confirm.
holiday_by_date = (
    features_temp1_with_info
    .dropna(subset=['visit_date'])
    .drop_duplicates(subset='visit_date')
    .set_index('visit_date')['holiday_flg']
)
following_day = features_temp1_with_info['visit_date'] + pd.Timedelta(days=1)
features_temp1_with_info['holiday_1_shift'] = following_day.map(holiday_by_date)

In [18]:
# Fridays and Saturdays always count as "happy days".
on_friday_or_saturday = features_temp1_with_info['day_of_week'].isin(['Friday', 'Saturday'])
features_temp1_with_info.loc[on_friday_or_saturday, 'happy_day_flg'] = 1

In [19]:
# Rows whose look-ahead holiday column equals 1 are also flagged.
holiday_ahead = features_temp1_with_info['holiday_1_shift'].eq(1)
features_temp1_with_info.loc[holiday_ahead, 'happy_day_flg'] = 1

In [20]:
# The look-ahead helper column is not part of the final feature set.
features_temp1_with_info = features_temp1_with_info.drop(columns=['holiday_1_shift'])

## 定休日フラグ
- 各店舗における2016年1月1日から2017年3月14日までの来客数が0の曜日

In [21]:
# Total visitors per (store, weekday) over all rows of the frame, as a flat
# table with the total stored in 'grouped_visitors'.
visitors_by_dayofweek = (
    features_temp1_with_info
    .groupby(['air_store_id', 'day_of_week'])['visitors']
    .sum()
    .reset_index()
    .rename(columns={'visitors': 'grouped_visitors'})
)

In [22]:
# Bring the per-(store, weekday) visitor total back onto every feature row.
features_temp1_with_info = features_temp1_with_info.merge(
    visitors_by_dayofweek,
    how='left',
    on=['air_store_id', 'day_of_week'],
)

In [23]:
# reg_holiday_flg is 1 where the store's visitor total for that weekday is
# exactly 0, and 0 otherwise (including rows where the total is missing).
no_visitors_on_weekday = features_temp1_with_info['grouped_visitors'] == 0
features_temp1_with_info['reg_holiday_flg'] = no_visitors_on_weekday.astype('int64')
features_temp1_with_info = features_temp1_with_info.drop(columns='grouped_visitors')

## csv出力
- ここまでのデータをcsvへ書き出す

In [396]:
# Export the day-related features built so far.  to_csv keeps its default
# index=True, so the row index is written as the first (unnamed) column.
day_features_csv = '../data/features_related_days.csv'
features_temp1_with_info.to_csv(day_features_csv)

## 0622 17:00 更新分

## 月別の消費支出と月別の日経平均の追加

In [71]:
# 'YYYY-MM' string key used to join the monthly external indicators.
visit_year_month = features_temp1_with_info['visit_date'].dt.strftime('%Y-%m')
features_temp1_with_info['year_month'] = visit_year_month

In [48]:
# Both external monthly series are read the same way: UTF-8, only the first
# two columns, with the '年月' (year-month) column parsed as a date.
monthly_csv_options = dict(encoding='utf-8', usecols=[0, 1], parse_dates=['年月'])
consume_expenditure = pd.read_csv('../data/NC11M.csv', **monthly_csv_options)
nikkei = pd.read_csv('../data/NE51M.csv', **monthly_csv_options)

In [57]:
# Inner-join the two monthly series on their shared '年月' column.
external_data = consume_expenditure.merge(nikkei, on='年月')

In [64]:
# Same 'YYYY-MM' key format as the feature table, for the later join.
external_year_month = external_data['年月'].dt.strftime('%Y-%m')
external_data['year_month'] = external_year_month

In [67]:
# Give the indicator columns ASCII names and drop the original date column,
# which has been replaced by the 'year_month' key.
external_data = external_data.rename(
    columns={
        '消費支出 二人以上の世帯 (%)': 'consumption_expenditure',
        '日経平均 (円)': 'nikkei',
    },
).drop(columns='年月')

In [69]:
# Keep only the months 2016-01 through 2017-03 (both ends inclusive); the
# 'YYYY-MM' strings compare correctly in lexicographic order.
in_study_period = external_data['year_month'].between('2016-01', '2017-03')
external_data = external_data[in_study_period]

In [72]:
# Attach the monthly indicators to every row by 'year_month', then remove the
# date-part helper columns that were only needed to build flags and join keys.
features_related_days = features_temp1_with_info.merge(
    external_data,
    how='left',
    on='year_month',
).drop(columns=['year', 'month', 'day', 'year_month'])

In [76]:
# Write the final feature table.  to_csv keeps its default index=True, so the
# row index is written as the first (unnamed) column.
final_features_csv = '../data/features_related_days.csv'
features_related_days.to_csv(final_features_csv)