## 人流データ、気象データ等を読み込み、LSTM/Transformer用に前処理

In [446]:
# period tag spliced into the input CSV file names read below (population, weather, N225)
data_period = "202001to202309"

In [447]:
import pandas as pd

# Load the raw population table for the selected period and index it by timestamp.
raw_population_csv = '/Users/koki/PycharmProjects/MasterThesis/data/raw/population_' + data_period + '.csv'
raw_df = (
    pd.read_csv(raw_population_csv)
    .drop(columns='Unnamed: 0')  # discard the column named 'Unnamed: 0'
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))  # strings -> datetime
    .set_index('datetime', drop=True)
)
# strip any timezone information so the index is timezone-naive
raw_df.index = raw_df.index.tz_localize(None)
raw_df

Unnamed: 0_level_0,area,15_m,15_f,20_m,20_f,30_m,30_f,40_m,40_f,50_m,50_f,60_m,60_f,70_m,70_f,male,female,total
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2020-01-01 00:00:00,渋谷駅,1125,4089,6403,9014,471,2468,688,459,13214,3109,2949,3864,2638,1821,20690,31622.0,52312.0
2020-01-01 00:00:00,新宿駅,592,391,1939,1846,399,996,490,326,2002,455,1071,1432,887,640,5788,7678.0,13466.0
2020-01-01 00:00:00,町田駅,746,620,1642,1414,942,1243,830,617,1856,594,1053,1603,1190,1251,7274,8327.0,15601.0
2020-01-01 00:00:00,川崎駅,1009,241,1813,1571,887,1274,932,775,1638,344,1256,1983,1427,1238,7655,8733.0,16388.0
2020-01-01 00:00:00,立川駅,629,291,1391,1163,734,995,632,512,1415,330,718,1283,815,932,5324,6516.0,11840.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-30 21:00:00,八王子駅,1162,1034,2005,2334,973,1912,1002,926,2694,886,1524,2189,1813,1723,10255,11922.0,22177.0
2023-09-30 21:00:00,北千住駅,1018,3348,865,1009,812,2096,1927,949,2290,1808,1623,951,3366,1699,11591,12170.0,23761.0
2023-09-30 21:00:00,東京駅,1549,1002,3696,6803,538,2939,1237,543,5236,1129,3563,3262,2709,2728,18707,18227.0,36934.0
2023-09-30 21:00:00,赤羽駅,1497,1104,3378,4908,1275,2539,1307,1136,4412,1193,2953,2949,2503,2403,16542,17015.0,33557.0


### Preprocess the data

In [448]:
# Recompute `total` as the sum of the 14 age-by-sex columns ('15_m' ... '70_f').
# The columns are selected by name rather than by position (previously iloc[:, 1:15]) so the
# sum cannot silently pick up `male`/`female`/`total` or miss a column if the CSV layout
# changes; a missing column now raises a KeyError instead.
age_sex_cols = [
    f'{age}_{sex}'
    for age in (15, 20, 30, 40, 50, 60, 70)
    for sex in ('m', 'f')
]
raw_df['total'] = raw_df[age_sex_cols].sum(axis=1)
# count the missing values left in each column
raw_df.isna().sum()

area      0
15_m      0
15_f      0
20_m      0
20_f      0
30_m      0
30_f      0
40_m      0
40_f      0
50_m      0
50_f      0
60_m      0
60_f      0
70_m      0
70_f      0
male      0
female    4
total     0
dtype: int64

In [449]:
# Build the wide table: one `<station>_total` column per station, indexed by datetime.
use_cols = ['area', 'total']  # columns of interest
df_total = raw_df[use_cols]

cols = ['渋谷駅', '新宿駅', '町田駅', '川崎駅', '立川駅', '八王子駅', '北千住駅', '東京駅', '赤羽駅', '自由が丘駅']
# Collect the per-station frames first and concatenate once: calling pd.concat inside the
# loop copies the growing frame again on every iteration.
station_frames = [
    # keep only this station's `total` and rename it to '<station>_total'
    df_total.loc[df_total['area'] == station, ['total']].add_prefix(station + '_')
    for station in cols
]
# axis=1 aligns the station columns on the shared datetime index
df = pd.concat(station_frames, axis=1)
df

Unnamed: 0_level_0,渋谷駅_total,新宿駅_total,町田駅_total,川崎駅_total,立川駅_total,八王子駅_total,北千住駅_total,東京駅_total,赤羽駅_total,自由が丘駅_total
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-01 00:00:00,52312,13466,15601,16388,11840,12463,10114,6844,15835,9193
2020-01-01 03:00:00,29549,13772,14465,16592,11394,11738,10870,7597,15652,8302
2020-01-01 06:00:00,16932,13769,13526,16721,10888,11457,10116,18346,15415,7900
2020-01-01 09:00:00,17737,20061,14759,22414,13119,12869,10312,22968,15843,8328
2020-01-01 12:00:00,26124,27592,17690,32729,15421,14398,11487,34839,17879,10083
...,...,...,...,...,...,...,...,...,...,...
2023-09-30 09:00:00,35347,42090,21798,28723,25356,17428,17618,37380,20538,15806
2023-09-30 12:00:00,84314,92850,36892,44786,47068,24358,27022,71237,26053,25193
2023-09-30 15:00:00,110158,113744,41491,49685,52116,25661,29211,78514,29550,27711
2023-09-30 18:00:00,112153,106017,41137,47749,48999,26222,31059,65990,30933,26047


In [450]:
# count the missing values in each station column
df.isna().sum(axis=0)

渋谷駅_total      0
新宿駅_total      0
町田駅_total      0
川崎駅_total      0
立川駅_total      0
八王子駅_total     0
北千住駅_total     0
東京駅_total      0
赤羽駅_total      0
自由が丘駅_total    0
dtype: int64

### Holiday dummy

In [451]:
# make holiday dummy (1 if holiday, 0 otherwise)
import datetime
import jpholiday

# unofficial holidays, written as zero-padded month+day ("MMDD")
my_holiday_list = [
    '0102', '0103', '0214', '0314',
    '0813', '0814', '0815', '0816',
    '1031', '1224', '1225', '1230',
    '1231',
]

# 1 when jpholiday reports the timestamp as a holiday, 0 otherwise
df['official_holiday'] = [int(jpholiday.is_holiday(stamp)) for stamp in df.index]

# 1 when the timestamp's month+day is in the unofficial list, 0 otherwise
month_day = df.index.strftime("%m%d")
df['unofficial_holiday'] = month_day.isin(my_holiday_list).astype('int64')
df

Unnamed: 0_level_0,渋谷駅_total,新宿駅_total,町田駅_total,川崎駅_total,立川駅_total,八王子駅_total,北千住駅_total,東京駅_total,赤羽駅_total,自由が丘駅_total,official_holiday,unofficial_holiday
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-01-01 00:00:00,52312,13466,15601,16388,11840,12463,10114,6844,15835,9193,1,0
2020-01-01 03:00:00,29549,13772,14465,16592,11394,11738,10870,7597,15652,8302,1,0
2020-01-01 06:00:00,16932,13769,13526,16721,10888,11457,10116,18346,15415,7900,1,0
2020-01-01 09:00:00,17737,20061,14759,22414,13119,12869,10312,22968,15843,8328,1,0
2020-01-01 12:00:00,26124,27592,17690,32729,15421,14398,11487,34839,17879,10083,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-30 09:00:00,35347,42090,21798,28723,25356,17428,17618,37380,20538,15806,0,0
2023-09-30 12:00:00,84314,92850,36892,44786,47068,24358,27022,71237,26053,25193,0,0
2023-09-30 15:00:00,110158,113744,41491,49685,52116,25661,29211,78514,29550,27711,0,0
2023-09-30 18:00:00,112153,106017,41137,47749,48999,26222,31059,65990,30933,26047,0,0


In [452]:
# # show all holidays during the period in df
# jpholiday.between(df.index[0], df.index[-1])

### Month, Hour, Weekday dummy

In [453]:
day = 8  # num of timestamps in 1 day

# Month and hour are stored as strings so get_dummies treats them as categories
# (this is also why the dummy columns sort as month_1, month_10, month_11, ...).
df['month'] = df.index.month.astype(str)
df['hour'] = df.index.hour.astype(str)

# weekday() is 0 for Monday ... 6 for Sunday, matching the order of this list
dow = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
df['dow'] = [dow[weekday_no] for weekday_no in df.index.weekday]

# one-hot encode the string columns added above; numeric columns pass through unchanged
df = pd.get_dummies(df)

### Heavy rain dummy

In [454]:
import warnings

# make a heavy rain dummy (1 on the dates listed below, 0 otherwise)
rain_list = ['2017-10-20','2017-10-21','2017-10-22','2017-10-23',
        '2018-09-29','2018-09-30','2018-10-01',
        '2019-09-07','2019-09-08','2019-09-09',
        '2019-10-12','2019-10-13',
        ]
df['heavy_rain'] = df.index.strftime("%Y-%m-%d").isin(rain_list).astype('int64')

# NOTE(review): every date above is from 2017-2019, while data_period is "202001to202309".
# If none of them falls inside the loaded period the column is all zeros and carries no
# information, so surface that instead of letting it pass silently.
if not df['heavy_rain'].any():
    warnings.warn(
        "heavy_rain is 0 for every row: no date in rain_list falls inside the loaded "
        "period; update rain_list for this period or drop the column."
    )

### Weather dummy

In [455]:
# load the preprocessed Tokyo weather table for the selected period
tokyo_weather_csv = '/Users/koki/PycharmProjects/MasterThesis/data/preprocessed/weather_tokyo_' + data_period + '_preprocessed.csv'
tokyo_weather = pd.read_csv(tokyo_weather_csv)
tokyo_weather

Unnamed: 0,東京_date_time,東京_rainfall,東京_temperature,東京_dew_point_humidity,東京_humidity,東京_windspeed_value,東京_sunshine_hours,東京_global_solar_radiation,東京_snowfall,東京_weather_みぞれ,東京_weather_快晴,東京_weather_晴れ,東京_weather_曇,東京_weather_薄曇,東京_weather_雨,東京_weather_雪,東京_weather_雷電,東京_weather_霧雨
0,2020-01-01 00:00:00,0.0,3.7,-7.7,43.0,4.5,0.0,0.00,0.0,0,0,1,0,0,0,0,0,0
1,2020-01-01 03:00:00,0.0,3.6,-7.5,44.0,4.8,0.0,0.00,0.0,0,0,1,0,0,0,0,0,0
2,2020-01-01 06:00:00,0.0,4.1,-6.8,45.0,4.7,0.0,0.00,0.0,0,0,0,1,0,0,0,0,0
3,2020-01-01 09:00:00,0.0,5.2,-5.7,45.0,1.8,0.6,0.71,0.0,0,0,1,0,0,0,0,0,0
4,2020-01-01 12:00:00,0.0,9.4,-4.5,37.0,1.9,1.0,1.93,0.0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10947,2023-09-30 09:00:00,0.0,26.2,21.4,75.0,2.7,0.1,0.99,0.0,0,0,0,1,0,0,0,0,0
10948,2023-09-30 12:00:00,0.0,27.1,21.4,71.0,2.4,0.2,1.71,0.0,0,0,0,1,0,0,0,0,0
10949,2023-09-30 15:00:00,0.0,26.8,22.4,77.0,1.3,0.0,0.67,0.0,0,0,1,0,0,0,0,0,0
10950,2023-09-30 18:00:00,0.0,25.6,21.7,79.0,2.9,0.0,0.01,0.0,0,0,1,0,0,0,0,0,0


In [456]:
# Attach the Tokyo weather variables to df, matched on timestamp.
# A positional concat ignores the weather table's own timestamps, so a missing, extra or
# reordered row would silently shift every feature; merging on the timestamp (as the covid
# case merge does) keeps rows aligned and leaves NaN where no weather row exists.
tokyo_features = tokyo_weather.iloc[:, 1:].copy()  # every column after the first one
tokyo_features.index = pd.DatetimeIndex(
    pd.to_datetime(tokyo_weather['東京_date_time']), name='datetime'
)
# validate='one_to_one' raises if either side has duplicated timestamps
df = df.merge(tokyo_features, how='left', left_index=True, right_index=True, validate='one_to_one')
df

Unnamed: 0_level_0,渋谷駅_total,新宿駅_total,町田駅_total,川崎駅_total,立川駅_total,八王子駅_total,北千住駅_total,東京駅_total,赤羽駅_total,自由が丘駅_total,...,東京_snowfall,東京_weather_みぞれ,東京_weather_快晴,東京_weather_晴れ,東京_weather_曇,東京_weather_薄曇,東京_weather_雨,東京_weather_雪,東京_weather_雷電,東京_weather_霧雨
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-01 00:00:00,52312,13466,15601,16388,11840,12463,10114,6844,15835,9193,...,0.0,0,0,1,0,0,0,0,0,0
2020-01-01 03:00:00,29549,13772,14465,16592,11394,11738,10870,7597,15652,8302,...,0.0,0,0,1,0,0,0,0,0,0
2020-01-01 06:00:00,16932,13769,13526,16721,10888,11457,10116,18346,15415,7900,...,0.0,0,0,0,1,0,0,0,0,0
2020-01-01 09:00:00,17737,20061,14759,22414,13119,12869,10312,22968,15843,8328,...,0.0,0,0,1,0,0,0,0,0,0
2020-01-01 12:00:00,26124,27592,17690,32729,15421,14398,11487,34839,17879,10083,...,0.0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-30 09:00:00,35347,42090,21798,28723,25356,17428,17618,37380,20538,15806,...,0.0,0,0,0,1,0,0,0,0,0
2023-09-30 12:00:00,84314,92850,36892,44786,47068,24358,27022,71237,26053,25193,...,0.0,0,0,0,1,0,0,0,0,0
2023-09-30 15:00:00,110158,113744,41491,49685,52116,25661,29211,78514,29550,27711,...,0.0,0,0,1,0,0,0,0,0,0
2023-09-30 18:00:00,112153,106017,41137,47749,48999,26222,31059,65990,30933,26047,...,0.0,0,0,1,0,0,0,0,0,0


In [457]:
# load the preprocessed Hachioji weather table for the selected period
hachioji_weather_csv = '/Users/koki/PycharmProjects/MasterThesis/data/preprocessed/weather_hachioji_' + data_period + '_preprocessed.csv'
hachioji_weather = pd.read_csv(hachioji_weather_csv)
hachioji_weather

Unnamed: 0,八王子_date_time,八王子_rainfall,八王子_temperature,八王子_windspeed_value,八王子_sunshine_hours
0,2020-01-01 00:00:00,0.0,2.2,1.7,0.0
1,2020-01-01 03:00:00,0.0,-0.7,0.4,0.0
2,2020-01-01 06:00:00,0.0,-1.4,1.2,0.0
3,2020-01-01 09:00:00,0.0,2.9,0.5,1.0
4,2020-01-01 12:00:00,0.0,8.0,1.7,1.0
...,...,...,...,...,...
10947,2023-09-30 09:00:00,0.0,24.7,1.8,0.0
10948,2023-09-30 12:00:00,0.0,24.0,0.8,0.0
10949,2023-09-30 15:00:00,0.0,26.4,2.7,0.0
10950,2023-09-30 18:00:00,0.0,25.3,4.8,0.0


In [458]:
# Attach the Hachioji weather variables to df, matched on timestamp.
# A positional concat ignores the weather table's own timestamps, so a missing, extra or
# reordered row would silently shift every feature; merging on the timestamp (as the covid
# case merge does) keeps rows aligned and leaves NaN where no weather row exists.
hachioji_features = hachioji_weather.iloc[:, 1:].copy()  # every column after the first one
hachioji_features.index = pd.DatetimeIndex(
    pd.to_datetime(hachioji_weather['八王子_date_time']), name='datetime'
)
# validate='one_to_one' raises if either side has duplicated timestamps
df = df.merge(hachioji_features, how='left', left_index=True, right_index=True, validate='one_to_one')
df

Unnamed: 0_level_0,渋谷駅_total,新宿駅_total,町田駅_total,川崎駅_total,立川駅_total,八王子駅_total,北千住駅_total,東京駅_total,赤羽駅_total,自由が丘駅_total,...,東京_weather_曇,東京_weather_薄曇,東京_weather_雨,東京_weather_雪,東京_weather_雷電,東京_weather_霧雨,八王子_rainfall,八王子_temperature,八王子_windspeed_value,八王子_sunshine_hours
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-01 00:00:00,52312,13466,15601,16388,11840,12463,10114,6844,15835,9193,...,0,0,0,0,0,0,0.0,2.2,1.7,0.0
2020-01-01 03:00:00,29549,13772,14465,16592,11394,11738,10870,7597,15652,8302,...,0,0,0,0,0,0,0.0,-0.7,0.4,0.0
2020-01-01 06:00:00,16932,13769,13526,16721,10888,11457,10116,18346,15415,7900,...,1,0,0,0,0,0,0.0,-1.4,1.2,0.0
2020-01-01 09:00:00,17737,20061,14759,22414,13119,12869,10312,22968,15843,8328,...,0,0,0,0,0,0,0.0,2.9,0.5,1.0
2020-01-01 12:00:00,26124,27592,17690,32729,15421,14398,11487,34839,17879,10083,...,0,0,0,0,0,0,0.0,8.0,1.7,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-30 09:00:00,35347,42090,21798,28723,25356,17428,17618,37380,20538,15806,...,1,0,0,0,0,0,0.0,24.7,1.8,0.0
2023-09-30 12:00:00,84314,92850,36892,44786,47068,24358,27022,71237,26053,25193,...,1,0,0,0,0,0,0.0,24.0,0.8,0.0
2023-09-30 15:00:00,110158,113744,41491,49685,52116,25661,29211,78514,29550,27711,...,0,0,0,0,0,0,0.0,26.4,2.7,0.0
2023-09-30 18:00:00,112153,106017,41137,47749,48999,26222,31059,65990,30933,26047,...,0,0,0,0,0,0,0.0,25.3,4.8,0.0


### N225 dummy

In [459]:
# load the preprocessed N225 table for the selected period
n225_csv = '/Users/koki/PycharmProjects/MasterThesis/data/preprocessed/N225_' + data_period + '_preprocessed.csv'
n225 = pd.read_csv(n225_csv)
n225

Unnamed: 0,Date,N225_Close
0,2020-01-01 00:00:00,23204.86
1,2020-01-01 03:00:00,23204.86
2,2020-01-01 06:00:00,23204.86
3,2020-01-01 09:00:00,23204.86
4,2020-01-01 12:00:00,23204.86
...,...,...
10947,2023-09-30 09:00:00,31857.62
10948,2023-09-30 12:00:00,31857.62
10949,2023-09-30 15:00:00,31857.62
10950,2023-09-30 18:00:00,31857.62


In [460]:
# Attach N225_Close to df, matched on timestamp.
# A positional concat ignores the `Date` column, so a missing, extra or reordered row would
# silently shift the series; merging on the timestamp (as the covid case merge does) keeps
# rows aligned and leaves NaN where no N225 row exists.
n225_close = n225[['N225_Close']].copy()
n225_close.index = pd.DatetimeIndex(pd.to_datetime(n225['Date']), name='datetime')
# validate='one_to_one' raises if either side has duplicated timestamps
df = df.merge(n225_close, how='left', left_index=True, right_index=True, validate='one_to_one')
df

Unnamed: 0_level_0,渋谷駅_total,新宿駅_total,町田駅_total,川崎駅_total,立川駅_total,八王子駅_total,北千住駅_total,東京駅_total,赤羽駅_total,自由が丘駅_total,...,東京_weather_薄曇,東京_weather_雨,東京_weather_雪,東京_weather_雷電,東京_weather_霧雨,八王子_rainfall,八王子_temperature,八王子_windspeed_value,八王子_sunshine_hours,N225_Close
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-01 00:00:00,52312,13466,15601,16388,11840,12463,10114,6844,15835,9193,...,0,0,0,0,0,0.0,2.2,1.7,0.0,23204.86
2020-01-01 03:00:00,29549,13772,14465,16592,11394,11738,10870,7597,15652,8302,...,0,0,0,0,0,0.0,-0.7,0.4,0.0,23204.86
2020-01-01 06:00:00,16932,13769,13526,16721,10888,11457,10116,18346,15415,7900,...,0,0,0,0,0,0.0,-1.4,1.2,0.0,23204.86
2020-01-01 09:00:00,17737,20061,14759,22414,13119,12869,10312,22968,15843,8328,...,0,0,0,0,0,0.0,2.9,0.5,1.0,23204.86
2020-01-01 12:00:00,26124,27592,17690,32729,15421,14398,11487,34839,17879,10083,...,0,0,0,0,0,0.0,8.0,1.7,1.0,23204.86
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-30 09:00:00,35347,42090,21798,28723,25356,17428,17618,37380,20538,15806,...,0,0,0,0,0,0.0,24.7,1.8,0.0,31857.62
2023-09-30 12:00:00,84314,92850,36892,44786,47068,24358,27022,71237,26053,25193,...,0,0,0,0,0,0.0,24.0,0.8,0.0,31857.62
2023-09-30 15:00:00,110158,113744,41491,49685,52116,25661,29211,78514,29550,27711,...,0,0,0,0,0,0.0,26.4,2.7,0.0,31857.62
2023-09-30 18:00:00,112153,106017,41137,47749,48999,26222,31059,65990,30933,26047,...,0,0,0,0,0,0.0,25.3,4.8,0.0,31857.62


### Covid dummy

In [461]:
# Domestic case counts (newly confirmed positives)
# load the preprocessed case counts and index them by date
cases_tokyo = pd.read_csv('/Users/koki/PycharmProjects/MasterThesis/data/preprocessed/covid_cases_tokyo_preprocessed.csv')
cases_tokyo = cases_tokyo.set_index('Date', drop=True)
cases_tokyo.index = pd.to_datetime(cases_tokyo.index, format='%Y-%m-%d')

# An exact index merge only matches timestamps present in both frames. If the case table is
# daily (every timestamp at midnight), that would fill only the 00:00 row of each day and
# the fillna below would zero the remaining rows of that day. In that situation, look the
# counts up by calendar day so every timestamp of a day carries that day's value.
# NOTE(review): assumes a daily file is meant to apply to the whole day — confirm.
is_daily = cases_tokyo.index.is_unique and bool(
    (cases_tokyo.index == cases_tokyo.index.normalize()).all()
)
if is_daily:
    cases_for_merge = cases_tokyo.reindex(df.index.normalize())
    cases_for_merge.index = df.index
else:
    # not a plain daily table: keep the original exact-timestamp merge
    cases_for_merge = cases_tokyo

df = pd.merge(df, cases_for_merge, left_index=True, right_index=True, how='left')
# timestamps with no case record are treated as zero cases
df['Cases_Tokyo'] = df['Cases_Tokyo'].fillna(0.0)


# Worldwide case counts — TODO: not implemented (no code follows this heading)


# State-of-emergency declaration dummy: 1 for timestamps inside any period below, 0 otherwise.
# Each period is a 3-hourly range from 00:00 on its first day to 21:00 on its last day.
kinkyujitai_1, kinkyujitai_2, kinkyujitai_3, kinkyujitai_4 = (
    pd.date_range(start=first_day, end=last_slot, freq='3h')
    for first_day, last_slot in [
        ('2020-04-07', '2020-05-25 21:00'),
        ('2021-01-08', '2021-03-21 21:00'),
        ('2021-04-25', '2021-06-20 21:00'),
        ('2021-07-12', '2021-09-30 21:00'),
    ]
)

# fold the four periods into a single index of flagged timestamps
kinkyujitai = kinkyujitai_1
for later_period in (kinkyujitai_2, kinkyujitai_3, kinkyujitai_4):
    kinkyujitai = kinkyujitai.union(later_period)
df['東京_緊急事態'] = df.index.isin(kinkyujitai).astype('int64')


# "Manbou" dummy: 1 for timestamps inside any period below, 0 otherwise.
# Each period is a 3-hourly range from 00:00 on its first day to 21:00 on its last day.
manbou_1, manbou_2, manbou_3 = (
    pd.date_range(start=first_day, end=last_slot, freq='3h')
    for first_day, last_slot in [
        ('2021-04-12', '2021-04-24 21:00'),
        ('2021-06-21', '2021-07-11 21:00'),
        ('2022-01-21', '2022-03-21 21:00'),
    ]
)

# fold the three periods into a single index of flagged timestamps
manbou = manbou_1
for later_period in (manbou_2, manbou_3):
    manbou = manbou.union(later_period)
df['東京_まん防'] = df.index.isin(manbou).astype('int64')

### Order columns

In [462]:
# show every column now in df, in its current order
df.columns

Index(['渋谷駅_total', '新宿駅_total', '町田駅_total', '川崎駅_total', '立川駅_total',
       '八王子駅_total', '北千住駅_total', '東京駅_total', '赤羽駅_total', '自由が丘駅_total',
       'official_holiday', 'unofficial_holiday', 'month_1', 'month_10',
       'month_11', 'month_12', 'month_2', 'month_3', 'month_4', 'month_5',
       'month_6', 'month_7', 'month_8', 'month_9', 'hour_0', 'hour_12',
       'hour_15', 'hour_18', 'hour_21', 'hour_3', 'hour_6', 'hour_9',
       'dow_Fri', 'dow_Mon', 'dow_Sat', 'dow_Sun', 'dow_Thu', 'dow_Tue',
       'dow_Wed', 'heavy_rain', '東京_rainfall', '東京_temperature',
       '東京_dew_point_humidity', '東京_humidity', '東京_windspeed_value',
       '東京_sunshine_hours', '東京_global_solar_radiation', '東京_snowfall',
       '東京_weather_みぞれ', '東京_weather_快晴', '東京_weather_晴れ', '東京_weather_曇',
       '東京_weather_薄曇', '東京_weather_雨', '東京_weather_雪', '東京_weather_雷電',
       '東京_weather_霧雨', '八王子_rainfall', '八王子_temperature',
       '八王子_windspeed_value', '八王子_sunshine_hours', 'N225_Close',
       'Cas

In [463]:
# # order the columns: the first half = variables to predict, the latter half = variables to explain
# new_cols = ['渋谷駅_total', '新宿駅_total', '町田駅_total', '川崎駅_total', '立川駅_total',
#        '八王子駅_total', '北千住駅_total', '東京駅_total', '赤羽駅_total', '自由が丘駅_total',
#        'N225_Close',
#        'official_holiday', 'unofficial_holiday', 'month_1', 'month_10',
#        'month_11', 'month_12', 'month_2', 'month_3', 'month_4', 'month_5',
#        'month_6', 'month_7', 'month_8', 'month_9', 'hour_0', 'hour_12',
#        'hour_15', 'hour_18', 'hour_21', 'hour_3', 'hour_6', 'hour_9',
#        'dow_Fri', 'dow_Mon', 'dow_Sat', 'dow_Sun', 'dow_Thu', 'dow_Tue',
#        'dow_Wed', 'heavy_rain', '東京_rainfall', '東京_temperature',
#        '東京_dew_point_humidity', '東京_humidity', '東京_windspeed_value',
#        '東京_sunshine_hours', '東京_global_solar_radiation', '東京_snowfall',
#        '東京_weather_みぞれ', '東京_weather_快晴', '東京_weather_晴れ', '東京_weather_曇',
#        # '東京_weather_氷あられ', # not always present in the data
#        '東京_weather_薄曇', '東京_weather_雨', '東京_weather_雪',
#        '東京_weather_雷電', '八王子_rainfall', '八王子_temperature',
#        '八王子_windspeed_value', '八王子_sunshine_hours']
# df = df[new_cols]
# df

### Save

In [464]:
# df.to_csv('/Users/koki/PycharmProjects/MasterThesis/data/preprocessed/' + data_period + '_10areas_for_analysis.csv', encoding='utf-8')