# 時系列データ処理

In [0]:
import numpy as np
import pandas as pd
import pandas_datareader.data as pdr

import matplotlib.pyplot as plt
%matplotlib inline

In [0]:
# Load the exchange-rate series (DEXJPUS) from the 'fred' data source
# for the date range given below, then preview the first 20 rows.
start_date, end_date = '2001/1/2', '2016/12/30'

fx_jpusdata = pdr.DataReader('DEXJPUS', 'fred', start_date, end_date)
fx_jpusdata.head(n=20)

In [0]:
# Look up the rows of one specific year-month (partial-string date indexing).
# Row selection has to go through .loc: indexing a DataFrame directly with a
# date string (frame['2016-04']) is interpreted as a column lookup by current
# pandas releases and fails with KeyError.
fx_jpusdata.loc['2016-04'].head()

### resample

In [0]:
# Monthly mean of the series (first five months shown).
# NOTE(review): newer pandas releases prefer the alias 'ME' over 'M' -- confirm
# against the installed version before changing it.
fx_jpusdata.resample(rule='M').mean().head(n=5)

In [0]:
# Month-end rate: the last observation inside each monthly bin.
fx_jpusdata.resample(rule='M').last().head(n=5)

In [0]:
# Semi-monthly bins ('SM': the 15th and the month end); the mean of each bin
# is displayed.
fx_jpusdata.resample(rule='SM').mean().head(n=5)

In [0]:
# Quarterly mean of the series.
fx_jpusdata.resample(rule='Q').mean().head(n=5)

### shift

In [0]:
# Shift the values down by one row (the index itself is unchanged).
fx_jpusdata.shift(periods=1).head(n=5)

In [0]:
# Shift the values down by three rows.
fx_jpusdata.shift(periods=3).head(n=5)

In [0]:
# Row-over-row change, computed by hand: current value minus the value
# one row earlier.
fx_jpsdata_diff = fx_jpusdata.sub(fx_jpusdata.shift(periods=1))
fx_jpsdata_diff.head(n=5)

In [0]:
# Same row-over-row change, this time through the built-in diff().
fx_jpsdata_diff = fx_jpusdata.diff(periods=1)
fx_jpsdata_diff.head(n=5)

In [0]:
# Change relative to the value two rows earlier, computed by hand with shift.
fx_jpsdata_diff = fx_jpusdata.sub(fx_jpusdata.shift(periods=2))
fx_jpsdata_diff.head(n=5)

In [0]:
# Two-row difference through the built-in diff().
fx_jpsdata_diff = fx_jpusdata.diff(periods=2)
fx_jpsdata_diff.head(n=5)

In [0]:
# Rate of change computed by hand: (current - previous) / previous,
# where "previous" is the value one row earlier.
fx_jpsdata_ratio = fx_jpusdata.sub(fx_jpusdata.shift(periods=1)).div(fx_jpusdata.shift(periods=1))
fx_jpsdata_ratio.head(n=5)

In [0]:
# Rate of change through the built-in pct_change().
fx_jpsdata_ratio = fx_jpusdata.pct_change(periods=1)
fx_jpsdata_ratio.head(n=5)

### rolling

In [0]:
# 3-row moving average, computed after rows containing NaN are removed.
fx_jpusdata_full = fx_jpusdata.dropna()
print(fx_jpusdata_full.head(n=5))  # raw values, printed for comparison

fx_jpusdata_full.rolling(window=3).mean().head(n=5)

In [0]:
# 3-row moving standard deviation, computed after rows containing NaN are removed.
fx_jpusdata_full = fx_jpusdata.dropna()
print(fx_jpusdata_full.head(n=5))  # raw values, printed for comparison

fx_jpusdata_full.rolling(window=3).std().head(n=5)

### Q．年ごとの各平均値を表示してください。

### Q．10日間の移動平均を作成してください。ただし、移動平均を計算する前にNaNを削除してください。

# 時系列の可視化

In [0]:
# Line chart of the whole series.
fx_jpusdata.plot(kind='line')

In [0]:
# Restrict the chart to a date range (both month labels are inclusive).
fx_jpusdata.loc[slice('2016-01', '2016-11')].plot(kind='line')

In [0]:
# Put the raw series next to its 3-row and 7-row moving averages so that
# several lines can be drawn on one chart.
merge_data_jpusdata = pd.concat(
    [fx_jpusdata] + [fx_jpusdata.rolling(window).mean() for window in (3, 7)],
    axis=1,
)
merge_data_jpusdata.columns = ['today', 'roll_3', 'roll_7']

# Keep only the rows where every column has a value.
data = merge_data_jpusdata.dropna()
data.head(n=5)

In [0]:
# Draw the raw series and both moving averages for July-November 2016.
data.loc[slice('2016-07', '2016-11'), ['today', 'roll_3', 'roll_7']].plot(kind='line')

In [0]:
# Histogram of the rate levels (10 bins is the pandas default).
fx_jpusdata.hist(bins=10)

In [0]:
# Histogram of the row-over-row rate of change, using 30 bins.
fx_jpusdata_ratio = fx_jpusdata.pct_change(periods=1)
fx_jpusdata_ratio.hist(bins=30)

In [0]:
# Rebuild the frame holding the raw series and its 3-row / 7-row moving
# averages (used by the histogram that follows).
rolling_means = [fx_jpusdata.rolling(window).mean() for window in (3, 7)]
merge_data_jpusdata = pd.concat([fx_jpusdata, *rolling_means], axis=1)
merge_data_jpusdata.columns = ['today', 'roll_3', 'roll_7']
del rolling_means  # temporary helper; keep the notebook namespace unchanged

# Keep only the rows where every column has a value.
data = merge_data_jpusdata.dropna()
data.head(n=5)

In [0]:
# Histograms of the raw series and its 3-row moving average, July-November 2016.
data.loc[slice('2016-07', '2016-11'), ['today', 'roll_3']].hist(bins=10)

# 時系列の予測モデル

In [0]:
from sklearn import linear_model

# First and last month labels used when the data is sliced by date.
start_term, end_term = '2016-01', '2016-12'

# Linear regression estimator used by the fitting cells.
model = linear_model.LinearRegression()

In [0]:
# Drop every row that contains a missing value.
data = fx_jpusdata.dropna(axis=0)

In [0]:
# Pair each value ('today') with the value one row earlier ('pre_1').
previous_row = fx_jpusdata.shift(periods=1)
merge_data_jpusdata = pd.concat([fx_jpusdata, previous_row], axis=1)
merge_data_jpusdata.columns = ['today', 'pre_1']
del previous_row  # temporary helper; keep the notebook namespace unchanged

# Rows lacking either value cannot be used for fitting.
data = merge_data_jpusdata.dropna()
data.head(n=5)

In [0]:
# Hold out the final month (end_term) as the test set.
test = data.loc[end_term]
X_test = pd.DataFrame(test['pre_1'])
y_test = test['today']

# Train on start_term..end_term *without* the held-out rows. Slicing up to
# end_term alone would leave every test row inside the training data, so the
# test score would be measured on rows the model has already been fitted to.
train = data.loc[start_term: end_term]
train = train.loc[~train.index.isin(test.index)]
X_train = pd.DataFrame(train['pre_1'])
y_train = train['today']

In [0]:
# Fit on the training rows (fit() returns the estimator, so score() can be
# chained), then print the training score followed by the test score.
print(model.fit(X_train, y_train).score(X_train, y_train))
print(model.score(X_test, y_test))

### Q. 1日前のデータに加えて、2日前と3日前のデータも使って予測モデルを構築してください。

### ローリングを用いた予測モデルの構築

In [0]:
# Feature frame for the rolling-mean model:
#   today  - the value to predict
#   pre_1  - value one row earlier
#   pre_3  - value three rows earlier
#   roll_3 - mean of the three rows *before* today
merge_data_jpusdata = pd.concat([
    fx_jpusdata,
    fx_jpusdata.shift(1),
    fx_jpusdata.shift(3),
    # rolling(3).mean() at row t averages rows t, t-1 and t-2, i.e. it contains
    # the target itself. Shift it by one row so the feature only uses values
    # that are already known when today's rate is predicted.
    fx_jpusdata.rolling(3).mean().shift(1),
], axis=1)
merge_data_jpusdata.columns = ['today', 'pre_1', 'pre_3', 'roll_3']

# Rows lacking any of the lagged values cannot be used for fitting.
data = merge_data_jpusdata.dropna()
data.head()

In [0]:
# Compare feature sets of increasing size on one shared train/test split.
#
# The final month (end_term) is held out as the test set and removed from the
# training window. Training on start_term..end_term and then scoring on
# end_term would evaluate the model on rows it was fitted to.
# The split does not depend on the feature list, so it is done once up front.
test = data.loc[end_term]
train = data.loc[start_term: end_term]
train = train.loc[~train.index.isin(test.index)]
y_train = train['today']
y_test = test['today']

for pre_list in (['pre_1'], ['pre_1', 'pre_3'], ['pre_1', 'pre_3', 'roll_3']):
    print(pre_list)
    X_train = pd.DataFrame(train[pre_list])
    X_test = pd.DataFrame(test[pre_list])

    # Training score first, then the score on the held-out month.
    fit_model = model.fit(X_train, y_train)
    print(fit_model.score(X_train, y_train))
    print(fit_model.score(X_test, y_test))
    print('\n')

In [0]:
# Refit on whatever X_train / y_train currently hold and print the training
# score followed by the test score.
print(model.fit(X_train, y_train).score(X_train, y_train))
print(model.score(X_test, y_test))

### ドル - ユーロ情報の追加

In [0]:
# Load both exchange-rate series (DEXJPUS and DEXUSEU) from the 'fred' data
# source over the same date range; the requests are issued in that order.
start_date, end_date = '2001/1/2', '2016/12/30'

fx_jpusdata, fx_useudata = (
    pdr.DataReader(series_code, 'fred', start_date, end_date)
    for series_code in ('DEXJPUS', 'DEXUSEU')
)

In [0]:
# Line chart of the DEXUSEU series.
fx_useudata.plot(kind='line')

In [0]:
from sklearn import linear_model

# Start from a fresh, unfitted linear regression estimator.
model = linear_model.LinearRegression()

# First and last month labels used when the data is sliced by date.
start_term, end_term = '2016-01', '2016-12'

In [0]:
# Feature frame that adds the DEXUSEU series:
#   today      - the DEXJPUS value to predict
#   pre_1      - DEXJPUS one row earlier
#   roll_3     - mean of the three DEXJPUS rows *before* today
#   useu_pre_1 - DEXUSEU one row earlier
merge_data_jpusdata = pd.concat([
    fx_jpusdata,
    fx_jpusdata.shift(1),
    # rolling(3).mean() at row t averages rows t, t-1 and t-2, i.e. it contains
    # the target itself. Shift it by one row so the feature only uses values
    # that are already known when today's rate is predicted.
    fx_jpusdata.rolling(3).mean().shift(1),
    fx_useudata.shift(1)
], axis=1)
merge_data_jpusdata.columns = ['today', 'pre_1', 'roll_3', 'useu_pre_1']

# Rows lacking any feature value cannot be used for fitting.
data = merge_data_jpusdata.dropna()
data.head()

In [0]:
# Compare feature sets of increasing size on one shared train/test split.
#
# The final month (end_term) is held out as the test set and removed from the
# training window. Training on start_term..end_term and then scoring on
# end_term would evaluate the model on rows it was fitted to.
# The split does not depend on the feature list, so it is done once up front.
test = data.loc[end_term]
train = data.loc[start_term: end_term]
train = train.loc[~train.index.isin(test.index)]
y_train = train['today']
y_test = test['today']

for pre_list in (['pre_1'], ['pre_1', 'roll_3'], ['pre_1', 'roll_3', 'useu_pre_1']):
    print(pre_list)
    X_train = pd.DataFrame(train[pre_list])
    X_test = pd.DataFrame(test[pre_list])

    # Training score first, then the score on the held-out month.
    fit_model = model.fit(X_train, y_train)
    print(fit_model.score(X_train, y_train))
    print(fit_model.score(X_test, y_test))
    print('\n')

### Q.ドル - ユーロの情報を追加して精度を高めて下さい。