# Lecture 10 ディープラーニングに挑戦

## モデル

In [None]:
# https://dev.to/nitdgplug/stonksmaster-predict-stock-prices-using-python-ml-3hmc
# まずはWebページにあるPythonのプログラムを動かしてみる

In [None]:
import pandas as pd
import numpy as np
import pandas_datareader as pdr

# Download daily AAPL quotes for 2010-11-01 .. 2020-11-01 from Yahoo Finance.
# NOTE(review): needs network access; confirm the installed pandas_datareader
# can still reach the Yahoo endpoint.
df = pdr.get_data_yahoo(
    "AAPL",
    start="2010-11-01",
    end="2020-11-01",
)

In [None]:
# Show the first 10 rows of the downloaded data
df.head(10)

In [None]:
# 学習用のデータを抽出
from sklearn.model_selection import train_test_split

# Keep only the first column of df and move the date index into a "Date" column.
# reset_index() without inplace returns a new frame, so the column assignment
# below no longer writes into a slice of df (avoids SettingWithCopyWarning).
prices = df[df.columns[0:1]].reset_index(level=0)
# Use the date as a numeric feature: integer timestamp divided down by 10**9
# (nanoseconds -> seconds, assuming the index is datetime64[ns]).
# "int64" is spelled out because plain int is platform-dependent.
prices["timestamp"] = pd.to_datetime(df.index).astype("int64") // (10**9)
# The raw date column is no longer needed once the timestamp exists
prices = prices.drop(["Date"], axis=1)

In [None]:
# Show the first 10 rows of the data after conversion for training
prices.head(10)

In [None]:
# Split the data into training and validation sets
dataset = prices.values
# Column 1 (timestamp) is the single input feature, column 0 the target;
# both are kept as 2-D column vectors.
X = dataset[:, 1:2]
Y = dataset[:, 0:1]

# Hold out 15% of the rows; the fixed seed makes the split repeatable
seed = 7
validation_size = 0.15

X_train, X_validation, Y_train, Y_validation = train_test_split(
    X,
    Y,
    test_size=validation_size,
    random_state=seed,
)

In [None]:
# 色々なモデルで学習
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

# Cross-validation settings: fold count, RNG seed and scoring metric
num_folds, seed, scoring = 10, 7, "r2"

# Regressors to spot-check, as (label, estimator) pairs
models = [
    (' LR ', LinearRegression()),
    (' LASSO ', Lasso()),
    (' EN ', ElasticNet()),
    (' KNN ', KNeighborsRegressor()),
    (' CART ', DecisionTreeRegressor()),
]

In [None]:
# 学習と精度を表示
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# Cross-validate every candidate and print "label: mean (std)" of its scores
results, names = [], []
for name, model in models:
    # Shuffled K-fold with a fixed seed
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    cv_results = cross_val_score(
        model, X_train, Y_train, cv=kfold, scoring=scoring
    )
    names.append(name)
    results.append(cv_results)
    msg = f"{name}: {cv_results.mean():f} ({cv_results.std():f})"
    print(msg)

In [None]:
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error

# Train one decision-tree regressor on the training split
model = DecisionTreeRegressor().fit(X_train, Y_train)
# Predict and score over the whole series (training rows included)
predictions = model.predict(X)
print(mean_squared_error(Y, predictions))

# %matplotlib inline
fig = plt.figure(figsize=(24, 12))
plt.plot(X, Y, color="b")  # actual values in blue
plt.plot(X, predictions, color="r")  # predictions in red
plt.show()

In [None]:
# Replace the data source: load the stock chart CSV instead of the Yahoo data.
url = "https://raw.githubusercontent.com/shibats/tdl_python_basic/main/Lecture05/stock_chart.csv"
# Bind the result to df, not pd: assigning to pd would overwrite the pandas
# module and leave df pointing at the previous data set.
df = pd.read_csv(url)

In [None]:
# Show the first 10 rows of the loaded data
df.head(10)

In [None]:
# Extract the training data

# Different shaping from before: take the first three columns and keep only
# the price column (high, per the original note) plus a numeric timestamp.
# Also withhold 5 rows from training.
# NOTE(review): [5:] drops the FIRST 5 rows; these are the latest 5 days only
# if the CSV is sorted newest-first — confirm against the file.
# reset_index() without inplace returns a new frame, so the assignment below
# does not write into a slice of df (avoids SettingWithCopyWarning).
prices = df[df.columns[0:3]][5:].reset_index(level=0)
# Integer timestamp divided down by 10**9 (nanoseconds -> seconds, assuming
# datetime64[ns]); "int64" because plain int is platform-dependent.
prices["timestamp"] = pd.to_datetime(prices.date).astype("int64") // (10**9)
# Drop the old row labels and the columns that are not used as feature/target
prices = prices.drop(["index", "date", "open"], axis=1)

In [None]:
# Show the first 10 rows of the data after conversion for training
prices.head(10)

In [None]:
# Split the data into training and validation sets
dataset = prices.values
# Column 1 (timestamp) is the single input feature, column 0 the target;
# both are kept as 2-D column vectors.
X = dataset[:, 1:2]
Y = dataset[:, 0:1]

# Hold out 15% of the rows; the fixed seed makes the split repeatable
seed = 7
validation_size = 0.15

X_train, X_validation, Y_train, Y_validation = train_test_split(
    X,
    Y,
    test_size=validation_size,
    random_state=seed,
)

In [None]:
# Train with a variety of models
# Cross-validation settings: fold count, RNG seed and scoring metric
num_folds, seed, scoring = 10, 7, "r2"

# Regressors to spot-check, as (label, estimator) pairs
models = [
    (' LR ', LinearRegression()),
    (' LASSO ', Lasso()),
    (' EN ', ElasticNet()),
    (' KNN ', KNeighborsRegressor()),
    (' CART ', DecisionTreeRegressor()),
]

In [None]:
# Train each model and print its accuracy

# Cross-validate every candidate and print "label: mean (std)" of its scores
results, names = [], []
for name, model in models:
    # Shuffled K-fold with a fixed seed
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    cv_results = cross_val_score(
        model, X_train, Y_train, cv=kfold, scoring=scoring
    )
    names.append(name)
    results.append(cv_results)
    msg = f"{name}: {cv_results.mean():f} ({cv_results.std():f})"
    print(msg)

In [None]:
# Build the full series from the original data, i.e. including the 5 rows
# that were left out of the training data.
# reset_index() without inplace returns a new frame, so the assignment below
# does not write into a slice of df (avoids SettingWithCopyWarning).
prices = df[df.columns[0:3]].reset_index(level=0)
# Integer timestamp divided down by 10**9 (nanoseconds -> seconds, assuming
# datetime64[ns]); "int64" because plain int is platform-dependent.
prices["timestamp"] = pd.to_datetime(prices.date).astype("int64") // (10**9)
prices = prices.drop(["index", "date", "open"], axis=1)
dataset = prices.values
# Same layout as X/Y: column 1 (timestamp) is the feature, column 0 the target
X2 = dataset[:,1].reshape(-1,1)
Y2 = dataset[:,0:1]

In [None]:
# Define model
# Alternatives to try here: LinearRegression, Lasso, ElasticNet,
# KNeighborsRegressor, DecisionTreeRegressor
model = DecisionTreeRegressor()
# Fit on the training split, which was built without the 5 withheld rows
model.fit(X_train, Y_train)
# Predict over the full series: X2/Y2 include the 5 withheld rows and were
# otherwise never used. Predicting on X would only cover rows from the
# training data set.
predictions = model.predict(X2)

# %matplotlib inline
fig = plt.figure(figsize=(24, 12))
# Plot only the first 50 rows so the 5 withheld rows at the head stay visible
plt.plot(X2[:50], Y2[:50], c="b")  # actual values in blue
plt.plot(X2[:50], predictions[:50], c="r")  # predictions in red
plt.show()