In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from cycle2.my_polynomial_regression import MyPolynomialRegression

In [2]:
# Review exercise: regress "Tall" on "Weight" and query the fitted model at two weights.

# Load the table and pull out the input (Weight) and target (Tall) columns as arrays.
data = pd.read_csv('cycle2/resources/tallweight.csv')
x_train, t_train = data["Weight"].values, data['Tall'].values

# Fit the project's polynomial regression, leaving its settings at their defaults.
ply = MyPolynomialRegression()
ply.fit(x_train, t_train)

# Report the predicted "Tall" for each queried weight.
# predict() is handed a one-element list; the result is formatted with %s as-is.
tall_at_35_6 = ply.predict([35.6])
print("if Weight = 35.6, then Tall = %s." % tall_at_35_6)
tall_at_40_7 = ply.predict([40.7])
print("if Weight = 40.7, then Tall = %s." % tall_at_40_7)

if Weight = 35.6, then Tall = [138.18115595320842].
if Weight = 40.7, then Tall = [147.60120273695125].


In [3]:
# Practice exercise: fit a degree-3 polynomial to the samples in sin.csv and plot it.

# Load the training data. The CSV has no header row, so the two columns are
# named 'x' and 'y' here; rows are then sorted by x.
data = pd.read_csv('cycle1/resources/sin.csv', header=None, names=('x', 'y')).sort_values('x')
x_train = data['x'].values
t_train = data['y'].values

# Plot the original samples as black dots.
plt.plot(x_train, t_train, 'o', color='black')

# Polynomial regression (M=3), reusing the `ply` instance created in an earlier cell.
ply.fit(x_train, t_train, degree=3)

# Evaluate the fitted polynomial on an evenly spaced grid over [0, 1).
# NOTE(review): assumes the x values in sin.csv lie in [0, 1) - confirm.
x_test = np.arange(0, 1, 0.01)
t_test = ply.predict(x_test)

# The predictions belong to the grid points, so they are drawn against x_test.
# Pairing them with x_train mismatches lengths unless the training set happens
# to have exactly len(x_test) rows, and misplaces the curve even then.
plt.plot(x_test, t_test, label='M = %s' % 3, color='blue')
plt.legend()

# Render the figure.
plt.show()

In [4]:
# The same degree-3 fit, this time using scikit-learn.

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

# Build the model: polynomial feature expansion followed by a linear regression
# that is told not to fit a separate intercept term.
polynomial_step = ('poly_sk', PolynomialFeatures(degree=3))
regression_step = ('linear', LinearRegression(fit_intercept=False))
model = Pipeline([polynomial_step, regression_step])

# Reshape the 1-D training arrays into single-column 2-D arrays for this library.
# Author's note: this interface is preferable, because the hand-written
# regressor only accepts one-dimensional input data.
x_train_for_sklearn = np.array(x_train).reshape(-1, 1)
t_train_for_sklearn = np.array(t_train).reshape(-1, 1)

# Fit the pipeline.
model.fit(x_train_for_sklearn, t_train_for_sklearn)

# Plot the original samples as black dots.
plt.plot(x_train, t_train, 'o', color='black')

# Plot the learned curve, evaluated at the training inputs themselves.
fitted_curve = model.predict(x_train_for_sklearn)
plt.plot(x_train, fitted_curve, color='red')

# Render the figure.
plt.show()

In [5]:
# Increase the polynomial degree through 3, 10 and 200 and overlay the fitted curves.

# Plot the original samples as black dots.
plt.plot(x_train, t_train, 'o', color='black')

# Evenly spaced evaluation grid over [0, 1), shared by every degree.
x_test = np.arange(0, 1, 0.01)
for k in [3, 10, 200]:
    # Refit the shared `ply` instance at degree k, then evaluate on the grid.
    ply.fit(x_train, t_train, degree=k)
    t_test = ply.predict(x_test)
    # Predictions were computed at x_test, so x_test is the matching x-axis;
    # x_train generally has a different length and different positions.
    plt.plot(x_test, t_test, label="M=%s" % k)
plt.legend()
plt.show()

In [None]:
# (extra) Ridge Regression: repeat the degree sweep with ridge_param passed to fit().

# Plot the original samples as black dots.
plt.plot(x_train, t_train, 'o', color='black')

# Evenly spaced evaluation grid over [0, 1), shared by every degree.
x_test = np.arange(0, 1, 0.01)

# Regress with an explicit ridge parameter, on a separate instance from `ply`.
ply_ridge = MyPolynomialRegression()
for k in [3, 10, 200]:
    ply_ridge.fit(x_train, t_train, degree=k, ridge_param=0.01)
    t_test = ply_ridge.predict(x_test)
    # Predictions were computed at x_test, so x_test is the matching x-axis;
    # x_train generally has a different length and different positions.
    plt.plot(x_test, t_test, label="M=%s" % k)
plt.legend()
plt.show()