In [142]:
import numpy as np
from sklearn.datasets import load_linnerud
import pandas as pd

import scipy as sp
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

In [143]:
# Load the Linnerud dataset and print its bundled description.
linnerud = load_linnerud()
print(linnerud.DESCR)
# `linnerud.frame` is None when the dataset is loaded without as_frame=True,
# so displaying it showed nothing. Request the combined features + targets
# DataFrame explicitly for display; `linnerud` itself is loaded exactly as
# before, so the cells below are unaffected.
load_linnerud(as_frame=True).frame

.. _linnerrud_dataset:

Linnerrud dataset
-----------------

**Data Set Characteristics:**

    :Number of Instances: 20
    :Number of Attributes: 3
    :Missing Attribute Values: None

The Linnerud dataset is a multi-output regression dataset. It consists of three
excercise (data) and three physiological (target) variables collected from
twenty middle-aged men in a fitness club:

- *physiological* - CSV containing 20 observations on 3 physiological variables:
   Weight, Waist and Pulse.
- *exercise* - CSV containing 20 observations on 3 exercise variables:
   Chins, Situps and Jumps.

.. topic:: References

  * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris:
    Editions Technic.



In [144]:
"""
Explanatory (feature) variables
Chins: chin-ups
Situps: sit-ups
Jumps: number of jumps
"""
# Wrap the raw feature array in a DataFrame labelled with the dataset's
# feature names, then preview the first rows (head() defaults to 5 rows).
linnerudFrame = pd.DataFrame(data=linnerud.data, columns=linnerud.feature_names)
linnerudFrame.head()

Unnamed: 0,Chins,Situps,Jumps
0,5.0,162.0,60.0
1,2.0,110.0,60.0
2,12.0,101.0,101.0
3,12.0,105.0,37.0
4,13.0,155.0,58.0


In [145]:
"""
Target variables
Weight: body weight (unit is probably pounds)
Waist: waist measurement (probably inches)
Pulse: heart rate
"""
# Wrap the raw target array in a DataFrame labelled with the dataset's
# target names, then preview the first rows (head() defaults to 5 rows).
linnerudTarget = pd.DataFrame(data=linnerud.target, columns=linnerud.target_names)
linnerudTarget.head()

Unnamed: 0,Weight,Waist,Pulse
0,191.0,36.0,50.0
1,189.0,37.0,52.0
2,193.0,38.0,58.0
3,162.0,35.0,62.0
4,189.0,35.0,46.0


In [146]:
# Use the three exercise measurements to predict the physiological measurements with multiple linear regression

In [147]:
# Predicting Weight

In [148]:
# Feature matrix for the Weight model: all three exercise columns as a NumPy array.
x1 = linnerudFrame[["Chins", "Situps", "Jumps"]].to_numpy()
x1

array([[  5., 162.,  60.],
       [  2., 110.,  60.],
       [ 12., 101., 101.],
       [ 12., 105.,  37.],
       [ 13., 155.,  58.],
       [  4., 101.,  42.],
       [  8., 101.,  38.],
       [  6., 125.,  40.],
       [ 15., 200.,  40.],
       [ 17., 251., 250.],
       [ 17., 120.,  38.],
       [ 13., 210., 115.],
       [ 14., 215., 105.],
       [  1.,  50.,  50.],
       [  6.,  70.,  31.],
       [ 12., 210., 120.],
       [  4.,  60.,  25.],
       [ 11., 230.,  80.],
       [ 15., 225.,  73.],
       [  2., 110.,  43.]])

In [149]:
# Regression target for the first model: the Weight column as a NumPy array.
y1 = linnerudTarget["Weight"].to_numpy()
y1

array([191., 189., 193., 162., 189., 182., 211., 167., 176., 154., 169.,
       166., 154., 247., 193., 202., 176., 157., 156., 138.])

In [150]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
x1Train, x1Test, y1Train, y1Test = train_test_split(
    x1,
    y1,
    test_size=0.2,
    random_state=0,
)

In [151]:
# Sanity-check the split sizes: one shape per line, in the order
# x1Train, x1Test, y1Train, y1Test.
print(*(part.shape for part in (x1Train, x1Test, y1Train, y1Test)), sep="\n")

(16, 3)
(4, 3)
(16,)
(4,)


In [152]:
# Fit a multiple linear regression with Weight as the target.
# fit() hands back the fitted estimator, so construction and training are
# chained; the bare name on the last line keeps the estimator repr as the
# cell output.
modelForWeight = LinearRegression().fit(x1Train, y1Train)
modelForWeight

LinearRegression()

In [153]:
# Predict Weight for the held-out test rows and display the predictions.
yPredict = modelForWeight.predict(X=x1Test)
yPredict

array([153.63090772, 203.26736591, 200.86145905, 153.90911951])

In [154]:
# Ground-truth Weight values for the same test rows, for comparison with the predictions.
# The [:5] slice keeps everything here: the test split holds only 4 samples.
y1Test[:5]

array([156., 189., 138., 176.])

In [155]:
# Root mean squared error (RMSE) of the Weight model on the test split.
# Predictions are recomputed from modelForWeight instead of being read from the
# shared `yPredict` variable, which the Waist section later overwrites with an
# array of the same shape; this keeps the reported value correct even if the
# cell is re-run after the Waist cells.
print("RMSE...", np.sqrt(mean_squared_error(y1Test, modelForWeight.predict(x1Test))))

RMSE... 34.09083587423487


In [156]:
# Next, predicting Waist

In [157]:
# Inputs for the Waist model: the same three exercise columns as features,
# with the Waist column as the target. Display the target values.
x2 = linnerudFrame[["Chins", "Situps", "Jumps"]].to_numpy()
y2 = linnerudTarget["Waist"].to_numpy()
y2

array([36., 37., 38., 35., 35., 36., 38., 34., 31., 33., 34., 33., 34.,
       46., 36., 37., 37., 32., 33., 33.])

In [158]:
# Split with the same test fraction (20%) and seed as the Weight experiment.
x2Train, x2Test, y2Train, y2Test = train_test_split(
    x2,
    y2,
    test_size=0.2,
    random_state=0,
)

In [159]:
# Train a second linear regression, this time with Waist as the target.
# fit() hands back the fitted estimator, so the bare name on the last line
# reproduces the estimator repr as the cell output.
modelForWaist = LinearRegression().fit(x2Train, y2Train)
modelForWaist

LinearRegression()

In [160]:
# Predict Waist for the held-out test rows and display the predictions.
# Note: this reuses the name `yPredict`, replacing the Weight predictions.
yPredict = modelForWaist.predict(X=x2Test)
yPredict

array([31.11656185, 38.83382692, 38.31456627, 31.07098739])

In [161]:
# Ground-truth Waist values for the test rows, for comparison with the predictions.
y2Test

array([33., 37., 33., 31.])

In [162]:
# Root mean squared error (RMSE) of the Waist model on the test split.
# Predictions are recomputed from modelForWaist rather than read from the shared
# `yPredict` variable, which the Weight section also assigns; this keeps the
# reported value tied to the Waist model regardless of cell execution order.
print("RMSE...", np.sqrt(mean_squared_error(y2Test, modelForWaist.predict(x2Test))))

RMSE... 2.9647898003042883
