##### 程式來源：https://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html
##### prepared for *** Introduction to Machine Learning @NCCU *** by Chao-Lin Liu

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline


# Linear Regression Example

This example uses only a single feature of the `diabetes` dataset (the one at
column index 2), in order to illustrate a two-dimensional plot of this
regression technique. The straight line can be seen in the plot, showing how
linear regression attempts to draw the straight line that minimizes the
residual sum of squares between the observed responses in the dataset and the
responses predicted by the linear approximation.

The coefficients, the mean squared error and the coefficient of determination
($R^2$) are also calculated.




In [None]:
# Code source: Jaques Grobler
# License: BSD 3 clause


import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Load the diabetes dataset. The returned object exposes, among other fields,
# `data` (the feature matrix) and `target` (the values to predict), both of
# which are used in the cells below.
diabetes = datasets.load_diabetes()

#### diabetes 裡面有很多欄位，其中 data 儲存可能跟 diabetes 相關的生理表徵資料，每一筆資料有十個數字

In [None]:
# Preview the first five rows of the feature matrix.
diabetes.data[:5]

In [None]:
# Use only one feature.
# To demonstrate a single-variable relation y = f(x), this example keeps just
# one of the ten numbers shown above: the column at index 2. Slicing with 2:3
# (rather than indexing with 2) keeps the result two-dimensional, one column wide.
diabetes_X = diabetes.data[:, 2:3]
# Preview the first five rows of the selected feature.
diabetes_X[:5]

In [None]:
# Split the feature data into training/testing sets:
# the last 20 rows are held out for testing, everything before them is used for training.
diabetes_X_train, diabetes_X_test = diabetes_X[:-20], diabetes_X[-20:]

In [None]:
# diabetes.target holds the diabetes index that corresponds to each patient
# record, i.e. the y values used in this demonstration.
# Preview the first five of them.
diabetes.target[:5]

In [None]:
# Split the targets (the diabetes index) into training/testing sets,
# using the same "last 20 entries for testing" cut as the feature data.
diabetes_y_train, diabetes_y_test = diabetes.target[:-20], diabetes.target[-20:]

In [None]:
# Choose the model (ordinary least-squares linear regression) and train it on
# the training set in one step; `fit` returns the fitted estimator itself.
regr = linear_model.LinearRegression().fit(diabetes_X_train, diabetes_y_train)

# Make predictions using the testing set: from the learned model and the test
# feature values, produce the diabetes index predicted by the model.
diabetes_y_pred = regr.predict(diabetes_X_test)

In [None]:
# Print the learned coefficient(s) and the error measured on the test set.
# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print("Mean squared error: %.2f"
      % mean_squared_error(diabetes_y_test, diabetes_y_pred))
# r2_score returns the coefficient of determination (R^2), which is a different
# metric from the explained-variance score: 1 is perfect prediction.
print('Coefficient of determination: %.2f'
      % r2_score(diabetes_y_test, diabetes_y_pred))

In [None]:
# Draw the chart.
# Plot outputs: test observations as black points, model predictions as a blue line.
fig, ax = plt.subplots()
ax.scatter(diabetes_X_test, diabetes_y_test, color='black')
ax.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)

# Hide the tick marks on both axes.
ax.set_xticks(())
ax.set_yticks(())

plt.show()