In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from sklearn.datasets import load_diabetes
# Load scikit-learn's bundled diabetes regression dataset. Later cells read the
# returned object's .data, .target and .feature_names attributes.
diabets = load_diabetes()

In [3]:
from sklearn.metrics import r2_score, mean_squared_error

In [4]:
# Echo the whole dataset object to inspect its contents; the repr begins with
# the DESCR text that describes the 442-patient diabetes data.
diabets

{'DESCR': '.. _diabetes_dataset:\n\nDiabetes dataset\n----------------\n\nTen baseline variables, age, sex, body mass index, average blood\npressure, and six blood serum measurements were obtained for each of n =\n442 diabetes patients, as well as the response of interest, a\nquantitative measure of disease progression one year after baseline.\n\n**Data Set Characteristics:**\n\n  :Number of Instances: 442\n\n  :Number of Attributes: First 10 columns are numeric predictive values\n\n  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n\n  :Attribute Information:\n      - age     age in years\n      - sex\n      - bmi     body mass index\n      - bp      average blood pressure\n      - s1      tc, total serum cholesterol\n      - s2      ldl, low-density lipoproteins\n      - s3      hdl, high-density lipoproteins\n      - s4      tch, total cholesterol / HDL\n      - s5      ltg, possibly log of serum triglycerides level\n      - s6      glu, b

In [5]:
# Build a table with one column per dataset feature name, append the regression
# target as the final column, then preview the first rows.
df = pd.DataFrame(data=diabets.data, columns=diabets.feature_names).assign(
    target=diabets.target
)
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0


In [6]:
from sklearn.linear_model import LinearRegression

In [7]:
# Fit a one-feature linear regression for each predictor column and print its
# R^2 on the same data it was fitted on, to compare features individually.
for feature in df.columns[:-1]:  # the last column is 'target', so skip it
    X = df[[feature]].to_numpy()  # 2-D (n_samples, 1) matrix for the estimator
    score = LinearRegression().fit(X, diabets.target).score(X, diabets.target)
    print(f'{feature}:\t{score:.4f}')

age:	0.0353
sex:	0.0019
bmi:	0.3439
bp:	0.1949
s1:	0.0450
s2:	0.0303
s3:	0.1559
s4:	0.1853
s5:	0.3202
s6:	0.1463


In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    diabets.data,
    diabets.target,
    test_size=0.2,
    random_state=2022,
)

In [9]:
# Fit ordinary least squares on every feature of the training split; the
# trailing expression echoes the fitted estimator as the cell output.
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)

LinearRegression()

In [10]:
# R^2 of the linear model on the training split (not the held-out test split).
lr.score(X_train,y_train)

0.5146856519908352

In [11]:
# Fitted coefficients, one per input feature.
lr.coef_

array([   4.82162607, -235.07445037,  517.2244014 ,  368.79040457,
       -465.12359705,  183.21024117,  -55.09170969,   83.81752251,
        644.94835298,   81.97954842])

In [12]:
# Fitted intercept term of the linear model.
lr.intercept_

152.99098267465175

In [13]:
# Predict the target for the held-out test split with the linear model.
pred_lr = lr.predict(X_test)

In [14]:
# Show the linear model's test-set predictions.
pred_lr

array([ 72.06829346,  57.44230572, 220.44722097, 145.12682042,
       168.26707061, 189.33256404, 175.7665303 , 120.15601186,
       274.8139244 , 128.66322433,  92.37384112, 211.49104519,
        72.03247279, 184.89868675, 116.4675909 , 272.62785944,
       132.95725995, 199.66883603, 167.37664304, 107.74084333,
       106.28207429,  89.30642678, 185.34965381, 107.42172376,
       300.6149296 , 164.26758223, 217.91383701,  96.11825254,
       145.84410922, 157.13629245,  54.48182464, 233.86761195,
       239.74575771, 168.76088885, 163.10246909,  79.50947402,
        81.60976141, 250.77373601, 117.82518626, 168.49080383,
       166.24787338, 143.04460958,  90.70616909,  73.3517584 ,
       266.29670477,  65.83885945, 160.55866391, 113.91892297,
       118.9502817 , 214.36479375, 100.33101987, 288.72687732,
       135.12840829, 155.35018646, 215.38902968, 256.5877121 ,
       215.60124473, 162.14021565, 134.65176492, 116.88009065,
       164.64300985, 199.14139902, 160.27377506,  57.83

### Support Vector Regression (SVR)

In [15]:
from sklearn.svm import SVR

# Support vector regressor with scikit-learn's default hyperparameters, trained
# on the same split as the linear model; the last line echoes the estimator.
svr = SVR()
svr.fit(X=X_train, y=y_train)

SVR()

In [16]:
# Score the SVR on the held-out test split.
pred_sv = svr.predict(X_test)
r2_sv = r2_score(y_test, pred_sv)
mse_sv = mean_squared_error(y_test, pred_sv)
# Surface both metrics as the cell output so the fit quality is visible
# instead of being computed and silently discarded.
r2_sv, mse_sv

In [17]:
# Show the SVR's test-set predictions.
pred_sv

array([130.59595494, 120.60577746, 148.64961136, 135.17399148,
       148.12448045, 157.2164129 , 149.01680322, 142.16273914,
       155.10408839, 127.96005333, 119.5925381 , 150.7286198 ,
       117.50028755, 147.96881768, 128.63981164, 155.07218093,
       125.3729997 , 148.15339316, 135.63525928, 128.14964384,
       132.60475382, 127.80481579, 150.75200328, 137.32183375,
       160.52049077, 146.92298006, 152.73225624, 131.03919019,
       133.71272584, 134.40596282, 119.62785555, 152.76054302,
       148.37898369, 150.93553035, 146.3794634 , 129.08160681,
       130.60589374, 153.82812789, 122.91511391, 143.9720035 ,
       140.84496205, 135.84628288, 124.07478972, 125.32647206,
       159.60741881, 120.62113625, 143.05992026, 132.99868499,
       143.07968565, 146.4889046 , 121.82537787, 151.09868098,
       131.02961604, 133.61602467, 161.47495438, 153.57230679,
       160.0826145 , 144.07051599, 131.09384333, 133.47914313,
       146.23728789, 146.94451908, 143.74257622, 116.27

### Random Forest Regressor (RFR)

In [18]:
from sklearn.ensemble import RandomForestRegressor

# A random forest draws bootstrap samples at random, so an unseeded model gives
# different predictions on every run. Pin the seed (same value as the
# train/test split) so the results survive Restart & Run All.
rfr = RandomForestRegressor(random_state=2022)
rfr.fit(X_train, y_train)
# Predict on the held-out test split and display the predictions.
pred_rf = rfr.predict(X_test)
pred_rf

array([135.46,  76.54, 196.62, 125.79, 176.09, 201.96, 152.54, 152.45,
       237.05,  86.91,  90.41, 189.96,  87.44, 168.01, 109.5 , 211.32,
       125.66, 219.08, 148.85, 152.44, 145.7 , 133.1 , 188.59, 146.37,
       291.31, 173.99, 223.75, 105.56, 157.01,  96.71,  71.48, 177.96,
       213.41, 196.01, 155.79,  88.55,  97.73, 233.11, 135.41, 179.19,
       199.5 , 146.04,  87.2 , 127.44, 259.7 ,  73.45, 136.28,  98.62,
       134.04, 138.5 ,  96.  , 253.33, 111.54, 134.74, 249.86, 219.36,
       222.34, 179.4 , 145.37, 124.3 , 179.33, 212.27, 172.8 ,  72.51,
       102.67, 242.08, 144.03, 172.27, 166.51, 105.  , 247.9 ,  89.35,
       133.08, 126.61,  92.93, 178.01, 152.94, 239.19, 105.24, 157.14,
        99.72, 111.22, 115.81, 164.16, 201.88,  96.04, 100.71, 152.09,
       176.46])

### XGBoost Regressor (XGBRegressor)

In [19]:
from xgboost import XGBRegressor

# Gradient-boosted tree regressor with the library's default hyperparameters,
# trained on the same split as the other models.
xgr = XGBRegressor().fit(X_train, y_train)

# Predict on the held-out test split and display the predictions.
pred_xg = xgr.predict(X_test)
pred_xg



array([129.2174  ,  66.95242 , 196.74918 , 130.5155  , 195.73079 ,
       177.29326 , 170.49132 , 111.87564 , 242.25333 , 107.03356 ,
        86.65604 , 167.2946  ,  67.98013 , 163.70796 , 118.28321 ,
       232.3803  , 105.61832 , 212.3893  , 159.34375 , 153.54292 ,
       119.696724, 136.53954 , 201.06203 , 114.58666 , 270.7618  ,
       169.42706 , 206.84782 ,  97.91326 , 123.01128 ,  86.76903 ,
        75.318085, 173.0245  , 229.43625 , 192.60301 , 159.32776 ,
        72.40596 ,  84.46901 , 261.42325 , 141.23476 , 187.70683 ,
       203.16484 , 124.71587 ,  97.37444 , 114.05205 , 261.1226  ,
        58.263012, 160.46829 ,  95.38075 ,  94.130005, 150.09071 ,
        85.82457 , 272.40448 ,  99.575966, 138.89734 , 255.88596 ,
       228.93898 , 226.34468 , 177.13745 , 183.17265 , 115.6763  ,
       179.33336 , 222.68669 , 200.57024 ,  79.92129 ,  90.621635,
       234.47954 , 173.06348 , 207.89694 , 167.75906 , 103.986046,
       261.07776 ,  87.808205, 128.87663 , 108.998314,  97.669

In [25]:
# Export this notebook to HTML via nbconvert.
# NOTE(review): the path is an absolute Drive mount path (presumably Google
# Colab), so this cell only works in that environment — confirm before sharing.
!jupyter nbconvert --to html /content/drive/MyDrive/machine/0427/과제/문석찬.ipynb

[NbConvertApp] Converting notebook /content/drive/MyDrive/machine/0427/과제/문석찬.ipynb to html
[NbConvertApp] Writing 306021 bytes to /content/drive/MyDrive/machine/0427/과제/문석찬.html
