In [34]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_diabetes
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import r2_score,mean_squared_error

In [25]:
diabetes = load_diabetes()
df = pd.DataFrame(diabetes.data,columns=diabetes.feature_names)

In [29]:
df['target'] = diabetes.target

In [30]:
df

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068330,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018118,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017282,-0.037344,-0.013840,-0.024993,-0.011080,-0.046879,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044528,-0.025930,220.0


In [17]:
print(diabetes.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - Age
      - Sex
      - Body mass index
      - Average blood pressure
      - S1
      - S2
      - S3
      - S4
      - S5
      - S6

Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).

Source URL:
https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html

For more information see:
Bra

In [31]:
x = df.drop('target',axis=1)
y = df['target']
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2)

In [41]:
r2score_list = []
mse_list = []

norm_list = [MinMaxScaler,StandardScaler]
n_com = [2,3,4,5,6,7,8,9,10]
for norm_item in norm_list:
    for n_c in n_com:
        my_norm = norm_item()
        x_train_l = my_norm.fit_transform(x_train)
        x_test_l = my_norm.transform(x_test)
        my_pca = PCA(n_components=n_c)
        x_train_l = my_pca.fit_transform(x_train_l)
        x_test_l = my_pca.transform(x_test_l)
        reg = LinearRegression()
        reg.fit(x_train_l,y_train)
        y_pred = reg.predict(x_test_l)
        r2score_list.append(r2_score(y_test,y_pred))
        mse_list.append(mean_squared_error(y_test,y_pred))
        
        
        
        
        

In [42]:
cnt = 0
for norm_item in norm_list:
    for n_c in n_com:
        print(f'{norm_item}  :  {n_c}   {r2score_list[cnt]}  : {mse_list[cnt]}')
        cnt += 1

<class 'sklearn.preprocessing.data.MinMaxScaler'>  :  2   0.2430285737126513  : 3983.7667835716
<class 'sklearn.preprocessing.data.MinMaxScaler'>  :  3   0.2411469065633005  : 3993.6695656674674
<class 'sklearn.preprocessing.data.MinMaxScaler'>  :  4   0.38662353363955004  : 3228.0594850137777
<class 'sklearn.preprocessing.data.MinMaxScaler'>  :  5   0.4084237226086619  : 3113.3300967240743
<class 'sklearn.preprocessing.data.MinMaxScaler'>  :  6   0.39025301361082176  : 3208.9583654086346
<class 'sklearn.preprocessing.data.MinMaxScaler'>  :  7   0.3946384994548705  : 3185.8785605063117
<class 'sklearn.preprocessing.data.MinMaxScaler'>  :  8   0.4014154013089297  : 3150.2132823145857
<class 'sklearn.preprocessing.data.MinMaxScaler'>  :  9   0.4013977186053632  : 3150.3063423227522
<class 'sklearn.preprocessing.data.MinMaxScaler'>  :  10   0.4002251891677817  : 3156.477095489892
<class 'sklearn.preprocessing.data.StandardScaler'>  :  2   0.20910367892469806  : 4162.305714235419
<class 's