# Standardscaler (Standardization)

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("autompg.csv")

In [3]:
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [4]:
df.tail()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
393,27.0,4,140.0,86,2790,15.6,82,1,ford mustang gl
394,44.0,4,97.0,52,2130,24.6,82,2,vw pickup
395,32.0,4,135.0,84,2295,11.6,82,1,dodge rampage
396,28.0,4,120.0,79,2625,18.6,82,1,ford ranger
397,31.0,4,119.0,82,2720,19.4,82,1,chevy s-10


In [5]:
df.shape

(398, 9)

In [7]:
df.dtypes

mpg             float64
cylinders         int64
displacement    float64
horsepower       object
weight            int64
acceleration    float64
model year        int64
origin            int64
car name         object
dtype: object

In [9]:
X = df.iloc[:, [2,4]]
y = df.iloc[:, 5]

In [10]:
X.head()

Unnamed: 0,displacement,weight
0,307.0,3504
1,350.0,3693
2,318.0,3436
3,304.0,3433
4,302.0,3449


In [11]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 40)

In [13]:
X_train.head()

Unnamed: 0,displacement,weight
256,225.0,3430
295,98.0,1915
100,250.0,3021
253,200.0,3155
369,112.0,2395


In [14]:
scaler = StandardScaler().fit(X_train)

In [15]:
print(scaler)

StandardScaler()


In [16]:
scaler.mean_

array([ 193.29362416, 2978.66107383])

In [17]:
scaler.scale_

array([102.4388434 , 827.30502634])

In [18]:
scaler.transform(X_train)

array([[ 3.09515168e-01,  5.45553226e-01],
       [-9.30248927e-01, -1.28569396e+00],
       [ 5.53563218e-01,  5.11769237e-02],
       [ 6.54671179e-02,  2.13148622e-01],
       [-7.93582019e-01, -7.05496830e-01],
       [-6.95962799e-01, -8.08240072e-01],
       [-4.81200515e-01, -3.79135946e-01],
       [-1.19382082e+00, -1.45733561e+00],
       [ 1.21737392e+00,  1.40376148e+00],
       [-8.61915473e-01, -1.00163911e+00],
       [ 3.77848622e-01,  3.73911576e-01],
       [ 7.19515893e-01,  7.57083429e-01],
       [-8.42391629e-01, -6.63190790e-01],
       [ 1.21737392e+00,  1.46419868e+00],
       [ 6.70706282e-01,  4.39244596e-02],
       [-7.64296253e-01, -3.44082370e-01],
       [ 6.54671179e-02, -1.65127412e-02],
       [ 2.30094726e+00,  2.38526162e+00],
       [-9.40010849e-01, -9.55706842e-01],
       [ 1.06118316e+00,  2.30071038e-01],
       [ 1.09046893e+00,  1.08948803e+00],
       [ 3.68086700e-01,  4.85116025e-01],
       [-6.95962799e-01, -9.17027033e-01],
       [-1.

In [19]:
X_train_scaled = scaler.transform(X_train)

In [20]:
print(X_train_scaled)

[[ 3.09515168e-01  5.45553226e-01]
 [-9.30248927e-01 -1.28569396e+00]
 [ 5.53563218e-01  5.11769237e-02]
 [ 6.54671179e-02  2.13148622e-01]
 [-7.93582019e-01 -7.05496830e-01]
 [-6.95962799e-01 -8.08240072e-01]
 [-4.81200515e-01 -3.79135946e-01]
 [-1.19382082e+00 -1.45733561e+00]
 [ 1.21737392e+00  1.40376148e+00]
 [-8.61915473e-01 -1.00163911e+00]
 [ 3.77848622e-01  3.73911576e-01]
 [ 7.19515893e-01  7.57083429e-01]
 [-8.42391629e-01 -6.63190790e-01]
 [ 1.21737392e+00  1.46419868e+00]
 [ 6.70706282e-01  4.39244596e-02]
 [-7.64296253e-01 -3.44082370e-01]
 [ 6.54671179e-02 -1.65127412e-02]
 [ 2.30094726e+00  2.38526162e+00]
 [-9.40010849e-01 -9.55706842e-01]
 [ 1.06118316e+00  2.30071038e-01]
 [ 1.09046893e+00  1.08948803e+00]
 [ 3.68086700e-01  4.85116025e-01]
 [-6.95962799e-01 -9.17027033e-01]
 [-1.18405890e+00 -1.65073465e+00]
 [-1.11572545e+00 -1.18295072e+00]
 [-8.42391629e-01 -6.22093493e-01]
 [ 1.52975542e+00  1.69386004e+00]
 [-5.20248203e-01 -1.07168542e-01]
 [ 1.52975542e+00  1

In [21]:
print(X_train_scaled.mean(axis=0))

[-9.53748639e-17 -2.14593444e-16]


In [22]:
print(X_train_scaled.std(axis=0))

[1. 1.]


In [25]:
X_test.head()

Unnamed: 0,displacement,weight
142,79.0,1963
135,225.0,3613
77,121.0,2511
246,78.0,1985
118,116.0,2158


In [23]:
scaler = StandardScaler().fit(X_test)

In [24]:
scaler.mean_

array([ 193.82, 2945.88])

In [26]:
scaler.scale_

array([109.04653869, 898.1274885 ])

In [27]:
scaler.transform(X_test)

array([[-1.05294493, -1.09436579],
       [ 0.28593296,  0.74278987],
       [-0.66778828, -0.48420743],
       [-1.06211533, -1.06987038],
       [-0.71364026, -0.8772474 ],
       [ 0.35012574, -0.03549607],
       [-0.71364026, -0.80821488],
       [-0.94290017, -1.10327321],
       [ 1.73485562,  2.23700981],
       [-0.94290017, -1.28142164],
       [ 1.01039429,  0.54237289],
       [-0.3468244 , -0.40181378],
       [-0.88787779, -1.10884035],
       [-0.67695867, -0.87502054],
       [ 1.89075236,  2.00430342],
       [-0.49355074, -0.34168868],
       [ 1.01039429,  0.8084821 ],
       [ 0.9920535 ,  0.56018773],
       [ 1.03790548,  1.59233518],
       [-0.88787779, -0.90842337],
       [ 0.9920535 ,  1.88405323],
       [-0.48438034,  0.27180996],
       [-0.57608431, -0.129024  ],
       [ 1.44140293,  1.31731855],
       [-0.88787779, -0.86277284],
       [-0.94290017, -1.06987038],
       [ 0.58855605,  0.87306091],
       [ 1.79904839,  1.00667223],
       [ 1.43223253,

In [28]:
X_test_scaled = scaler.transform(X_test)

In [29]:
print(X_test_scaled)

[[-1.05294493 -1.09436579]
 [ 0.28593296  0.74278987]
 [-0.66778828 -0.48420743]
 [-1.06211533 -1.06987038]
 [-0.71364026 -0.8772474 ]
 [ 0.35012574 -0.03549607]
 [-0.71364026 -0.80821488]
 [-0.94290017 -1.10327321]
 [ 1.73485562  2.23700981]
 [-0.94290017 -1.28142164]
 [ 1.01039429  0.54237289]
 [-0.3468244  -0.40181378]
 [-0.88787779 -1.10884035]
 [-0.67695867 -0.87502054]
 [ 1.89075236  2.00430342]
 [-0.49355074 -0.34168868]
 [ 1.01039429  0.8084821 ]
 [ 0.9920535   0.56018773]
 [ 1.03790548  1.59233518]
 [-0.88787779 -0.90842337]
 [ 0.9920535   1.88405323]
 [-0.48438034  0.27180996]
 [-0.57608431 -0.129024  ]
 [ 1.44140293  1.31731855]
 [-0.88787779 -0.86277284]
 [-0.94290017 -1.06987038]
 [ 0.58855605  0.87306091]
 [ 1.79904839  1.00667223]
 [ 1.43223253  1.72928679]
 [-0.78700343 -0.75811063]
 [-0.96124096 -1.08879865]
 [ 0.05667305  0.13819864]
 [-0.8787074  -0.76924491]
 [-0.68612907 -0.60223076]
 [ 1.43223253  1.66359456]
 [-0.20926845  0.04244386]
 [-0.39267638 -0.3461424 ]
 

In [30]:
print(X_test_scaled.mean(axis=0))

[ 8.54871729e-17 -1.09912079e-16]


In [31]:
print(X_test_scaled.std(axis=0))

[1. 1.]
