In [2]:
import pandas as pd

wines = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data", names=['classe','alcol','flavonoidi'], 
                    usecols=[0,1,7])
wines.head()

Unnamed: 0,classe,alcol,flavonoidi
0,1,14.23,3.06
1,1,13.2,2.76
2,1,13.16,3.24
3,1,14.37,3.49
4,1,13.24,2.69


In [3]:
Y = wines['classe'].values
X = wines.drop('classe',axis=1).values

In [4]:
wines.describe()

Unnamed: 0,classe,alcol,flavonoidi
count,178.0,178.0,178.0
mean,1.938202,13.000618,2.02927
std,0.775035,0.811827,0.998859
min,1.0,11.03,0.34
25%,1.0,12.3625,1.205
50%,2.0,13.05,2.135
75%,3.0,13.6775,2.875
max,3.0,14.83,5.08


## Normalizzazione

In [6]:
# Normalizzazione: porta i range di valori fra 0 e 1
#
# Con pandas
wines_norm = wines.copy()
# colonne del dataframe da normalizzare
features = ["alcol","flavonoidi"] 
to_norm = wines_norm[features]
# implementiamo l'algoritmo della normalizzazione
# e lo eseguiamo su tutte le colonne da normalizzare
wines_norm[features] = (to_norm - to_norm.min())/(to_norm.max() - to_norm.min()) 
                                                                             
wines_norm.head()

Unnamed: 0,classe,alcol,flavonoidi
0,1,0.842105,0.57384
1,1,0.571053,0.510549
2,1,0.560526,0.611814
3,1,0.878947,0.664557
4,1,0.581579,0.495781


In [7]:
# Con numpy
from sklearn.preprocessing import MinMaxScaler

mms = MinMaxScaler()
X_norm = X.copy()
X_norm = mms.fit_transform(X_norm)
X_norm[:5]

array([[0.84210526, 0.57383966],
       [0.57105263, 0.51054852],
       [0.56052632, 0.61181435],
       [0.87894737, 0.66455696],
       [0.58157895, 0.49578059]])

## Standardizzazione

In [8]:
# Standardizzazione: porta i range di valori fra -1 e 1, centrato sullo 0
# media 0 e deviazione standard 1
#
# Con pandas
wines_std = wines.copy()

features = ["alcol","flavonoidi"]
to_std = wines_std[features]
wines_std[features] = (to_std - to_std.mean())/to_std.std()
wines_std[:5]

Unnamed: 0,classe,alcol,flavonoidi
0,1,1.514341,1.031908
1,1,0.245597,0.731565
2,1,0.196325,1.212114
3,1,1.686791,1.462399
4,1,0.294868,0.661485


In [9]:
# Con numpy
from sklearn.preprocessing import StandardScaler

X_std = X.copy()
ss = StandardScaler()
X_std = ss.fit_transform(X_std)
X_std[:5]

array([[1.51861254, 1.03481896],
       [0.24628963, 0.73362894],
       [0.19687903, 1.21553297],
       [1.69154964, 1.46652465],
       [0.29570023, 0.66335127]])