In [1]:
# libraries
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from random import randint
from collections import Counter

# dataset
from sklearn import datasets

In [2]:
# Load the wine dataset via scikit-learn's `datasets` module.
wine = datasets.load_wine()

In [3]:
# Pull the feature matrix, the targets and the feature names out of the loaded dataset.
X, y = wine.data, wine.target
X_columns = wine.feature_names

In [4]:
def print_random_sample(X_columns, X, y):
    """Print one randomly chosen sample as ``name: value`` lines followed by its target.

    Args:
        X_columns: Feature names, in the same order as the columns of ``X``.
        X: Feature array indexed by row; ``X.shape[0]`` is taken as the sample count.
        y: Targets, indexed the same way as the rows of ``X``.

    Raises:
        ValueError: If ``X`` has no rows to sample from.
    """
    n_samples = X.shape[0]
    if n_samples == 0:
        raise ValueError('Cannot draw a random sample from an empty dataset')

    # random.randint is inclusive on BOTH ends, so valid row indices are
    # 0 .. n_samples - 1. Using (1, n_samples) would never select row 0 and
    # would raise IndexError whenever n_samples itself was drawn.
    sample_number = randint(0, n_samples - 1)
    X_sample, y_sample = X[sample_number], y[sample_number]

    for column, value in zip(X_columns, X_sample):
        print('{}: {}'.format(column, value))
    print('Target: {}'.format(y_sample))

In [5]:
# Show one randomly picked sample; no seed is set in the visible cells, so the
# printed sample will generally differ between runs.
print_random_sample(X_columns, X, y)

alcohol: 13.56
malic_acid: 1.71
ash: 2.31
alcalinity_of_ash: 16.2
magnesium: 117.0
total_phenols: 3.15
flavanoids: 3.29
nonflavanoid_phenols: 0.34
proanthocyanins: 2.34
color_intensity: 6.13
hue: 0.95
od280/od315_of_diluted_wines: 3.38
proline: 795.0
Target: 0


In [6]:
# Optional preprocessing (currently disabled): standardize the features.
# scaler = StandardScaler()
# X_std = scaler.fit_transform(X)

# Hold out 10% of the samples for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [7]:
# Class distribution of the training labels (label -> number of samples).
Counter(y_train)

Counter({0: 52, 1: 64, 2: 44})

In [8]:
# Logistic regression object with one-vs-rest and L1 penalty.
# The solver is pinned explicitly: since scikit-learn 0.22 the default solver is
# 'lbfgs', which does not support the L1 penalty and makes fit() raise.
# 'liblinear' (the former default) supports L1 and one-vs-rest.
clf = LogisticRegression(
    random_state=101,
    multi_class='ovr',
    penalty='l1',
    solver='liblinear',
)

In [9]:
# Train the classifier on the training split. Later cells use both `clf` and
# `model`; scikit-learn's fit() returns the estimator itself, so they refer to
# the same fitted object.
model = clf.fit(X_train, y_train)

In [10]:
# Score on the held-out test split (mean accuracy for scikit-learn classifiers).
clf.score(X_test, y_test) 

0.94444444444444442

In [11]:
# Score on the training split, for comparison with the test score to gauge overfitting.
clf.score(X_train, y_train)

0.97499999999999998

In [12]:
# Fitted coefficients: one row per class, one column per feature. Entries that
# are exactly zero are features the L1 penalty removed for that class.
model.coef_

array([[ -5.48446420e-01,   6.54750592e-01,   1.49059090e+00,
         -5.94971467e-01,  -3.07808870e-02,   0.00000000e+00,
          1.69668404e+00,   0.00000000e+00,   0.00000000e+00,
         -5.96690739e-03,   0.00000000e+00,   1.68861620e-01,
          1.49413352e-02],
       [  1.14546003e+00,  -1.34172594e+00,   0.00000000e+00,
          2.24812737e-01,  -2.57078275e-03,   0.00000000e+00,
          7.10419656e-01,   0.00000000e+00,   6.92437291e-01,
         -2.17898872e+00,   6.65525543e-03,   0.00000000e+00,
         -1.32495469e-02],
       [ -1.35025352e-03,   5.94421944e-01,   0.00000000e+00,
          2.11488308e-02,   8.27283185e-03,   0.00000000e+00,
         -3.03088814e+00,   0.00000000e+00,   0.00000000e+00,
          9.24459921e-01,   0.00000000e+00,  -1.56198320e+00,
         -7.95335337e-04]])

In [13]:
# 5-fold cross-validation over the full dataset, scored with macro-averaged F1.
scores = cross_val_score(estimator=clf, X=X, y=y, scoring='f1_macro', cv=5)
scores

array([ 0.91991585,  0.94515263,  0.94747475,  1.        ,  1.        ])