In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_fscore_support

In [2]:
glass = fetch_openml(name='glass')

  warn("Multiple active versions of the dataset matching the name"


In [3]:
glass

{'data': array([[ 1.51793, 12.79   ,  3.5    , ...,  8.77   ,  0.     ,  0.     ],
        [ 1.51643, 12.16   ,  3.52   , ...,  8.53   ,  0.     ,  0.     ],
        [ 1.51793, 13.21   ,  3.48   , ...,  8.43   ,  0.     ,  0.     ],
        ...,
        [ 1.51613, 13.92   ,  3.52   , ...,  7.94   ,  0.     ,  0.14   ],
        [ 1.51689, 12.67   ,  2.88   , ...,  8.54   ,  0.     ,  0.     ],
        [ 1.51852, 14.09   ,  2.19   , ...,  9.32   ,  0.     ,  0.     ]]),
 'target': array(['build wind float', 'vehic wind float', 'build wind float',
        'tableware', 'build wind non-float', 'build wind non-float',
        'vehic wind float', 'build wind float', 'headlamps',
        'build wind non-float', 'build wind non-float',
        'build wind non-float', 'build wind float', 'vehic wind float',
        'vehic wind float', 'build wind non-float', 'headlamps',
        'build wind non-float', 'containers', 'build wind non-float',
        'build wind float', 'build wind non-float', 'bui

In [4]:
glass.data.shape

(214, 9)

In [5]:
glass.target.shape

(214,)

In [6]:
glass.data

array([[ 1.51793, 12.79   ,  3.5    , ...,  8.77   ,  0.     ,  0.     ],
       [ 1.51643, 12.16   ,  3.52   , ...,  8.53   ,  0.     ,  0.     ],
       [ 1.51793, 13.21   ,  3.48   , ...,  8.43   ,  0.     ,  0.     ],
       ...,
       [ 1.51613, 13.92   ,  3.52   , ...,  7.94   ,  0.     ,  0.14   ],
       [ 1.51689, 12.67   ,  2.88   , ...,  8.54   ,  0.     ,  0.     ],
       [ 1.51852, 14.09   ,  2.19   , ...,  9.32   ,  0.     ,  0.     ]])

In [7]:
glass.feature_names

['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe']

In [8]:
X_df = pd.DataFrame(glass.data, columns = glass.feature_names)
X_df.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe
0,1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0.0,0.0
1,1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.0,0.0
2,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.0,0.0
3,1.51299,14.4,1.74,1.54,74.55,0.0,7.59,0.0,0.0
4,1.53393,12.3,0.0,1.0,70.16,0.12,16.19,0.0,0.24


In [9]:
glass.target

array(['build wind float', 'vehic wind float', 'build wind float',
       'tableware', 'build wind non-float', 'build wind non-float',
       'vehic wind float', 'build wind float', 'headlamps',
       'build wind non-float', 'build wind non-float',
       'build wind non-float', 'build wind float', 'vehic wind float',
       'vehic wind float', 'build wind non-float', 'headlamps',
       'build wind non-float', 'containers', 'build wind non-float',
       'build wind float', 'build wind non-float', 'build wind non-float',
       'build wind float', 'containers', 'build wind non-float',
       'build wind non-float', 'headlamps', 'build wind non-float',
       'vehic wind float', 'build wind non-float', 'vehic wind float',
       'tableware', 'build wind non-float', 'build wind float',
       'build wind float', 'build wind float', 'build wind non-float',
       'build wind non-float', 'build wind non-float', 'build wind float',
       'build wind non-float', 'build wind non-float',
  

In [10]:
glass.target_names

['Type']

In [11]:
y_df = pd.DataFrame(glass.target, columns = glass.target_names)
y_df.head()

Unnamed: 0,Type
0,build wind float
1,vehic wind float
2,build wind float
3,tableware
4,build wind non-float


In [12]:
lrc = LogisticRegression(solver='liblinear', C=10.0, random_state=0)
rskf = RepeatedStratifiedKFold(n_splits=10, n_repeats=10, random_state=2)

i = 0
accuracy_lst = []
precision_lst = []
recall_lst = []
f1_lst = []
precision_recall_fscore_lst = []

for train_index, test_index in rskf.split(X_df, y_df):
    i = i+1
    X_train, X_test = X_df.iloc[train_index], X_df.iloc[test_index]
    y_train, y_test = y_df.iloc[train_index], y_df.iloc[test_index]
    lrc.fit(X_df.iloc[train_index], y_df.iloc[train_index])
    y_pred = lrc.predict(X_df.iloc[test_index])
    
    accuracy_value = accuracy_score(y_df.iloc[test_index], y_pred)
    accuracy_lst.append(accuracy_value)
    
    precision_value = precision_score(y_df.iloc[test_index], y_pred, average = 'macro')
    precision_lst.append(precision_value)
    
    recall_value = recall_score(y_df.iloc[test_index], y_pred, average = 'macro')
    recall_lst.append(recall_value)
    
    f1_value = f1_score(y_df.iloc[test_index], y_pred, average = 'macro')
    f1_lst.append(f1_value)
    
    precision_recall_fscore_value = precision_recall_fscore_support(y_df.iloc[test_index], y_pred, average = 'macro')
    precision_recall_fscore_lst.append(precision_recall_fscore_value)

  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn

  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifie

  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn

  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn

  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn

  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifie

  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return f(**kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
print('Metrics using Average = macro\n\
Accuracy: %f, Precision: %f, Recall: %f, f1_score: %f' 
      %(np.mean(accuracy_lst) * 100,
        np.mean(precision_lst) * 100,
        np.mean(recall_lst) * 100,
        np.mean(f1_lst) * 100))

Metrics using Average = macro
Accuracy: 63.121212, Precision: 47.100992, Recall: 50.788095, f1_score: 47.760607
