# Lab-11: Multinomial Logistic Regression

In [427]:
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.model_selection import train_test_split as tt_split
from sklearn import metrics
from sklearn.metrics import accuracy_score as acc
from sklearn.metrics import confusion_matrix
from sklearn.metrics import r2_score

In [428]:
data = pd.read_csv("C:/Users/Suresh Kumar/Downloads/glass_multiclass.csv")

In [429]:
data.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [430]:
train, test = tt_split(data, test_size = 0.2)

In [431]:
train_data = train.iloc[:, :-1]
train_label = train.iloc[:,-1]
test_data = test.iloc[:,:-1]
test_label = test.iloc[:,-1]

## Model-3: independent variables (RI,Na, Mg, AI, SI, K, Ca, Ba, Fe)

In [432]:
clf = LR(penalty= 'l2', multi_class = 'multinomial', solver = 'newton-cg', class_weight = 'balanced')

In [433]:
clf.fit(train_data, train_label)

LogisticRegression(C=1.0, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='multinomial', n_jobs=1, penalty='l2',
          random_state=None, solver='newton-cg', tol=0.0001, verbose=0,
          warm_start=False)

In [434]:
y_pred = clf.predict(test_data)

In [435]:
acc(test_label, y_pred)

0.53488372093023251

In [436]:
confusion_matrix(test_label, y_pred)

array([[8, 2, 4, 0, 0, 0],
       [2, 5, 8, 0, 1, 0],
       [1, 2, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 2, 0],
       [0, 0, 0, 0, 0, 6]], dtype=int64)

In [437]:
r2_score(test_label, y_pred)

0.7321206080239222

In [438]:
fpr, tpr, thresholds = metrics.roc_curve(test_label, y_pred, pos_label=2)
metrics.auc(fpr, tpr)

0.48148148148148151

## Model-1: independent variables (RI,Na, Mg)

In [439]:
train_data_model2 = train_data.iloc[:,:3]
test_data_model2 = test_data.iloc[:,:3]

In [440]:
clf2 = LR(penalty= 'l2', multi_class = 'multinomial', solver = 'newton-cg', class_weight = 'balanced')

In [441]:
clf2.fit(train_data_model2, train_label)
y_pred2 = clf2.predict(test_data_model2)
acc(test_label, y_pred2)

0.55813953488372092

In [442]:
confusion_matrix(test_label, y_pred2)

array([[9, 1, 4, 0, 0, 0],
       [8, 3, 4, 0, 0, 1],
       [0, 0, 4, 0, 0, 0],
       [0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1, 1],
       [0, 0, 0, 0, 0, 6]], dtype=int64)

In [443]:
r2_score(test_label, y_pred2)

0.70533266882631451

In [444]:
fpr2, tpr2, thresholds2 = metrics.roc_curve(test_label, y_pred2, pos_label=2)
metrics.auc(fpr2, tpr2)

0.33333333333333331

## Model-2: independent variables (RI,Na, Mg, AI, SI, K)

In [445]:
train_data_model3 = train_data.iloc[:,:6]
test_data_model3 = test_data.iloc[:,:6]

In [446]:
clf3 = LR(penalty= 'l2', multi_class = 'multinomial', solver = 'newton-cg', class_weight = 'balanced')

In [447]:
clf3.fit(train_data_model3, train_label)
y_pred3 = clf3.predict(test_data_model3)
acc(test_label, y_pred3)

0.53488372093023251

In [448]:
confusion_matrix(test_label, y_pred3)

array([[9, 1, 4, 0, 0, 0],
       [3, 4, 8, 0, 1, 0],
       [1, 2, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 2, 0],
       [0, 0, 0, 0, 0, 6]], dtype=int64)

In [449]:
r2_score(test_label, y_pred3)

0.7321206080239222

In [450]:
fpr3, tpr3, thresholds3 = metrics.roc_curve(test_label, y_pred3, pos_label=2)
metrics.auc(fpr3, tpr3)

0.47453703703703709

## Model-4: independent variables (SI, K, Ca, Ba, Fe)

In [451]:
train_data_model4 = train_data.iloc[:,4:]
test_data_model4 = test_data.iloc[:,4:]

In [452]:
clf4 = LR(penalty= 'l2', multi_class = 'multinomial', solver = 'newton-cg', class_weight = 'balanced')

In [453]:
clf4.fit(train_data_model4, train_label)
y_pred4 = clf4.predict(test_data_model4)
acc(test_label, y_pred4)

0.53488372093023251

In [454]:
confusion_matrix(test_label, y_pred4)

array([[7, 2, 5, 0, 0, 0],
       [1, 4, 9, 0, 2, 0],
       [0, 0, 3, 0, 1, 0],
       [0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 2, 0],
       [0, 0, 0, 0, 0, 6]], dtype=int64)

In [455]:
r2_score(test_label, y_pred4)

0.60889608771492654

In [456]:
fpr4, tpr4, thresholds4 = metrics.roc_curve(test_label, y_pred4, pos_label=2)
metrics.auc(fpr4, tpr4)

0.44328703703703709