In [1]:
from sklearn.linear_model import  LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, accuracy_score
from sklearn.model_selection import StratifiedKFold, cross_val_score
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

In [2]:
img = pd.read_csv('Image_Segmention.csv')
#Label Encoding
lbl = LabelEncoder()
img['Class'] = lbl.fit_transform(img['Class'])

X = img.drop('Class',axis=1)
y = img['Class']

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=23, stratify=y)
#use stratify only for classification problem (its good practice)

In [4]:
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)
lr = LogisticRegression(multi_class='multinomial')
lr.fit(X_train, y_train)
#Default scoring = acc score
results = cross_val_score(lr, X, y, cv=kfold)
print(results.mean())
#log loss
results = cross_val_score(lr, X, y, cv=kfold,scoring='neg_log_loss')
print(results.mean())

0.8471544715447156
-0.7549849828029452


In [5]:
y_pred_prob = lr.predict_proba(X_test)
# print(accuracy_score(y_test, y_pred_prob))

y_pred = lr.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.9206349206349206


In [6]:
penalty = ['l1','l2','elasticnet',None]
params = {'penalty':penalty}
gcv = GridSearchCV(lr,param_grid=params,cv=kfold)
gcv.fit(X,y)
print(gcv.best_params_)
print(gcv.best_score_)

{'penalty': 'l2'}
0.8471544715447156


10 fits failed out of a total of 20.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/linear_model/_logistic.py", line 1169, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^

In [7]:
solver = ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga']
params = {'penalty':penalty,'solver':solver,'multi_class':['ovr','multinomial']}
gcv = GridSearchCV(lr,param_grid=params,cv=kfold)
gcv.fit(X,y)
print(gcv.best_params_)
print(gcv.best_score_)

In [None]:
solver = ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga']
params = {'penalty':penalty,'solver':solver,'multi_class':['ovr','multinomial']}
gcv = GridSearchCV(lr,param_grid=params,cv=kfold,scoring='neg_log_loss')
gcv.fit(X,y)
print(gcv.best_params_)
print(gcv.best_score_)

Traceback (most recent call last):
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 813, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 401, in _score
    return self._sign * self._score_func(y, y_pred, **scoring_kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/utils/_param_validation.py", line 211, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/dai/anaconda3/lib/python3.11/site-

{'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'newton-cg'}
-0.5465525609071172


In [None]:
bm = gcv.best_estimator_
print(bm.coef_)
print(bm.intercept_)

[[-3.02403569e-02 -1.29097151e-01  1.05406506e-03 -1.64282484e-02
  -4.54631921e-05 -2.65706121e-01 -1.25123165e-01  3.68222555e-02
  -9.53361863e-02 -7.55518396e-02  2.68787652e-01 -3.37518384e-02
  -4.61691898e-01  1.03301938e+00  1.25400437e-01 -1.15841949e+00
  -5.48293635e-02 -2.48404226e-02  7.18655748e-02]
 [-5.85826462e-03 -5.97605861e-02 -1.98316736e-04  1.14007534e-01
   7.06925154e-02 -8.39163922e-02  1.49268512e-01  2.94176341e-01
  -7.06496922e-02  1.37638236e-01  2.77752016e-01  2.32265216e-01
  -9.71032078e-02  4.20339034e-01  2.83883328e-01 -7.04223218e-01
  -5.77254182e-01  2.11481520e-02 -1.78948045e-01]
 [-8.98890654e-03 -1.69606871e-01 -8.21690572e-04 -1.92327700e-02
   1.27682667e-03  2.86385357e-01  6.23072934e-02 -5.11576619e-02
   4.79211953e-02 -2.75448329e-01 -5.34992721e-01 -3.47056044e-01
   5.57027295e-02 -7.78633341e-01 -2.14820623e-01  9.93454056e-01
   7.11871289e-01  7.45091432e-01 -5.94164649e-01]
 [ 1.74678052e-02  2.17556125e-01 -1.55326435e-03  2.30

Inferencing

In [None]:
tst_img = pd.read_csv('tst_img.csv')
# y_pred_prob = bm.predict_proba(tst_img)
# print(y_pred_prob)
# np.argmax(y_pred_prob, axis=1)
##### OR
y_pred = bm.predict(tst_img)
print(y_pred)
#Label Decoding
lbl.inverse_transform(y_pred)

[[5.55070850e-20 1.35688670e-05 1.91468554e-24 8.38374901e-16
  1.77082414e-04 9.99809349e-01 6.76887895e-17]
 [1.64269457e-16 1.95062080e-07 2.81360324e-24 2.11266411e-09
  9.99999802e-01 4.09637284e-10 7.88401394e-18]
 [6.28628098e-29 3.43165454e-23 7.53681125e-07 9.99999246e-01
  6.54317055e-14 2.19621118e-20 5.49046281e-14]
 [1.01152537e-26 3.58396941e-22 2.47101397e-05 9.99975290e-01
  2.32686760e-13 1.90943661e-21 2.33962852e-13]]
[5 4 3 3]


array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object)