In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, accuracy_score
import numpy as np 
from sklearn.preprocessing import LabelEncoder

In [2]:
img = pd.read_csv("Image_Segmention.csv")
lbl = LabelEncoder()
img['Class'] = lbl.fit_transform(img['Class'])
print(lbl.classes_)

['BRICKFACE' 'CEMENT' 'FOLIAGE' 'GRASS' 'PATH' 'SKY' 'WINDOW']


In [3]:
X = img.drop('Class', axis=1)
y = img['Class']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.3,
                                                    random_state=23,
                                                    stratify=y)

In [5]:
lr = LogisticRegression(multi_class='multinomial')
lr.fit(X_train, y_train)
y_pred_prob = lr.predict_proba(X_test)
print(log_loss(y_test, y_pred_prob))

0.2716345867547443


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [8]:
y_pred = lr.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.9206349206349206


Grid Search CV

In [9]:
from sklearn.model_selection import GridSearchCV 
from sklearn.model_selection import StratifiedKFold

In [12]:
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)
lr = LogisticRegression()
params = {'penalty':['l1','l2','elasticnet',None],
          'solver':['lbfgs', 'liblinear', 
                    'newton-cg', 'newton-cholesky', 'sag', 'saga'],
          'multi_class':['ovr','multinomial']}

# Default scoring=acc score
gcv = GridSearchCV(lr, param_grid=params,
                   cv=kfold)

In [13]:
gcv.fit(X, y)
print(gcv.best_params_)
print(gcv.best_score_)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

{'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
0.9044134727061557


In [14]:
# log loss
gcv = GridSearchCV(lr, param_grid=params,
                   cv=kfold, scoring='neg_log_loss')

gcv.fit(X, y)
print(gcv.best_params_)
print(gcv.best_score_)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

{'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'newton-cg'}
-0.5437095074321658


In [15]:
bm = gcv.best_estimator_
print(bm.coef_)
print(bm.intercept_)

[[-3.03862255e-02 -1.30246162e-01  7.28647322e-05 -1.64195684e-02
  -4.54834581e-05 -2.65731709e-01 -1.25088347e-01  3.68896022e-02
  -9.54673559e-02 -7.54437316e-02  2.68928981e-01 -3.37039263e-02
  -4.61556816e-01  1.03311905e+00  1.25219849e-01 -1.15833857e+00
  -5.48387231e-02 -2.49644093e-02  7.20452137e-02]
 [-5.99517352e-03 -6.07824669e-02  5.46198132e-05  1.13996874e-01
   7.06940271e-02 -8.39837226e-02  1.49651052e-01  2.94561155e-01
  -7.08018707e-02  1.37759243e-01  2.77920736e-01  2.32373862e-01
  -9.70175522e-02  4.20482175e-01  2.83846246e-01 -7.04329275e-01
  -5.77191076e-01  2.11576578e-02 -1.79030256e-01]
 [-9.11741611e-03 -1.70661121e-01  8.53449615e-04 -1.92292424e-02
   1.27724406e-03  2.86631939e-01  6.24974490e-02 -5.10126586e-02
   4.79566634e-02 -2.75416324e-01 -5.34962848e-01 -3.47068796e-01
   5.57816209e-02 -7.78639735e-01 -2.14954892e-01  9.93594719e-01
   7.12171960e-01  7.45104206e-01 -5.94341122e-01]
 [ 1.84291150e-02  2.22667913e-01 -7.69806110e-04  1.93

Inferencing

In [17]:
tst_img = pd.read_csv("tst_img.csv")
# y_pred_prob = bm.predict_proba(tst_img)
# np.argmax(y_pred_prob, axis=1)
# OR
y_pred = bm.predict(tst_img)
print(lbl.inverse_transform(y_pred))

['SKY' 'PATH' 'GRASS' 'GRASS']
