In [1]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True)

In [2]:
X, y = mnist["data"], mnist["target"]

In [3]:
print(X.shape)
print(y.shape)

(70000, 784)
(70000,)


In [4]:
# Split 50,000 instances for training, 10,000 for validation, and 10,000 for testing.

from sklearn.model_selection import train_test_split

X_train_val, X_test, y_train_val, y_test = train_test_split( mnist.data, mnist.target, test_size=10000, random_state=42)
X_train, X_val, y_train, y_val = train_test_split( X_train_val, y_train_val, test_size=10000, random_state=42)

In [17]:
# Impoting model form sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import time
from sklearn.metrics import roc_curve, roc_auc_score, classification_report, accuracy_score, confusion_matrix 
from sklearn.model_selection import cross_val_score

In [6]:
# making Instance of the models 
logisticRegr_model = LogisticRegression(penalty='l2',dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='lbfgs', multi_class='auto',  verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)
svm_cls = SVC(C=1.0, kernel='rbf', degree=3, gamma='scale', coef0=0.0, shrinking=True, probability=True, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=- 1, decision_function_shape='ovr', break_ties=False, random_state=None)

In [7]:
# Training model and Checking model training timing. 

start_time_log = time.time()
logisticRegr_model.fit(X_train, y_train)
end_time_log = time.time()

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [8]:
# checking accuracy score ROC.
y_train_hat = logisticRegr_model.predict(X_train)
y_train_hat_probs = logisticRegr_model.predict_proba(X_train)

train_accuracy = accuracy_score(y_train, y_train_hat)*100
train_roc = roc_auc_score(y_train, y_train_hat_probs, average='macro', sample_weight=None, max_fpr=None, multi_class='ovr', labels=None)*100


In [9]:
# Here is Details of Logistic regeression.
print("Training time {:.2f}s".format(end_time_log - start_time_log))
print('Confusion matrix:\n', confusion_matrix(y_train, y_train_hat))
print('AUC: %.4f %%' % train_roc)
print('Accuracy: %.4f %%' % train_accuracy)
print(classification_report(y_train, y_train_hat))

Training time 44.51s
Confusion matrix:
 [[4810    0   10    9    7   18   25    8   28    8]
 [   1 5449   19   14    4   16    3   11   42    8]
 [  22   27 4615   66   50   16   46   43  113   18]
 [  15   14   90 4655    5  124   14   31   96   35]
 [  10   20   31    9 4655    3   34   15   28  147]
 [  44   14   34  123   39 4014   63   15  115   30]
 [  26    9   29    3   28   55 4798    4   15    3]
 [   7   17   53   21   37    6    2 4859   16  150]
 [  24   72   58  104   14  107   26   14 4410   39]
 [  16   18   13   57  114   31    3  119   33 4562]]
AUC: 99.5327 %
Accuracy: 93.6540 %
              precision    recall  f1-score   support

           0       0.97      0.98      0.97      4923
           1       0.97      0.98      0.97      5567
           2       0.93      0.92      0.93      5016
           3       0.92      0.92      0.92      5079
           4       0.94      0.94      0.94      4952
           5       0.91      0.89      0.90      4491
           6   

In [10]:
# Counting Accuracy score and rc for test data set
y_test_hat = logisticRegr_model.predict(X_test)
y_test_hat_probs = logisticRegr_model.predict_proba(X_test)

test_accuracy = accuracy_score(y_test, y_test_hat)*100

test_auc_roc = roc_auc_score(y_test, y_test_hat_probs,average='macro', sample_weight=None, max_fpr=None, multi_class='ovr', labels=None)*100


In [11]:
# here details for the logistic regression for test data.
print("Training time {:.2f}s".format(end_time_log - start_time_log))
print('Confusion matrix:\n', confusion_matrix(y_train, y_train_hat))
print('AUC: %.4f %%' % train_roc)
print('Accuracy: %.4f %%' % train_accuracy)
print(classification_report(y_train, y_train_hat))

Training time 44.51s
Confusion matrix:
 [[4810    0   10    9    7   18   25    8   28    8]
 [   1 5449   19   14    4   16    3   11   42    8]
 [  22   27 4615   66   50   16   46   43  113   18]
 [  15   14   90 4655    5  124   14   31   96   35]
 [  10   20   31    9 4655    3   34   15   28  147]
 [  44   14   34  123   39 4014   63   15  115   30]
 [  26    9   29    3   28   55 4798    4   15    3]
 [   7   17   53   21   37    6    2 4859   16  150]
 [  24   72   58  104   14  107   26   14 4410   39]
 [  16   18   13   57  114   31    3  119   33 4562]]
AUC: 99.5327 %
Accuracy: 93.6540 %
              precision    recall  f1-score   support

           0       0.97      0.98      0.97      4923
           1       0.97      0.98      0.97      5567
           2       0.93      0.92      0.93      5016
           3       0.92      0.92      0.92      5079
           4       0.94      0.94      0.94      4952
           5       0.91      0.89      0.90      4491
           6   

In [12]:
# importing SVM 
from sklearn.svm import SVC

In [13]:
# instance of SVM.
svm_cls = SVC(C=1.0, kernel='rbf', degree=3, gamma='scale', coef0=0.0, shrinking=True, probability=True, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=- 1, decision_function_shape='ovr', break_ties=False, random_state=None)

In [14]:
start_time_svm = time.time()
svm_cls.fit(X_train, y_train)
end_time_svm = time.time()

In [15]:
# here is accuracy score and roc for SVM.
y_pred = svm_cls.predict(X_test)
y_prob= svm_cls.predict_proba(X_test)

train_accuracy = accuracy_score(y_test, y_pred)*100
train_auc_roc = roc_auc_score(y_test, y_prob, average='macro', sample_weight=None, max_fpr=None, multi_class='ovo', labels=None)*100


In [16]:
# here the details of SVM 
print("Training time {:.2f}s".format(end_time_svm - start_time_svm))
print('Confusion matrix:\n', confusion_matrix(y_train, y_train_hat))
print('AUC: %.4f %%' % train_roc)
print('Accuracy: %.4f %%' % train_accuracy)
print(classification_report(y_train, y_train_hat))

Training time 1774.39s
Confusion matrix:
 [[4810    0   10    9    7   18   25    8   28    8]
 [   1 5449   19   14    4   16    3   11   42    8]
 [  22   27 4615   66   50   16   46   43  113   18]
 [  15   14   90 4655    5  124   14   31   96   35]
 [  10   20   31    9 4655    3   34   15   28  147]
 [  44   14   34  123   39 4014   63   15  115   30]
 [  26    9   29    3   28   55 4798    4   15    3]
 [   7   17   53   21   37    6    2 4859   16  150]
 [  24   72   58  104   14  107   26   14 4410   39]
 [  16   18   13   57  114   31    3  119   33 4562]]
AUC: 99.5327 %
Accuracy: 97.6000 %
              precision    recall  f1-score   support

           0       0.97      0.98      0.97      4923
           1       0.97      0.98      0.97      5567
           2       0.93      0.92      0.93      5016
           3       0.92      0.92      0.92      5079
           4       0.94      0.94      0.94      4952
           5       0.91      0.89      0.90      4491
           6 

# Error Confusion Metrix

In [19]:
# finding Error Confustion metrix using cross validation 
cross_val = cross_val_score(logisticRegr_model, X_train, y_train, cv=3, scoring="accuracy")
cross_val = cross_val_score(svm_cls, X_train, y_train,  scoring="accuracy")
conf_mx = confusion_matrix(y_train, cross_val)
print('Error Confusion Matrix: %.4f %%' % conf_mx)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

ValueError: Found input variables with inconsistent numbers of samples: [50000, 5]