In [1]:
import numpy as np
import matplotlib.pyplot as plt

from scipy.ndimage import convolve
from sklearn import linear_model, datasets, metrics
from sklearn.model_selection import train_test_split
from sklearn.neural_network import BernoulliRBM
from sklearn.preprocessing import minmax_scale
from sklearn.base import clone

In [15]:
def nudge_dataset(X, y):
  direction_verctos = [
      [[0,1,0],[0,0,0],[0,0,0]],
      [[0,0,0],[1,0,0],[0,0,0]],
      [[0,0,0],[0,0,1],[0,0,0]],
      [[0,1,0],[0,0,0],[0,1,0]],
  ]

  def shift(x, w):
    return convolve(x.reshape((8,8)), mode='constant', weights=w).ravel()

  X = np.concatenate(
      [X] + [np.apply_along_axis(shift, 1, X, vector) for vector in direction_verctos]
  )

  Y = np.concatenate([y for _ in range(5)], axis=0)
  return X, Y

In [22]:
#load data
X, y = datasets.load_digits(return_X_y=True)
print(X.shape, y.shape)

X, y = nudge_dataset(X, y)
print(X.shape, y.shape)

(1797, 64) (1797,)
(8985, 64) (8985,)


In [25]:
# from pandas.core.dtypes.base import E
X = minmax_scale(X, feature_range=(0,1))
X[0].reshape(8,8)

array([[0.        , 0.        , 0.3125    , 0.8125    , 0.5625    ,
        0.0625    , 0.        , 0.        ],
       [0.        , 0.        , 0.40625   , 0.46875   , 0.3125    ,
        0.46875   , 0.16129032, 0.        ],
       [0.        , 0.11538462, 0.46875   , 0.0625    , 0.        ,
        0.34375   , 0.26666667, 0.        ],
       [0.        , 0.17391304, 0.375     , 0.        , 0.        ,
        0.25      , 0.30769231, 0.        ],
       [0.        , 0.2173913 , 0.25      , 0.        , 0.        ,
        0.28125   , 0.30769231, 0.        ],
       [0.        , 0.13793103, 0.34375   , 0.        , 0.03125   ,
        0.375     , 0.23333333, 0.        ],
       [0.        , 0.10526316, 0.4375    , 0.15625   , 0.3125    ,
        0.375     , 0.        , 0.        ],
       [0.        , 0.        , 0.375     , 0.8125    , 0.625     ,
        0.        , 0.        , 0.        ]])

In [26]:
X[11].reshape(8,8)

array([[0.        , 0.        , 0.        , 0.        , 0.875     ,
        0.8125    , 0.0625    , 0.        ],
       [0.        , 0.        , 0.        , 0.15625   , 0.5       ,
        0.5       , 0.06451613, 0.        ],
       [0.        , 0.        , 0.        , 0.4375    , 0.5       ,
        0.375     , 0.        , 0.        ],
       [0.        , 0.04347826, 0.3125    , 0.5       , 0.5       ,
        0.375     , 0.        , 0.        ],
       [0.        , 0.13043478, 0.375     , 0.4375    , 0.5       ,
        0.28125   , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.15625   , 0.5       ,
        0.46875   , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.125     , 0.5       ,
        0.4375    , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.0625    , 0.8125    ,
        1.        , 0.0625    , 0.        ]])

In [27]:
X_train, X_test, y_train, y_test =train_test_split(X, y, test_size=0.2, random_state=0)
X_train.shape

(7188, 64)

In [28]:
#Models

from sklearn.pipeline import Pipeline
logistic = linear_model.LogisticRegression()
rbm = BernoulliRBM()
rbm_classifier = Pipeline(steps=[("rbm", rbm), ("logistic", logistic)])


In [30]:
rbm.learning_rate = 0.06
rbm.n_iter = 10
rbm.n_components = 100
logistic.C = 6000

In [31]:
rbm_classifier.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Pipeline(steps=[('rbm', BernoulliRBM(learning_rate=0.06, n_components=100)),
                ('logistic', LogisticRegression(C=6000))])

In [34]:
raw_pixel = clone(logistic)
raw_pixel.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression(C=6000)

In [38]:
y_pred_raw = raw_pixel.predict(X_test)
print("Logistic on RAW: \n", metrics.classification_report(y_test, y_pred_raw))


y_pred_rbm = rbm_classifier.predict(X_test)
print("Logistic on RBM: \n", metrics.classification_report(y_test, y_pred_rbm))

Logistic on RAW: 
               precision    recall  f1-score   support

           0       0.91      0.95      0.93       174
           1       0.69      0.73      0.71       184
           2       0.82      0.93      0.88       166
           3       0.88      0.85      0.87       194
           4       0.92      0.88      0.90       186
           5       0.85      0.78      0.82       181
           6       0.93      0.91      0.92       207
           7       0.88      0.95      0.92       154
           8       0.74      0.66      0.70       182
           9       0.80      0.82      0.81       169

    accuracy                           0.84      1797
   macro avg       0.84      0.85      0.84      1797
weighted avg       0.84      0.84      0.84      1797

Logistic on RBM: 
               precision    recall  f1-score   support

           0       0.78      0.87      0.83       174
           1       0.60      0.62      0.61       184
           2       0.72      0.76      0

In [None]:
#ploting 


# plt.