In [30]:
from sklearn.datasets import load_svmlight_file
from sklearn.cross_validation import train_test_split
from sklearn import preprocessing

from sklearn.neural_network import BernoulliRBM
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np



In [31]:
# Read the dataset, which is stored in svmlight / libsvm text format.
# `x` is the feature matrix, `y` the label vector.
x, y = load_svmlight_file("dataset1.txt")

# The loader returns the features as a scipy.sparse CSR matrix;
# expand it into a dense numpy array before scaling.
X = x.toarray()

# Rescale every feature column into the [0, 1] interval.
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
X_scaled = min_max_scaler.fit_transform(X)



In [32]:
# Hold out 30% of the samples as the test set and train on the remaining 70%.
# The shuffle is seeded (random_state=0) so the partition -- and therefore
# every metric computed from it -- is identical on each Restart & Run All
# instead of changing from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.3, random_state=0
)


In [33]:
# Model: RBM feature extraction followed by a linear-kernel SVM.
# The RBM produces a 128-component representation; the SVM classifies on it.
rbm = BernoulliRBM(
    n_components=128,
    learning_rate=0.01,
    random_state=0,
    verbose=True,
)
svm = SVC(kernel="linear")

# Chain both stages so one fit/predict call drives the whole model.
pipeline_steps = [("rbm", rbm), ("svm", svm)]
classifier = Pipeline(steps=pipeline_steps)

In [38]:
# Fit the whole pipeline (RBM, then SVM) on the training split.
# The fitted pipeline is the cell's last expression, so its repr is displayed.
classifier.fit(X=X_train, y=y_train)


[BernoulliRBM] Iteration 1, pseudo-likelihood = -201.17, time = 0.48s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -168.42, time = 0.53s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -150.00, time = 0.50s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -141.08, time = 0.50s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -136.14, time = 0.50s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -130.04, time = 0.50s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -125.52, time = 0.49s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -121.83, time = 0.50s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -118.16, time = 0.50s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -115.31, time = 0.51s


Pipeline(memory=None,
     steps=[('rbm', BernoulliRBM(batch_size=10, learning_rate=0.01, n_components=128, n_iter=10,
       random_state=0, verbose=True)), ('svm', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

In [39]:
# Run the fitted pipeline on the held-out test split to get predicted labels.
y_predict = classifier.predict(X=X_test)



In [40]:
# Test-set performance: per-class precision, recall, F1 and support.
print(classification_report(y_true=y_test, y_pred=y_predict))

# Confusion matrix on the test set (rows: true labels, columns: predictions).
print(confusion_matrix(y_true=y_test, y_pred=y_predict))

             precision    recall  f1-score   support

        0.0       0.96      0.97      0.96       136
        1.0       0.97      0.97      0.97       176
        2.0       0.91      0.89      0.90       145
        3.0       0.88      0.91      0.89       146
        4.0       0.94      0.93      0.93       151
        5.0       0.90      0.87      0.89       134
        6.0       0.94      0.96      0.95       166
        7.0       0.91      0.95      0.93       156
        8.0       0.88      0.88      0.88       130
        9.0       0.93      0.87      0.90       160

avg / total       0.92      0.92      0.92      1500

[[132   0   0   0   0   0   3   0   1   0]
 [  0 171   1   1   1   0   0   1   1   0]
 [  1   1 129   4   2   0   5   1   2   0]
 [  0   0   1 133   0   4   0   2   5   1]
 [  0   0   1   1 140   1   2   0   2   4]
 [  1   1   3   6   0 117   0   1   3   2]
 [  0   0   2   1   0   2 160   0   1   0]
 [  1   1   1   0   1   0   0 148   0   4]
 [  2   1   3   4