In [30]:
import requests
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
import time

In [2]:
# Source data: the UCI "optdigits" train/test files.
# Fetched over HTTPS instead of plain HTTP so the download cannot be tampered
# with in transit; host and paths are otherwise unchanged.
train_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tra"
test_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tes"

In [3]:
# Download both splits. The files carry no header row, so pandas assigns
# integer column labels; the training file is read first, then the test file.
train_df, test_df = (
    pd.read_csv(url, header=None) for url in (train_url, test_url)
)

In [4]:
# Preview the first five rows of the raw training frame.
train_df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,64
0,0,1,6,15,12,1,0,0,0,7,...,0,0,0,6,14,7,1,0,0,0
1,0,0,10,16,6,0,0,0,0,7,...,0,0,0,10,16,15,3,0,0,0
2,0,0,8,15,16,13,0,0,0,1,...,0,0,0,9,14,0,0,0,0,7
3,0,0,0,3,11,16,0,0,0,0,...,0,0,0,0,1,15,2,0,0,4
4,0,0,5,14,4,0,0,0,0,0,...,0,0,0,4,12,14,7,0,0,6


In [5]:
# Preview the first five rows of the raw test frame.
test_df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,64
0,0,0,5,13,9,1,0,0,0,0,...,0,0,0,6,13,10,0,0,0,0
1,0,0,0,12,13,5,0,0,0,0,...,0,0,0,0,11,16,10,0,0,1
2,0,0,0,4,15,12,0,0,0,0,...,0,0,0,0,3,11,16,9,0,2
3,0,0,7,15,13,1,0,0,0,8,...,0,0,0,7,13,13,9,0,0,3
4,0,0,0,1,11,0,0,0,0,0,...,0,0,0,0,2,16,4,0,0,4


In [6]:
# Column 64 (the last column of the raw frame) is used as the class label.
train_lbl_df = train_df.loc[:, 64]

In [7]:
# Everything except the label column (64) is kept as the feature matrix.
train_img_df = train_df.drop(labels=[64], axis="columns")

In [8]:
# Preview the first five training labels.
train_lbl_df.head(5)

0    0
1    0
2    7
3    4
4    6
Name: 64, dtype: int64

In [9]:
# Preview the first five training feature rows (label column already removed).
train_img_df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0,1,6,15,12,1,0,0,0,7,...,0,0,0,0,6,14,7,1,0,0
1,0,0,10,16,6,0,0,0,0,7,...,3,0,0,0,10,16,15,3,0,0
2,0,0,8,15,16,13,0,0,0,1,...,0,0,0,0,9,14,0,0,0,0
3,0,0,0,3,11,16,0,0,0,0,...,0,0,0,0,0,1,15,2,0,0
4,0,0,5,14,4,0,0,0,0,0,...,12,0,0,0,4,12,14,7,0,0


In [10]:
# Same split as for the training frame: column 64 is the label,
# the remaining columns are the features.
test_lbl_df = test_df.loc[:, 64]
test_img_df = test_df.drop(labels=[64], axis="columns")

In [11]:
# Preview the first five test labels.
test_lbl_df.head(5)

0    0
1    1
2    2
3    3
4    4
Name: 64, dtype: int64

In [12]:
# Preview the first five test feature rows (label column already removed).
test_img_df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0,0,5,13,9,1,0,0,0,0,...,0,0,0,0,6,13,10,0,0,0
1,0,0,0,12,13,5,0,0,0,0,...,0,0,0,0,0,11,16,10,0,0
2,0,0,0,4,15,12,0,0,0,0,...,5,0,0,0,0,3,11,16,9,0
3,0,0,7,15,13,1,0,0,0,8,...,9,0,0,0,7,13,13,9,0,0
4,0,0,0,1,11,0,0,0,0,0,...,0,0,0,0,0,2,16,4,0,0


In [13]:
# Convert the pandas objects to plain NumPy arrays for scikit-learn.
# `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23 and
# removed in pandas 1.0 (it raises AttributeError there). `.values` returns the
# same ndarray and is available on both old and current pandas releases.
train_img = train_img_df.values
test_img = test_img_df.values
train_lbl = train_lbl_df.values
test_lbl = test_lbl_df.values

In [14]:
# Sanity-check the array dimensions, one shape per line:
# train features, test features, train labels, test labels.
for array in (train_img, test_img, train_lbl, test_lbl):
    print(array.shape)

(3823, 64)
(1797, 64)
(3823,)
(1797,)


In [16]:
# Base SVM classifier built with library defaults; the kernel, C and gamma
# values to try are supplied later through the grid-search parameter grid.
svc = SVC()

In [25]:
# Hyper-parameter grid for the SVC grid search.
# The grid is split per kernel because `gamma` is a kernel coefficient that the
# linear kernel does not use: crossing it with 'linear' only re-fits identical
# models once per gamma value. GridSearchCV accepts a list of grids and
# explores each one in turn, so the set of distinct models searched is the same.
parameters = [
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
    {'kernel': ['rbf'], 'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001]},
]

In [34]:
# Run the grid search over `parameters` on the training arrays and report how
# long it took.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() follows the system wall clock, which can be adjusted
# mid-run and skew the reported duration.
start_time = time.perf_counter()
clf = GridSearchCV(svc, parameters)
clf.fit(train_img, train_lbl)
end_time = time.perf_counter()
print("Training finished in {} seconds".format(end_time - start_time))

Training finished in 14.140989065170288 seconds


In [35]:
# NOTE: per the scikit-learn GridSearchCV documentation, `best_score_` is the
# mean cross-validated score of the best parameter combination, i.e. it is
# measured on held-out folds of the training set, not on the full training set.
best_score_message = "Best score on training data: {}".format(clf.best_score_)
print(best_score_message)

Best score on training data: 0.9916296102537274


In [36]:
# Show the winning hyper-parameter combination: heading on one line, the
# parameter dict on the next.
print("Best params for SVM classificator:", clf.best_params_, sep="\n")

Best params for SVM classificator:
{'gamma': 0.001, 'C': 10, 'kernel': 'rbf'}


In [39]:
# Score the fitted grid search on the test arrays and report the result.
test_score = clf.score(X=test_img, y=test_lbl)
accuracy_message = "Accuracy on test data: {}".format(test_score)
print(accuracy_message)

Accuracy on test data: 0.9827490261547023
