In [1]:
import pandas as pd
import numpy as np

# Import Data

In [2]:
class ClassData():
    """Container for the samples of one class and their train/test split.

    Every attribute below starts at a class-level default and is expected to
    be filled in by the caller after construction:

    class_no: integer identifier given to the constructor
    data / target: all samples and labels belonging to the class
    train_data / test_data: the two parts of ``data`` after splitting
    train_target / test_target: the two parts of ``target`` after splitting
    """
    # Class-level defaults so each attribute exists before it is assigned.
    class_no = -1
    data = None
    target = None
    train_data = None
    test_data = None
    train_target = None
    test_target = None

    def __init__(self, class_no):
        """Store the class identifier; the arrays are assigned later."""
        self.class_no = class_no

    def __print__(self):
        """Print the shape of every stored array, one line per array.

        This is an ordinary method (Python has no ``__print__`` protocol), so
        it has to be called explicitly. An array that has not been assigned
        yet is reported as ``None`` instead of raising ``AttributeError``.
        """
        for name in ('data', 'train_data', 'test_data',
                     'target', 'train_target', 'test_target'):
            # getattr(..., 'shape', None) tolerates attributes still set to None.
            print(name + '.shape=', getattr(getattr(self, name), 'shape', None))
        
class DataLoader():
    """Load the Olivetti faces and build a train/test split inside each class.

    After construction the instance exposes:

    dclist: one ClassData per kept class, holding that class's arrays
    train_data / test_data: per-class training / test samples, concatenated
    train_target / test_target: the matching labels, concatenated
    """

    def __init__(self, c=40, n=10, ts=2, rs=42):
        """
        c: number of class (the first ``c`` distinct labels are kept)
        n: number of data in each class (at most ``n`` samples are kept)
        ts: test_size in train_test_split (applied within every class)
        rs: random_state in train_test_split

        Raises ValueError if ``n`` is smaller than 1 or if ``c`` is not
        between 1 and the number of distinct labels in the data set.
        """
        # Imported locally so scikit-learn is only required when data is loaded.
        from sklearn.datasets import fetch_olivetti_faces
        from sklearn.model_selection import train_test_split

        if n < 1:
            raise ValueError('n must be at least 1, got %r' % (n,))

        # fetch data
        d = fetch_olivetti_faces()
        labels = np.unique(d.target)
        if not 1 <= c <= len(labels):
            raise ValueError(
                'c must be between 1 and %d, got %r' % (len(labels), c))

        # split data by class
        # Rows are selected by label rather than by position, so every
        # ClassData really holds a single class whatever c and n are.
        self.dclist = []
        for label in labels[:c]:
            mask = d.target == label
            dc = ClassData(int(label))
            dc.data = d.data[mask][:n]
            dc.target = d.target[mask][:n]
            # split data into train/test
            # One joint call keeps samples and labels aligned by construction.
            (dc.train_data, dc.test_data,
             dc.train_target, dc.test_target) = train_test_split(
                dc.data, dc.target, test_size=ts, random_state=rs)
            self.dclist.append(dc)

        # recombine data
        self.train_data = np.concatenate([dc.train_data for dc in self.dclist])
        self.test_data = np.concatenate([dc.test_data for dc in self.dclist])
        self.train_target = np.concatenate([dc.train_target for dc in self.dclist])
        self.test_target = np.concatenate([dc.test_target for dc in self.dclist])

    def __print__(self):
        """Print the shapes of the combined train/test arrays and labels.

        This is an ordinary method (Python has no ``__print__`` protocol), so
        it has to be called explicitly.
        """
        print('train_data.shape=', self.train_data.shape)
        print('test_data.shape=', self.test_data.shape)
        print('train_target.shape=', self.train_target.shape)
        print('test_target.shape=', self.test_target.shape)

In [3]:
# Build the loader with its defaults (c=40, n=10, ts=2, rs=42): this fetches
# the Olivetti faces and prepares the combined train/test arrays.
dl = DataLoader()

In [4]:
# Show the shapes of the combined train/test samples and labels.
dl.__print__()

train_data.shape= (320, 4096)
test_data.shape= (80, 4096)
train_target.shape= (320,)
test_target.shape= (80,)


# SVM

In [5]:
from sklearn.svm import SVC

In [6]:
# Baseline classifier: SVC with every hyperparameter left at its default.
svc = SVC()

In [7]:
# Train on the training split; the cell output is the fitted estimator's repr.
svc.fit(dl.train_data,dl.train_target)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [8]:
# Mean accuracy of the baseline model on the held-out test split.
svc.score(dl.test_data,dl.test_target)

0.8125

In [9]:
# Same model with a larger penalty parameter C (10 instead of the default 1.0).
# NOTE(review): C is being compared via the test-split score, so this accuracy
# is an optimistic estimate — consider a validation split or cross-validation.
svc1 = SVC(C=10)
svc1.fit(dl.train_data,dl.train_target)
svc1.score(dl.test_data,dl.test_target)

0.875

In [39]:
# Larger C plus a looser stopping tolerance (tol=1e-1 instead of the default 1e-3).
# NOTE(review): this cell is named svc3 but sits before svc2, and its execution
# count jumps from 9 to 39 — re-run the notebook top to bottom to confirm the result.
svc3 = SVC(C=25, tol=1e-1)
svc3.fit(dl.train_data,dl.train_target)
svc3.score(dl.test_data,dl.test_target)

0.96250000000000002

In [40]:
# Another hand-picked setting: C=28 with the same loose tolerance (tol=1e-1).
# NOTE(review): the score below is measured on the test split that was also
# used to pick C, so treat it as an upper bound rather than a clean estimate.
svc2 = SVC(C=28, tol=1e-1)
svc2.fit(dl.train_data,dl.train_target)
svc2.score(dl.test_data,dl.test_target)

0.97499999999999998

In [44]:
# Cross-validated grid search over C and tol, fitted on the training split only.
from sklearn.model_selection import GridSearchCV
# Each tuple lists the candidate values to try — it is NOT a (min, max) range —
# so this grid contains only 2 x 2 = 4 combinations.
# NOTE(review): if a sweep between the endpoints was intended, list the
# intermediate values explicitly (e.g. the C=25/28 settings tried by hand).
parameters = {'C':(1,100), 'tol': (1e-1, 1e-5)}
est = SVC(random_state = 0)
clf = GridSearchCV(est, parameters)
clf.fit(dl.train_data, dl.train_target)

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=0, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'tol': (0.1, 1e-05), 'C': (1, 100)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [45]:
# Best mean cross-validated score found by the grid search, followed by the
# parameter combination that achieved it.
print(clf.best_score_)
print(clf.best_params_)

0.925
{'tol': 1e-05, 'C': 100}


# back up