In [6]:
from scipy.io import loadmat
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

import random
from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning

@ignore_warnings(category=ConvergenceWarning)
def featureSubsetScore(dat,label,threshold = 0.5):
    trainScore = []
    testScore = []
    for seed in range(10):
        np.random.seed(seed)
        nInst,_ = dat.shape
        nTrain = 200
        rp = np.random.permutation(nInst)
        nTest = nInst - nTrain
        trainDat = dat[rp[:nTrain]]
        testDat = dat[rp[nTrain+1:]]
        trainLabel = label[rp[:nTrain]]
        testLabel = label[rp[nTrain+1:]]
        mlr = LogisticRegression().fit(trainDat,trainLabel.ravel())
        testConfidence = mlr.predict_proba(testDat)
        testPreds = testConfidence[:,1] > threshold
        testPreds = testPreds.astype(int)
        testPreds[testPreds == 1] = 2
        testPreds[testPreds == 0] = 1
        testPreds = testPreds.reshape(-1,1)
        testScore.append((testPreds==testLabel).sum()/nTest)

        trainConfidence = mlr.predict_proba(trainDat)
        trainPreds = trainConfidence[:,1] > threshold
        trainPreds = trainPreds.astype(int)
        trainPreds[trainPreds == 1] = 2
        trainPreds[trainPreds == 0] = 1
        trainPreds = trainPreds.reshape(-1,1)
        trainScore.append((trainPreds==trainLabel).sum()/nTrain)
    return [np.mean(trainScore),np.mean(testScore)]

@ignore_warnings(category=ConvergenceWarning)
def svmLinear(dat,label,threshold = 0.5):
    trainScore = []
    testScore = []
    for seed in range(10):
        np.random.seed(seed)
        nInst,_ = dat.shape
        nTrain = 200
        rp = np.random.permutation(nInst)
        nTest = nInst - nTrain
        trainDat = dat[rp[:nTrain]]
        testDat = dat[rp[nTrain+1:]]
        trainLabel = label[rp[:nTrain]]
        testLabel = label[rp[nTrain+1:]]
        svm = SVC(kernel='linear').fit(trainDat,trainLabel.ravel())

        testScore.append(svm.score(testDat,testLabel))


        trainScore.append(svm.score(trainDat,trainLabel))
    return [np.mean(trainScore),np.mean(testScore)]

@ignore_warnings(category=ConvergenceWarning)
def svmPoly(dat,label,threshold = 0.5):
    trainScore = []
    testScore = []
    for seed in range(10):
        np.random.seed(seed)
        nInst,_ = dat.shape
        nTrain = 200
        rp = np.random.permutation(nInst)
        nTest = nInst - nTrain
        trainDat = dat[rp[:nTrain]]
        testDat = dat[rp[nTrain+1:]]
        trainLabel = label[rp[:nTrain]]
        testLabel = label[rp[nTrain+1:]]
        svm = SVC(kernel='poly').fit(trainDat,trainLabel.ravel())

        testScore.append(svm.score(testDat,testLabel))

        trainScore.append(svm.score(trainDat,trainLabel))
    return [np.mean(trainScore),np.mean(testScore)]

if __name__ == "__main__":
    annots = loadmat('USPS_handwritten.mat')
    print(annots.keys())
    dat = annots['A']
    nInst,fLen = dat.shape
    label = annots['L']

    print("SVM Linear")
    print(svmLinear(dat,label))
    print("SVM Poly")
    print(svmPoly(dat,label))
    print("Accuracy:" , metrics.accuracy_score(y_test, y_prediction))
    print("Confusion Matrix:\n", metrics.confusion_matrix(y_test, y_prediction))
    print("Training Time:" + str(stop - start) + "s")


dict_keys(['__header__', '__version__', '__globals__', 'A', 'L'])
SVM Linear
[1.0, 0.8817077527688459]
SVM Poly
[0.9810000000000001, 0.8148267238299394]
