In [10]:
import numpy as np
import time
import pickle
import random
import math

FEATURE_NUM = 123
def loadDataSet(filename):
        dataMat = []
        labelMat = []
        fr = open(filename, 'r')
        for line in fr.readlines():
            lineArr = line.strip().split(' ')
            vec = np.zeros(FEATURE_NUM)
            for v in lineArr[1:]:
                feature_no = int(v.split(':')[0])
                feature_value = int(v.split(':')[1])
                vec[feature_no-1] = feature_value
            dataMat.append(vec)
            labelMat.append(int(lineArr[0]))
        return np.array(dataMat), np.array(labelMat)
        
def predict(dataMatrix, classLabels, weights, bias):
    m, n = np.shape(dataMatrix)
    right = 0
    wrong = 0
    for i in range(m): 
        rst = np.sum(np.dot(weights,dataMatrix[i]))+ bias
        if rst >= 1:
            rst = 1
        else:
            rst = -1
        if rst == classLabels[i]:
            right += 1
        else:
            wrong += 1
    return right, wrong

def derivateLw(dataMatrix, classLabels, weights, bias, C):
    condition = 1 - np.dot(classLabels, np.matmul(dataMatrix, np.transpose(weights))+bias)
    compare = np.less(condition, 0)
    gw = np.zeros(FEATURE_NUM)
    for i, c in enumerate(compare):
        if not c:
            gw[i] -= (classLabels[i]*dataMatrix[i])
    Lw = weights + (C/len(dataMatrix))*gw
    return Lw

def derivateLb(dataMatrix, classLabels, weights, bias, C):
    condition = 1 - np.dot(classLabels, np.matmul(dataMatrix, np.transpose(weights))+bias)
    compare = np.less(condition, 0)
    gb = 0.0
    for i, c in enumerate(compare):
        if not c:
            gb[i] -= classLabels[i]
    Lb = (C/len(dataMatrix))*gb
    return Lb

def NAG(dataMatrix, classLabels, weights, bias, k, alpha, v):
    C = 64
    v = k*v - alpha * derivateLw(dataMatrix, classLabels, weights, bias, C)
    weights = weights + v
    lr = 0.01
    bias = bias - lr*derivateLb(dataMatrix, classLabels, weights, bias, C)
    return weights, bias, v

def stocGradDescent(dataMatrix, classLabels, weights, bias):
    m, n = np.shape(dataMatrix)
    lr = 0.01
    C = 64
    weights = weights - lr*derivateLw(dataMatrix, classLabels, weights, bias, C)
    bias = bias - lr*derivateLb(dataMatrix, classLabels, weights, bias, C)
    return weights, bias
    
def saveModel(object):
    saveDir = "F:\\实验\\lab2\\svm_model_"+str(time.strftime('%Y-%m-%d-%H-%M-%S',time.localtime(time.time())))+".m"
    with open(saveDir, 'wb') as f:
        pickle.dump(object, f)
        print (saveDir + " Save Success!")

def loadModel(filename):
    with open(filename, 'rb') as f:
        param_list = pickle.load(f)
        return param_list[0], param_list[1]

def train(preModelName=""):
    weights = np.random.randn(1,FEATURE_NUM)
    v = 0
    k = 0.3
    alpha = 0.7
    bias = np.random.randn()
    if preModelName == "":
        pass
    else:
        weights, bias = pickle.load(preModelName)
    dMat_train, lMat_train = loadDataSet("F:\\实验\\lab2\\a9a_train")                                                         
    batch_num = 128
    iter_num = 10000
    for i in range(iter_num):
        if i%20000 == 0 and i!=0:
            saveModel([weights, bias])
        stocSelect = random.sample(list(np.arange(len(dMat_train))),batch_num)
        miniBatch = dMat_train[stocSelect]
        miniLabels = lMat_train[stocSelect]
        # weights, bias, v = NAG(miniBatch, miniLabels, weights, bias, k, alpha, v)
        weights, bias = stocGradDescent(miniBatch, miniLabels, weights, bias)
        right,wrong = predict(miniBatch, miniLabels, weights, bias)
        print ("iter:"+str(i)+" train acc:"+str(right/(right+wrong)))
    dMat_test, lMat_test = loadDataSet("F:\\实验\\lab2\\a9a.t")
    right,wrong = predict(dMat_test, lMat_test, weights, bias)
    print ("test acc:"+str(right/(right+wrong)))
    
if __name__ == "__main__":
    train()

iter:0 train acc:0.625
iter:1 train acc:0.71875
iter:2 train acc:0.640625
iter:3 train acc:0.703125
iter:4 train acc:0.7109375
iter:5 train acc:0.7109375
iter:6 train acc:0.65625
iter:7 train acc:0.75
iter:8 train acc:0.6484375
iter:9 train acc:0.7109375
iter:10 train acc:0.734375
iter:11 train acc:0.640625
iter:12 train acc:0.75
iter:13 train acc:0.765625
iter:14 train acc:0.75
iter:15 train acc:0.7734375
iter:16 train acc:0.71875
iter:17 train acc:0.75
iter:18 train acc:0.71875
iter:19 train acc:0.6953125
iter:20 train acc:0.703125
iter:21 train acc:0.6796875
iter:22 train acc:0.7734375
iter:23 train acc:0.75
iter:24 train acc:0.7421875
iter:25 train acc:0.71875
iter:26 train acc:0.6875
iter:27 train acc:0.734375
iter:28 train acc:0.78125
iter:29 train acc:0.703125
iter:30 train acc:0.6953125
iter:31 train acc:0.7109375
iter:32 train acc:0.734375
iter:33 train acc:0.671875
iter:34 train acc:0.71875
iter:35 train acc:0.765625
iter:36 train acc:0.75
iter:37 train acc:0.7265625
iter:38 

iter:348 train acc:0.71875
iter:349 train acc:0.796875
iter:350 train acc:0.78125
iter:351 train acc:0.734375
iter:352 train acc:0.765625
iter:353 train acc:0.765625
iter:354 train acc:0.734375
iter:355 train acc:0.671875
iter:356 train acc:0.7734375
iter:357 train acc:0.75
iter:358 train acc:0.796875
iter:359 train acc:0.7421875
iter:360 train acc:0.71875
iter:361 train acc:0.703125
iter:362 train acc:0.7109375
iter:363 train acc:0.7890625
iter:364 train acc:0.796875
iter:365 train acc:0.671875
iter:366 train acc:0.78125
iter:367 train acc:0.7734375
iter:368 train acc:0.703125
iter:369 train acc:0.75
iter:370 train acc:0.6796875
iter:371 train acc:0.765625
iter:372 train acc:0.828125
iter:373 train acc:0.7890625
iter:374 train acc:0.765625
iter:375 train acc:0.6953125
iter:376 train acc:0.78125
iter:377 train acc:0.8046875
iter:378 train acc:0.8046875
iter:379 train acc:0.75
iter:380 train acc:0.7421875
iter:381 train acc:0.7109375
iter:382 train acc:0.7109375
iter:383 train acc:0.789

iter:699 train acc:0.7734375
iter:700 train acc:0.734375
iter:701 train acc:0.8359375
iter:702 train acc:0.7109375
iter:703 train acc:0.734375
iter:704 train acc:0.765625
iter:705 train acc:0.7578125
iter:706 train acc:0.6796875
iter:707 train acc:0.6953125
iter:708 train acc:0.7421875
iter:709 train acc:0.7734375
iter:710 train acc:0.75
iter:711 train acc:0.75
iter:712 train acc:0.75
iter:713 train acc:0.7578125
iter:714 train acc:0.7109375
iter:715 train acc:0.7890625
iter:716 train acc:0.78125
iter:717 train acc:0.8125
iter:718 train acc:0.71875
iter:719 train acc:0.734375
iter:720 train acc:0.734375
iter:721 train acc:0.71875
iter:722 train acc:0.734375
iter:723 train acc:0.75
iter:724 train acc:0.7734375
iter:725 train acc:0.734375
iter:726 train acc:0.7890625
iter:727 train acc:0.7109375
iter:728 train acc:0.7109375
iter:729 train acc:0.7890625
iter:730 train acc:0.734375
iter:731 train acc:0.75
iter:732 train acc:0.734375
iter:733 train acc:0.6484375
iter:734 train acc:0.78125
i

KeyboardInterrupt: 