In [None]:
import numpy as np
import pandas as pd
import cvxpy as cp
import cvxopt
from collections import Counter
from sklearn.metrics import confusion_matrix

## 1' Import Dataset

In [None]:
# Labeled set: features and targets live in separate CSV files and are joined
# column-wise. `axis` is passed by keyword because pandas >= 2.0 no longer
# accepts it positionally in `concat` / `drop`.
train = pd.concat([pd.read_csv("X_labeled.csv"), pd.read_csv("y_labeled.csv")], axis=1)
# Discard the columns at positions 0 and 56 of the joined frame
# (presumably the saved row-index column of each CSV -- TODO confirm).
train = train.drop(train.columns[[0,56]], axis=1)

# Unlabeled (auxiliary) set; its first column is discarded as well.
aux = pd.read_csv("X_unlabeled.csv")
aux = aux.drop(aux.columns[0], axis=1)

# Quick look at the value counts of the target and of the "school" column.
print(Counter(train["target"]))
print(Counter(train["school"]))
print(Counter(aux["school"]))

## 2' Define S3VM

In [None]:
def S3VM(train, aux, C_pos,C_neg):
    """Fit a linear SVM on labeled rows plus hinge penalties on unlabeled rows.

    The labeled frame is split by ``train["target"]`` into a positive (``1``)
    and a negative (``-1``) group, each with its own slack penalty.

    Parameters
    ----------
    train : pandas.DataFrame
        Labeled samples. Needs a ``"target"`` column holding ``1`` / ``-1``.
        Columns ``1:-1`` are used as features and the last column as the label
        (the first column is skipped).
    aux : pandas.DataFrame
        Unlabeled samples. Columns ``1:`` are used as features (the first
        column is skipped); they must line up with the features of ``train``.
    C_pos, C_neg : float
        Penalty weights. ``C_pos`` weights the slack of the positive labeled
        rows and the ``xi`` slack of the unlabeled rows; ``C_neg`` weights the
        slack of the negative labeled rows and the ``z`` slack of the
        unlabeled rows.

    Returns
    -------
    tuple
        ``(w.value, b.value)``: the weight vector and the offset as reported
        by cvxpy after solving.

    Raises
    ------
    Exception
        Whatever the last solver raised, if the default solver, ``ECOS_BB``
        and ``SCS`` all fail.

    Notes
    -----
    NOTE(review): ``d`` is a fixed all-ones vector, not a binary decision
    variable. With ``d == 1`` the big-M pairs reduce to
    ``xi >= 1 - (aux @ w + b)`` and ``z >= 1 + (aux @ w + b)``; the two
    constraints carrying ``M*d`` are slack by ``M``. Every unlabeled row is
    therefore penalised on both sides of the margin. Confirm this relaxation
    is intended.
    """
    train_pos = train[train["target"]==1]
    train_neg = train[train["target"]==-1]

    # Plain float ndarrays (instead of np.matrix) so that `@` below is an
    # ordinary matrix-vector product inside cvxpy.
    Yt_pos = np.asarray(train_pos.iloc[:,-1])
    Xt_pos = np.asarray(train_pos.iloc[:,1:-1], dtype=float)
    Yt_neg = np.asarray(train_neg.iloc[:,-1])
    Xt_neg = np.asarray(train_neg.iloc[:,1:-1], dtype=float)
    X_aux = np.asarray(aux.iloc[:,1:], dtype=float)

    n_pos, F = Xt_pos.shape
    n_neg = Xt_neg.shape[0]
    n2 = X_aux.shape[0]

    # Per-sample cost vectors (constant within each group).
    cost_pos = C_pos*np.ones(n_pos)
    cost_neg = C_neg*np.ones(n_neg)
    cost_aux_pos = C_pos*np.ones(n2)
    cost_aux_neg = C_neg*np.ones(n2)

    M = 1000000  # big-M constant of the unlabeled-data constraints

    w = cp.Variable(F)
    e_pos = cp.Variable(n_pos)
    e_neg = cp.Variable(n_neg)
    b = cp.Variable()
    xi = cp.Variable(n2)
    z = cp.Variable(n2)
    d = np.ones(n2)  # constant side indicator, see Notes in the docstring

    objective = cp.Minimize(0.5*cp.square(cp.norm(w))
                            +cp.sum(cp.multiply(cost_pos,e_pos))
                            +cp.sum(cp.multiply(cost_neg,e_neg))
                            +cp.sum(cp.multiply(cost_aux_pos,xi))
                            +cp.sum(cp.multiply(cost_aux_neg,z)))

    aux_score = X_aux @ w + b
    constraints =[cp.multiply(Yt_pos,Xt_pos @ w + b) - 1 + e_pos >= 0,e_pos >= 0,
                  cp.multiply(Yt_neg,Xt_neg @ w + b) - 1 + e_neg >= 0,e_neg >= 0,
                  aux_score + M*(1-d) - 1 + xi >= 0,
                  aux_score + M*d - 1 + xi >= 0,
                  xi >= 0,
                  -aux_score + M*d - 1 + z >= 0,
                  -aux_score + M*(1-d) - 1 + z >= 0,
                  z >= 0]

    prob = cp.Problem(objective,constraints)

    # Try the default solver first, then fall back to ECOS_BB and SCS.
    # Only `Exception` is caught so Ctrl-C / SystemExit still stop the run.
    last_error = None
    for solver in (None, "ECOS_BB", "SCS"):
        try:
            prob.solve(solver=solver)
            break
        except Exception as err:
            last_error = err
    else:
        raise last_error
    return w.value, b.value

## 3' Select Best Parameter

In [None]:
def cmc(C_1,C_2):
    """Confusion matrix on the labeled set for one pair of cost parameters.

    Fits ``S3VM`` on the notebook-level ``train`` / ``aux`` frames with
    ``C_pos=C_1`` and ``C_neg=C_2``, scores every row of ``train`` with the
    resulting hyperplane and labels it ``1`` when the score is strictly
    positive, ``-1`` otherwise.

    Returns the result of ``confusion_matrix(train["target"], predictions)``.
    """
    weights, bias = S3VM(train,aux,C_1,C_2)
    scores = (np.matmul(train.iloc[:,1:-1],weights)+bias)

    # A score of exactly zero falls on the -1 side.
    predicted = [1 if scores[k] > 0 else -1 for k in range(0,len(scores))]

    return confusion_matrix(train["target"],predicted)

# Grid search over (C_pos, C_neg) on a log2 grid from 2**-8 to 2**8.
# A pair is reported as a candidate when the sum of the two per-row accuracies
# exceeds `lower_thre` and their absolute difference stays below `diff_thre`.
diff_thre = 0.06
lower_thre = 1.6
info = []
candidates = []
cost_grid = 2**np.linspace(-8,8,17)
for cost_pos in cost_grid:
    for cost_neg in cost_grid:
        cm = cmc(cost_pos,cost_neg)
        # Diagonal entry over row total, for each of the two rows.
        rate_row0 = cm[0,0]/sum(cm[0,:])
        rate_row1 = cm[1,1]/sum(cm[1,:])
        acc_sum = rate_row0 + rate_row1
        acc_diff = abs(rate_row0 - rate_row1)
        info.append([cost_pos,cost_neg,acc_sum,acc_diff])
        if acc_diff<diff_thre and acc_sum >lower_thre:
            print("pos:",cost_pos,"neg:",cost_neg,"acc_sum:",acc_sum,"acc_diff:",acc_diff)
            candidates.append([cost_pos,cost_neg,acc_sum,acc_diff])
# Persist every evaluated pair so the selection step can be rerun without refitting.
pd.DataFrame(info).to_csv('info_S3VM.csv')

In [None]:
# Reload the grid-search table written to info_S3VM.csv.
# `DataFrame.from_csv` was removed in pandas 1.0; `read_csv(..., index_col=0)`
# reads the same layout (first CSV column is the row index).
info = pd.read_csv("info_S3VM.csv", index_col=0)

# Columns by position: 0 = C_pos, 1 = C_neg, 2 = acc_sum, 3 = acc_diff.
acc_sum_col = info.iloc[:,2]
acc_diff_col = info.iloc[:,3]

# Keep pairs whose acc_sum is within 1 of the best one and whose acc_diff is
# below 0.1. `.max()` skips NaN entries, unlike the builtin `max`.
selected = info.loc[(acc_sum_col>acc_sum_col.max()-1)&(acc_diff_col<0.1),:]
print(selected.shape)
print(selected)

## 3' Training Accuracy and Confusion Matrix

In [None]:
# Refit one model per selected (C_pos, C_neg) pair on the full labeled set,
# report its training confusion matrix, then combine all models by majority
# vote.
#
# Predictions use the rule "score > 0 -> 1, otherwise -1" (same as `cmc`).
# Dividing a score by its absolute value would give NaN for a zero score or a
# tied vote. Printed accuracies are scaled by 100 to match the "%" suffix.
train_features = np.asarray(train.iloc[:,1:-1], dtype=float)  # loop-invariant
result_combo = []
for j in range(len(selected)):
    C_pos,C_neg = selected.iloc[j,0:2]
    w,b = S3VM(train,aux,C_pos,C_neg)
    result = np.where(train_features @ w + b > 0, 1, -1)
    result_combo.append(result)
    cm = confusion_matrix(train["target"],result)
    print(j+1,":training confusion matrix:\n",cm)
    print("%f" % (100*(cm[0,0]+cm[1,1])/train.shape[0]),"%")

if not result_combo:
    raise ValueError("no (C_pos, C_neg) pair was selected; nothing to vote on")

# Majority vote: sum the +/-1 predictions per sample; a tie counts as -1.
votes = np.sum(np.asarray(result_combo), axis=0)
voted = np.where(votes > 0, 1, -1)
cm_voted = confusion_matrix(train["target"],voted)
print("voted confusion matrix:\n",cm_voted)
print("%f" % (100*(cm_voted[0,0]+cm_voted[1,1])/train.shape[0]),"%")
print("-1:","%f" % (100*cm_voted[0,0]/sum(cm_voted[0,:])),"%")
print("1:","%f" % (100*cm_voted[1,1]/sum(cm_voted[1,:])),"%")

## 4' Test Accuracy and Confusion Matrix (Leave-One-Out)

In [None]:
# Leave-one-out evaluation on the labeled set: for every selected
# (C_pos, C_neg) pair and every labeled row, refit S3VM without that row and
# predict the held-out row. The per-pair predictions are then combined by
# majority vote.
#
# The loop covers every row of `selected`; the earlier `len(selected)-1` bound
# silently skipped the last pair.
# Predictions use the rule "score > 0 -> 1, otherwise -1"; dividing a score by
# its absolute value would give NaN for a zero score or a tied vote.
# Printed accuracies are scaled by 100 to match the "%" suffix.
train_features = np.asarray(train.iloc[:,1:-1], dtype=float)
result_combo_test = []
for j in range(len(selected)):
    C_pos,C_neg = selected.iloc[j,0:2]
    scores = []
    for i in range(train.shape[0]):
        # Fit on all labeled rows except row i, then score row i.
        w,b = S3VM(train.drop(train.index[i]),aux,C_pos,C_neg)
        scores.append(np.dot(train_features[i], w) + b)
    result = np.where(np.asarray(scores, dtype=float) > 0, 1, -1)
    result_combo_test.append(result)
    cm = confusion_matrix(train["target"],result)
    print(j+1,":test confusion matrix:\n",cm)
    print("%f" % (100*(cm[0,0]+cm[1,1])/train.shape[0]),"%")

if not result_combo_test:
    raise ValueError("no (C_pos, C_neg) pair was selected; nothing to vote on")

# Majority vote: sum the +/-1 predictions per sample; a tie counts as -1.
votes_test = np.sum(np.asarray(result_combo_test), axis=0)
voted_test = np.where(votes_test > 0, 1, -1)
cm_voted_test = confusion_matrix(train["target"],voted_test)
print("voted confusion matrix:\n",cm_voted_test)
print("%f" % (100*(cm_voted_test[0,0]+cm_voted_test[1,1])/train.shape[0]),"%")
print("-1:","%f" % (100*cm_voted_test[0,0]/sum(cm_voted_test[0,:])),"%")
print("1:","%f" % (100*cm_voted_test[1,1]/sum(cm_voted_test[1,:])),"%")