In [4]:
import numpy as np
import pandas as pd
import time
from sklearn.svm import SVC,LinearSVC
from sklearn.multiclass import OneVsRestClassifier
import sklearn.metrics
import copy

In [5]:
def read(readdata):
    data = np.array(readdata);
    data = np.delete(data, 0, 0)
    data = np.swapaxes(data,0,1)
    for i in range(0, np.shape(data)[0]):
        data[i,:] = data[i,:] - np.min(data[i,:])
        data[i,:] = (data[i,:]/np.ndarray.max(data[i,:]))*2 - 1
    # Need to seperate the classification dimension:
    classIndex = np.shape(data)[0] - 1
    classification = data[classIndex]
    data = np.delete(data, classIndex, axis=0)
    data = data.astype(float)
    d = np.shape(data)[0]
    nPoints = np.shape(data)[1]
    colour = np.chararray((len(classification)),unicode = True)
    colour = np.chararray((len(classification)),unicode = True)
    for i in range(len(classification)):
        if classification[i] == 1:
            colour[i] = 'r'
        elif classification[i] == 0:
            colour[i] = 'b'
        elif classification[i] == -1:
            colour[i] = 'g'
    return data,classification,d,nPoints,colour

readdata = pd.read_csv("/Users/karanmukhi/Desktop/TheGrandTour/Datasets/wineData.txt", sep=",", header=None);
(data, classification, d, nPoints, colour) = read(readdata)
labels = np.array([-1,0,1])

In [35]:
def iterator(X, G, tau):
    """
    Computes Y(tau) or X^{t+1} given X^t and G
    """
    
    if np.shape(X)[1] > 0.5 * np.shape(X)[0]:
        I = np.identity(np.shape(X)[0])
        W = np.matmul(G, X.T) - np.matmul(X, G.T)
        term = (I + (tau / 2) * W)
        Y = np.matmul(np.linalg.inv(term), np.matmul(term, X))
    else:
        I = np.identity(np.shape(X)[1]*2)
        U = np.concatenate((G, X), axis=1)
        V = np.concatenate((X, -G), axis=1)
        B = np.identity(np.shape(X)[1]*2) + (tau / 2) * np.matmul(V.T, U)
        B = np.linalg.inv(B)
        B = np.matmul(U,B)
        A = np.matmul(V.T,X)
        Y = X - tau*np.matmul(B,A)
    return Y

def derivative(X, costfn, eps = 0.0001):
    G = np.zeros((np.shape(X)[0], np.shape(X)[1]))
    for i in range(np.shape(X)[0]):
        for j in range(np.shape(X)[1]):
            A = copy.deepcopy(X)
            A[i,j] += eps
            G[i,j] = (costfn(A) - costfn(X)) / eps
    return G

def hingeLoss(nData):
    clf = OneVsRestClassifier(SVC(kernel='linear'), n_jobs=-1)
    clf.fit(nData, classification)
    prob = clf.decision_function(nData)
    score = sklearn.metrics.hinge_loss(classification,prob,labels)
    return score

def costfn(X):
    xData = transform(X,data)
    w = hingeLoss(xData)
    return w

def transform(X, data):
    xData = np.dot(data.T,X)
    return xData

def SVM(X):
    nData = transform(X,data)
    clf = OneVsRestClassifier(SVC(kernel='linear'), n_jobs=-1)
    clf.fit(nData, classification)
    score = clf.score(nData, classification)
    return score

def sigma(i):
    return 0.1;

def noise(X,var):
    beta = 1 - sqrt(2)/2
    I = np.identity(np.shape(X)[0])
    A = beta*np.matmul(X,X.T)
    A = I-A
    B = np.random.normal(0,var,(np.shape(X)))
    return matmul(A,B)

In [36]:
def optimise(data, metric, p, tau, T):
    '''
    costfunc - the function to be minimised
    p - number of dimensions to project onto
    tau - step size on Steifel manifold
    T - number of steps
    '''
    n = np.shape(data)[0] #native dimensions of data
    o = np.shape(data)[1]
    X = np.zeros((n,p)) 
    xData = np.zeros((o,p))
    tX = np.zeros((T,n,p))
    tData = np.zeros((T,o,p))
    for i in range (p):
        X[i,i] = 1
    for i in range(T):
        start = time.time()
        G = derivative(X, costfn)
        Z = G + sigma(i)*noise(X,var)
        Y = iterator(X,Z,tau)
        X = Y
        tX[i,:,:] = X
        tData[i,:,:] = transform(X,data) 
        end = time.time()
        print("Time remaining:", (end-start)*(T-i), costfn(X), SVM(X), end = "\r")
    tData = np.swapaxes(tData,1,2)
    tX = np.swapaxes(tX,1,2)
    return tData, tX
    



In [22]:
tData, tBeta = optimise(data, hingeLoss, 2, 1, 100)

Time remaining: 0.5356249809265137 0.018344689703444564 0.9943502824858758

In [28]:
import sys
import numpy as np
import pyqtgraph as pg
from pyqtgraph.Qt import QtGui, QtCore


if __name__ == '__main__':
    
    plotSVM = 0
    plotNN = 0
    plotDT = 0
    plotBasis = 1
    plotTour = 1
    
    read = 1
    xAxisIndex = 0
    yAxisIndex = 1
    d = np.shape(tData)[1]
    nSteps = np.shape(tData)[0]
    app = 0
    app = QtGui.QApplication([])
    win = pg.GraphicsWindow(title = "GrandTour with SVM")
    win.setGeometry(0,0,1400,1200)
    win.setWindowTitle('GrandTour with SVM')
    pg.setConfigOptions(antialias=True) #dunno what that means

    
    def pause(): 
        global p
        if p == 0:
            p = 1
            pBtn.setText("Play")
            print(j)
        elif p == 1:
            p = 0
            pBtn.setText("Pause")
    
    def forward():
        global j
        j += 1
    def back():
        global j
        j -= 1
    
    j = 0
    p = 0
    jmax = nSteps + 1
    
    proxy = QtGui.QGraphicsProxyWidget()
    pBtn = QtGui.QPushButton("Pause")
    pBtn.clicked.connect(pause)
    proxy.setWidget(pBtn)  
    win.addItem(proxy,row=3,col=1)
    
    fproxy = QtGui.QGraphicsProxyWidget()
    fBtn = QtGui.QPushButton("Forward")
    fBtn.clicked.connect(forward)
    fproxy.setWidget(fBtn)  
    win.addItem(fproxy,row=3,col=2)
    
    bproxy = QtGui.QGraphicsProxyWidget()
    bBtn = QtGui.QPushButton("Back")
    bBtn.clicked.connect(back)
    bproxy.setWidget(bBtn)  
    win.addItem(bproxy,row=3,col=0)
    
    if plotTour == 1:
        w1 = win.addPlot(title="Transformed Points", col = 0, colspan = 2, row = 0, rowspan = 3)
        w1.setGeometry(0,100,14,12)
        nPoints = np.shape(tData)[2]
        s1 = pg.ScatterPlotItem(size=10, pen=pg.mkPen(None), brush=pg.mkBrush(0, 227, 17,120))
        pos = np.zeros((nPoints,2))
        pos[:,0] = tData[0,xAxisIndex,:]
        pos[:,1] = tData[0,yAxisIndex,:]
        if read == 1:
            s1 = pg.ScatterPlotItem(pos=pos, brush = colour, size=0.03, pxMode=False,pen=pg.mkPen(None))
        else: s1 = pg.ScatterPlotItem(pos=pos, size=0.03, pxMode=False,pen=pg.mkPen(None))
        w1.addItem(s1)
        w1.setLabel(text = "Transformed Basis: " + str(xAxisIndex), axis = 'bottom' )
        w1.setLabel(text = "Transformed Basis: " + str(yAxisIndex), axis = 'left' )
        w1.setXRange(-1.5,1.5,padding = None)
        w1.setYRange(-1.5,1.5,padding = None)
    
    if plotSVM == 1 or plotNN == 1 or plotDT == 1:
        jmax = np.argmax(accuracy)
        w2 = win.addPlot(title="Accuracy Plots", col = 0, colspan = 2,row = 2)
        w2.setXRange(0,100,padding = None)
        w2.setYRange(0,1,padding = None)
        SVMcurve = w2.plot(pen='r')
        showSVM = pg.TextItem(text = 'SVM: ' + str(accuracy[0]))
        showSVM.setParentItem(w2)
        NNcurve = w2.plot(pen='b')
        showNN = pg.TextItem(text = 'NN: ' + str(accuracy[0]))
        #showNN.setParentItem(w2)
        DTcurve = w2.plot(pen='g')
        showDT = pg.TextItem(text = 'DT: ' + str(accuracy[0]))
        #showDT.setParentItem(w2)
        jmax = np.argmax(accuracy)
    
    if plotBasis == 1:
        basis = np.arange(0,d)
        w3 = win.addPlot(title= "x Basis", row = 1, col = 2)
        w3.setYRange(0,1,padding = None)
        w4 = win.addPlot(title= "y Basis", row = 2, col = 2)
        w4.setYRange(0,1,padding = None)
    
    
    
    
    def update():
        global j, p
        
        if j > np.shape(tData)[0]-1:
            j =0
            
        if plotTour == 1:    
            pos[:,0] = tData[j,xAxisIndex,:]
            pos[:,1] = tData[j,yAxisIndex,:]
            if read == 1:
                s1.setData(pos = pos,brush = colour)
            else: s1.setData(pos = pos)
        if plotSVM == 1:
            SVMcurve.setData(SVMaccuracy[0:j])
            showSVM.setText('SVM: ' + str(round(SVMaccuracy[j],3)))
        if plotNN == 1:
            NNcurve.setData(NNaccuracy[0:j])
            showSVM.setText('NN: ' + str(round(NNaccuracy[j],3)))
        if plotDT == 1:
            DTcurve.setData(DTaccuracy[0:j])
            showSVM.setText('DT: ' + str(round(DTaccuracy[j],3)))
        
            
        if plotBasis == 1:
            w3.clear()
            w4.clear()
            basis1 = pg.BarGraphItem(x = basis, height = abs(tBeta[j,xAxisIndex,0]), width=0.1, brush='g')
            w3.addItem(basis1)
            basis2 = pg.BarGraphItem(x = basis, height = abs(tBeta[j,yAxisIndex,0]), width=0.1, brush='g')
            w4.addItem(basis2)
    
       
        
            
        if not p:
            if plotSVM == 1 or plotNN == 1 or plotDT == 1:
                w2.setXRange(0,np.shape(tData)[0],padding = None)
            j += 1
            if j == jmax:
                pause()
    t = QtCore.QTimer()
    t.timeout.connect(update)
    t.start(60)

## 2) Spots are transform-invariant, but not identical (top-right plot). 
## In this case, drawing is almsot as fast as 1), but there is more startup 
## overhead and memory usage since each spot generates its own pre-rendered 
## image.
    sys.exit(app.exec_())
    



53


SystemExit: 0

In [34]:
B = np.random.normal(0,1,(2,3))
print(B)

[[-2.65327417  0.07575286  0.53398488]
 [-1.10024406 -1.46001481  0.9307025 ]]


In [None]:
tBeta[1]