In [20]:
import numpy as np
import math

def read_data(): 
    x = [] 
    with open('iris.data', 'r') as f: 
        for line in f: 
            if line == '\n': 
                continue 
            a, b, c, d, e = line.split(',') 
            if e == 'Iris-setosa\n': 
                e = 1
            elif e == 'Iris-versicolor\n': 
                e = 2
            else: 
                e = 3
            x.append([float(1),float(a), float(b), float(c), float(d), float(e)]) 
    return x

def sigma(z):
    return float(1) / (1 + math.e**(-z))

def onlinegd(eta,T, reg,trainingX, trainingY):
    w = np.array([0,0,0,0,0])
    weight_sum = w
    for i in xrange(T):
        for j in xrange(trainingX.shape[0]):
            x = trainingX[j]
            y = trainingY[j]
            a = sigma(x.dot(w))
            w = w - eta*(a-y)*x - eta*2*reg*w
        weight_sum=weight_sum+w
    return w

def sgd(eta,T, reg,trainingX, trainingY):
    w = np.array([0,0,0,0,0])
    weight_sum = w
    for i in xrange(T):
        j = np.random.randint(0,trainingX.shape[0])
        x = trainingX[j]
        y = trainingY[j]
        a = sigma(x.dot(w))
        w = w - eta*(a-y)*x - eta*2*reg*w
        weight_sum=weight_sum+w
    return weight_sum / T

def bgd(eta,T, reg,trainingX, trainingY):
    w = np.array([0,0,0,0,0])
    weight_sum = w
    for i in xrange(T):
        dw = np.array([0.0,0.0,0.0,0.0,0.0])
        for j in xrange(trainingX.shape[0]):
            x = trainingX[j]
            y = trainingY[j]
            a = sigma(x.dot(w))
            dw = dw+ (a-y)*x
        w = w - eta*dw/trainingX.shape[0] - eta*2*reg*w
        weight_sum=weight_sum+w
    return w

def bgd_vec(eta,T, reg,x, y):
    w = np.matrix([0,0,0,0,0])
    weight_sum = w
    for i in xrange(T):
        z = np.squeeze(np.asarray(np.dot(x,w.T).T))
        a= np.array(sigma(z))  
        dz = a-y.T
        dw = np.dot(dz,x)
        w = w - eta*dw/x.shape[0] - eta*2*reg*w
        weight_sum = weight_sum+w
    return np.squeeze(np.asarray(w ))

def cost_func(w, x, y,reg):
    log_func_v = sigma(x.dot(w))
    y = np.squeeze(y)
    step1 = y * np.log(log_func_v)
    step2 = (1-y) * np.log(1 - log_func_v)
    final = -step1 - step2 
    return np.mean(final)+reg*w.dot(w)

def validate(x,y,w,reg):
    count_good = 0
    for j in xrange(x.shape[0]):
        y_reg =1 if sigma(x[j].dot(w))>0.5 else 0
        if y_reg == y[j][0]:
            count_good+=1
    print "Ld =", cost_func(w,x,y,reg),"correct = ", float(count_good)/x.shape[0]
            
def model(eta,T,reg,x,iris_num, algo):
    trainingX = x[:9*x.shape[0]/10,:5]
    validationX = x[9*x.shape[0]/10:,:5]
    trainingY = x[:9*x.shape[0]/10,5:]
    validationY = x[9*x.shape[0]/10:,5:]
    np.place(trainingY,trainingY!=iris_num,[0])
    np.place(trainingY,trainingY==iris_num,[1])
    np.place(validationY,validationY!=iris_num,[0])
    np.place(validationY,validationY==iris_num,[1])
    w = algo(eta,T,reg,trainingX, trainingY)
    validate(validationX, validationY,w,reg)
    return w
    
def model_kfold(eta_values,T_values,reg_values, x, algo,k,iris_num):
    trainingX = x[:9*x.shape[0]/10,:5]
    ty = x[:9*x.shape[0]/10,5:]
    np.place(ty,ty!=iris_num,[0])
    np.place(ty,ty==iris_num,[1])
    minL = 1000000
    goodEta = 0
    goodT = 0
    goodReg = 0
    for eta in eta_values:
        for reg in reg_values:
            for T in T_values:
                L = k_fold(k,trainingX,ty,reg,eta,T,algo)
                if(L<minL):
                    minL = L
                    goodT = T
                    goodEta = eta
                    goodReg = reg
    return goodEta,goodT,goodReg
    
def k_fold(k,x,y,reg,eta,T,algo):
    mk = x.shape[0]/k
    error = 0
    for i in range(k):
        validationX = x[i*mk:(i+1)*mk,:]
        validationY = y[i*mk:(i+1)*mk,:]
        trainX = np.concatenate((x[:i*mk,:], x[(i+1)*mk:,:]),axis = 0)
        trainY = np.concatenate((y[:i*mk,:], y[(i+1)*mk:,:]),axis = 0)
        w = algo(eta,T,reg,trainX, trainY)
        
        ls = cost_func(w,x,y,reg)
        error += ls
        
    return error/k
        
x = np.array(read_data())
np.random.shuffle(x)

for i in xrange(1,4):
    print "Iris number",i
    (sgdEta,sgdT,sgdReg) = model_kfold([0.001,0.005,0.01,0.1,0.3,0.5],[20,50,100,200,300],[0.001,0.005,0.01,0.1,0.3,0.5], x.copy(), sgd,5,i)
    print "sgd ",i,(sgdEta,sgdT,sgdReg)
    w = model(sgdEta,sgdT,sgdReg,x.copy(),i,sgd)
    print "w",w
    (bgdEta,bgdT,bgdReg) = model_kfold([0.001,0.005,0.01,0.1,0.3,0.5],[20,50,100,200,300],[0.001,0.005,0.01,0.1,0.3,0.5], x.copy(), bgd_vec,5,i)
    print "bgd",i,(bgdEta,bgdT,bgdReg)
    w = model(bgdEta,bgdT,bgdReg,x.copy(),i,bgd_vec)
    print "w",w
    





Iris number 1
sgd  1 (0.5, 300, 0.001)
Ld = 0.0242809887471 correct =  1.0
w [ 0.37654995  0.54296351  2.36726421 -3.39984738 -1.75808555]
bgd 1 (0.5, 300, 0.001)
Ld = 0.0233638012001 correct =  1.0
w [ 0.32222756  0.51238393  1.73203106 -2.74875008 -1.22243657]
Iris number 2
sgd  2 (0.01, 300, 0.001)
Ld = 0.571495194109 correct =  0.8
w [-0.01871263 -0.06608165 -0.16092272  0.12569011  0.0119484 ]
bgd 2 (0.1, 300, 0.001)
Ld = 0.485847787162 correct =  0.8
w [ 0.22387324  0.23854458 -1.0134083   0.38319592 -0.60132857]
Iris number 3
sgd  3 (0.1, 300, 0.001)
Ld = 0.190774109148 correct =  1.0
w [-0.4737664  -1.31850187 -1.02681664  1.95693202  1.39686599]
bgd 3 (0.3, 300, 0.001)
Ld = 0.135140326114 correct =  1.0
w [-1.15005796 -2.14101338 -1.91280044  3.14161969  2.78399156]
