In [1]:
import numpy as np
import sklearn as sk
import math
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [2]:
with open('wine.data','r') as f:
    x = f.read().split()
    
# convert text to a list
txt_list = []
for i in range(0,len(x)):
    y = x[i].split(',')
    for j in range(0,len(y)):
        y[j] = float(y[j])
    txt_list.extend([y])
    
wine_data = np.array(txt_list)

In [3]:
# split wine data into the target and data
labels = wine_data[:,0]
wine_data = wine_data[:,1:]

In [4]:
wine_data_train = wine_data[labels <= 2.5] # get rid of index 3
label_train     = labels[labels<=2.5]

In [5]:
# turn labels into binary
label_train[label_train ==1] = 0
label_train[label_train ==2] = 1

In [6]:
label_train

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [7]:
wine_data_train.shape,label_train.shape

((130, 13), (130,))

In [8]:
minmax = MinMaxScaler()

In [9]:
wine_data_train_norm = minmax.fit_transform(wine_data_train)

In [10]:
from sklearn.linear_model import LogisticRegression as LR

In [11]:
sklr = LR(C = 10000000)

In [12]:
sklr.fit(wine_data_train_norm,label_train)



LogisticRegression(C=10000000, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='warn', n_jobs=None, penalty='l2', random_state=None,
          solver='warn', tol=0.0001, verbose=0, warm_start=False)

In [13]:
#### LOG LOSS
loss = - 1*sk.metrics.log_loss(sklr.predict(wine_data_train_norm),label_train)
print(loss)

-9.992007221626415e-16


In [14]:
sklr.score(wine_data_train_norm,label_train)

1.0

#### MY Section ####

In [15]:
def sigmoid(weights,x,b):
#     print(weights.T@x,b)
    sig = (1/(1+math.exp(-1*(weights.T@x+b))))

    return(sig)



def likelihood(train_data,target_data,weights,b):
    #print(weights.shape,train_data[0].shape)
    L = 0
    for i in range(len(train_data)):
        L += (target_data[i]*math.log(sigmoid(weights,train_data[i],b))+\
             (1-target_data[i])*math.log(sigmoid(weights,-1*train_data[i],-b)))   
    return -L

# def log_loss(X,Y,W,b):
#     l_loss = 0
#     #print(W.T.shape,X[0].shape)
#     for i in range(len(Y)):
#         loss = np.log(1+np.exp((-Y[i]*(W.T@X[i]+b))))
#         l_loss +=loss
#     return -1*l_loss



# routine to use the errors to calculate the weight update
def calc_grad_w(weights,b,train_data,target_data):

    
    dL_dWi = np.zeros((13,1))
    for i in range(len(weights)):
        
        grad_update = 0 # stores our total weight updates
        
        for t in range(len(target_data)): # iterate over all of the data_ppoints
        
            # dL_dWi = (y_t - sigma(wx_t))*x_it
            delta = (target_data[t] - sigmoid(weights,train_data[t],b))\
                    *train_data[t,i]
            
            grad_update +=delta # accumulate the weight update over samples
            
        # after seeing the whole set, update the weight for the ith weight element
        dL_dWi[i]=grad_update 
        
    return dL_dWi,delta


def calc_grad_b(weights,b,train_data,target_data):
    g_loss_b = 0
    for j in range(len(target_data)):
        delta = target_data[j] - sigmoid(weights,train_data[j],b)
        g_loss_b += delta
    return g_loss_b


# def grad_loss_w(X,Y,W,b):
#     g_loss_w = np.zeros((len(W),1))
#     for i in range(len(W)):
#         for j in range(len(Y)):
#             g_l_w = -(Y[j]*X[j,i])/(1+np.exp(Y[j]*(W.T@X[j]+b)))
#             g_loss_w[i] += g_l_w
    
#     return g_loss_w

# def grad_loss_b(X,Y,W,b):
#     g_loss_b = 0
#     for j in range(len(Y)):
#         g_l_b = -(Y[j])/(1+np.exp(Y[j]*(W.T@X[j]+b)))
#         g_loss_b += g_l_b
#     return g_loss_b

def update(var,gradient,rate):
    
    var_new = var + rate*gradient
    return var_new


# go in the direction of maximum gradient
def alt_no_rep_update(var,gradient,rate,last_index):
    
    if last_index == None:
        max_index = np.argsort(np.abs(gradient),axis = 0)[-1]
        #print(max_index)
        var[max_index]   = update(var[max_index],gradient[max_index],rate)
        new_last_index = max_index
    
    else: # pick something that wasn't our last index
        
        max_index = np.argsort(np.abs(gradient),axis = 0)[-1]
        if max_index == last_index:
            max_index = np.argsort(np.abs(gradient),axis = 0)[-2]
        
        
        var[max_index]   = update(var[max_index],gradient[max_index],rate)
        new_last_index = max_index
        
    return var,new_last_index
        

def alt_update(var,gradient,rate):

    max_index = np.argsort(np.abs(gradient),axis = 0)[-1]   
    var[max_index]   = update(var[max_index],gradient[max_index],rate)

    return var
    

def random_update(var,gradient,rate):
    
    index = np.random.randint(len(var))  
    var[index]   = update(var[index],gradient[index],rate)

    return var
        
        
        
        
    

In [16]:
# run the program

# initialize variables
w = np.zeros((13,1))
b = 0
lr = 1
last_index = None
likelihood_log_no_rep = []

for k in range(25000):
    # calculate loss
    l = likelihood(wine_data_train_norm,label_train,w,b)
    likelihood_log_no_rep.extend([l])
    if k % 1000 == 0:
        print('iter %i, Loss: %2.5f' % (k,np.round(l,5)))
#         print('Loss:  ',np.round(l,4))
    
    # calculate gradients
    w_grad,delta = calc_grad_w(w,b,wine_data_train_norm,label_train)
    #print(type(w_grad),w_grad.shape)
    b_grad = calc_grad_b(w,b,wine_data_train_norm,label_train)
    
    # weight update
    
    w,last_index = alt_no_rep_update(w,w_grad,lr,last_index)
    b = update(b,b_grad,lr)

print('iter %i, Loss: %2.5f' % (k,np.round(l,4)))

iter 0, Loss: 90.10913
iter 1000, Loss: 0.13588
iter 2000, Loss: 0.06236
iter 3000, Loss: 0.04052
iter 4000, Loss: 0.03006
iter 5000, Loss: 0.02395
iter 6000, Loss: 0.01993
iter 7000, Loss: 0.01710
iter 8000, Loss: 0.01500
iter 9000, Loss: 0.01338
iter 10000, Loss: 0.01209
iter 11000, Loss: 0.01105
iter 12000, Loss: 0.01018
iter 13000, Loss: 0.00946
iter 14000, Loss: 0.00883
iter 15000, Loss: 0.00830
iter 16000, Loss: 0.00783
iter 17000, Loss: 0.00742
iter 18000, Loss: 0.00706
iter 19000, Loss: 0.00674
iter 20000, Loss: 0.00645
iter 21000, Loss: 0.00619
iter 22000, Loss: 0.00596
iter 23000, Loss: 0.00574
iter 24000, Loss: 0.00555
iter 24999, Loss: 0.00540


In [17]:
w

array([[-75.27246962],
       [-12.81918578],
       [-36.96634808],
       [ 52.32470433],
       [-10.59492059],
       [  0.        ],
       [  0.        ],
       [  4.16669433],
       [ -0.36849474],
       [  0.        ],
       [  0.75021154],
       [-30.25651561],
       [-47.15341448]])

In [18]:
# initialize variables
w = np.zeros((13,1))
b = 0
lr = 1
last_index = None
likelihood_log_max = []

for k in range(25000):
    # calculate loss
    l = likelihood(wine_data_train_norm,label_train,w,b)
    likelihood_log_max.extend([l])
    if k % 1000 == 0:
        print('iter %i, Loss: %2.5f' % (k,np.round(l,5)))
#         print('Loss:  ',np.round(l,4))
    
    # calculate gradients
    w_grad,delta = calc_grad_w(w,b,wine_data_train_norm,label_train)
    #print(type(w_grad),w_grad.shape)
    b_grad = calc_grad_b(w,b,wine_data_train_norm,label_train)
    
    # weight update
    
    w= alt_update(w,w_grad,lr)
    b = update(b,b_grad,lr)

print('iter %i, Loss: %2.2f' % (k,np.round(l,4)))

iter 0, Loss: 90.10913
iter 1000, Loss: 0.13566
iter 2000, Loss: 0.06242
iter 3000, Loss: 0.04049
iter 4000, Loss: 0.02997
iter 5000, Loss: 0.02380
iter 6000, Loss: 0.01974
iter 7000, Loss: 0.01687
iter 8000, Loss: 0.01474
iter 9000, Loss: 0.01309
iter 10000, Loss: 0.01177
iter 11000, Loss: 0.01070
iter 12000, Loss: 0.00981
iter 13000, Loss: 0.00906
iter 14000, Loss: 0.00842
iter 15000, Loss: 0.00787
iter 16000, Loss: 0.00738
iter 17000, Loss: 0.00696
iter 18000, Loss: 0.00658
iter 19000, Loss: 0.00624
iter 20000, Loss: 0.00594
iter 21000, Loss: 0.00567
iter 22000, Loss: 0.00542
iter 23000, Loss: 0.00520
iter 24000, Loss: 0.00499
iter 24999, Loss: 0.00


In [19]:
w

array([[-83.11416504],
       [-12.94528801],
       [-35.67872192],
       [ 52.32470433],
       [ -6.50290457],
       [  0.        ],
       [  0.        ],
       [  0.        ],
       [ -3.84240469],
       [  0.        ],
       [  0.76106167],
       [-31.16785726],
       [-52.47570376]])

In [None]:
# run the program

# initialize variables
w = np.zeros((13,1))
b = 0
lr = 1
last_index = None
likelihood_log_random = []

for k in range(25000):
    # calculate loss
    l = likelihood(wine_data_train_norm,label_train,w,b)
    likelihood_log_random.extend([l])
    if k % 1000 == 0:
        print('iter %i, Loss: %2.5f' % (k,np.round(l,5)))
#         print('Loss:  ',np.round(l,4))
    
    # calculate gradients
    w_grad,delta = calc_grad_w(w,b,wine_data_train_norm,label_train)
    #print(type(w_grad),w_grad.shape)
    b_grad = calc_grad_b(w,b,wine_data_train_norm,label_train)
    
    # weight update
    
    w = random_update(w,w_grad,lr)
    b = update(b,b_grad,lr)

print('iter %i, Loss: %2.2f' % (k,np.round(l,4)))

iter 0, Loss: 90.10913
iter 1000, Loss: 7.14402
iter 2000, Loss: 0.54909
iter 3000, Loss: 0.26427
iter 4000, Loss: 0.17968
iter 5000, Loss: 0.13873


In [None]:
w

In [None]:
def predict_y(input_data,target_data,weights,b):
    
    y_pred = sigmoid(weights,input_data,b)
    
    if y_pred > .5:
        y_pred = 1
    else:
        y_pred = 0
    
    return y_pred


wine_data_train_norm,label_train
accuracy = 0
    
for j in range(len(label_train)):
    y_pred = predict_y(wine_data_train_norm[j],label_train,w,b) # returns 0 or 1

    if y_pred == label_train[j]:
        accuracy +=1

# normalize the accuracy - out of 1400
accuracy=accuracy/len(label_train)

accuracy

In [None]:
plt.figure(figsize = (10,10))
plt.plot(range(1,len(likelihood_log_random)+1),likelihood_log_random)
plt.plot(range(1,len(likelihood_log_random)+1),likelihood_log_max)
plt.plot(range(1,len(likelihood_log_random)+1),likelihood_log_no_rep)
plt.plot(range(1,len(likelihood_log_random)+1),[.000000001]*len(likelihood_log_random))
# plt.xscale('log')
plt.legend(['random', 'max_grad', 'max_no_rep','true L*'])
plt.xscale('log')
plt.xlabel('iterations')
plt.ylabel('Loss')
plt.title('Training Log Loss')
plt.show()