In [1]:
import numpy as np
from numpy import linalg as lg
from sklearn.utils import shuffle

## Log loss function and its derivative

Because log loss function takes vector as parameter we must take derivative of all vectors coordinates as compute their sum

<img src="https://habrastorage.org/files/c3f/b82/a87/c3fb82a8728b494485038d4aabd0cd95.png"/>
<img src="https://habrastorage.org/files/b0a/7d8/429/b0a7d84296674cafa2744d6417ebd960.png"/>

In [2]:
def loss_function(w, x, y, gamma):
    length = x.shape[0]
    f_sum = 0
    
    for i in range(0, length):
        exp_power = -np.dot(w.T, x[i]) * y[i]
        exp_value = np.exp(exp_power)
        log_value = np.log(1 + exp_value)
        f_sum += log_value
    
    return f_sum

def weight_coordinate_derivative(w, x, y, dim):
    return -(x[dim] * y * np.exp(-np.dot(w.T, x) * y)) / (1 + np.exp(-np.dot(w.T, x)  * y))

def weights_norm_derivative(w, gamma):
    length = w.shape[0]
    
    derivative_sum = 0
    res = np.zeros(length)
    for i in range(0, length):
        res[i] += gamma * w[i] / (sum([w[j] ** 2 for j in range(0, length)]) ** 0.5)
    
    return res

def loss_function_derivative(w, x, y, gamma):
    length = x.shape[0]
    dimention = w.shape[0]
    derivative_sum = 0
    
    res = np.zeros(dimention)
    
    for i in range(0, length):
        for j in range(0, dimention):
            res[j] += weight_coordinate_derivative(w, x[i], y[i], j)
            
    return res

def loss_function_one_object_derivative(w, x, y, gamma):
    length = w.shape[0]
    
    res = np.zeros(length)
    
    for j in range(0, length):
        res[j] += weight_coordinate_derivative(w, x, y, j)
    
    return res


In [3]:
from sklearn.utils import shuffle

def GD(X_train, y_train, max_epoch=1000, alpha=0.1, gamma=0, tol=0.1):
    dimension = X_train.shape[1]
    num = X_train.shape[0]
    
#     weights = np.random.uniform(-1.0 / (2 * dimension), 1.0 / (2 * dimension), dimension)
    weights = np.zeros(dimension)
    l = loss_function(weights, X_train, y_train, gamma)
    
    l_array = [l]
    
    for i in range(max_epoch):
        weights -= alpha * loss_function_derivative(weights, X_train, y_train, gamma) * 2 # because of normalization

        
        l_new = loss_function(weights, X_train, y_train, gamma)
        
        l_array.append(l_new)
        
        if (abs(l - l_new) < tol):
            break
        
        l = l_new
    return weights, l_array
    
    
def SGD(X_train, y_train, max_epoch=1000, alpha=0.1, gamma=0):
    
    dimension = X_train.shape[1]
    num = X_train.shape[0]
    
    weights = np.zeros(dimension)
    
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    l = loss_function(weights, X_train, y_train, gamma)
    
    l_array = [l]
    
    for i in range(max_epoch):
        weights -= alpha / (i + 1) * loss_function_one_object_derivative(weights, X_train[i % num], y_train[i % num], gamma)
        new_l = loss_function(weights, X_train, y_train, gamma)
        l_array.append(new_l)
        
    return weights, l_array

In [4]:

from sklearn.preprocessing import scale

x = np.random.uniform(1, 1000, (100, 3))
y = np.zeros(100)
i = 0

x = scale(x)

real_w = np.array([7, 45, 13])

for obj in x:
    y[i] = sum(obj * real_w)
    i += 1

w, l = GD(x, y, 2000, 0.01, 0)
print w
print loss_function(w, x, y, 0)

[  6.72135024  50.28409094  17.52148503]
2.61788823435


## Regularization

In [5]:
import pickle

In [6]:
data = pickle.load(open('first_dataset.pkl'))

In [7]:
test_data = data['test_data']
test_labels = data['test_labels']

train_data = data['train_data']
train_labels = data['train_labels']

In [8]:
scaled_train_data = scale(train_data)


weights, errors = GD(scaled_train_data, train_labels)
weights_with_gamma, errors = GD(scaled_train_data, train_labels, gamma=1)

In [9]:
result = np.zeros(test_data.shape[0])
result_gamma = np.zeros(test_data.shape[0])

i = 0
for obj in test_data:
    res = sum(weights * obj)
    res_gamma = sum(weights * obj)
    
    if res > 0:
        result[i] = 1
    else:
        result[i] = -1
        
    if res_gamma > 0:
        result_gamma[i] = 1
    else:
        result_gamma[i] = -1
    
    i += 1
    
    


i = 0

test_errors = np.zeros(test_data.shape[0])

for res in result:
    if res != test_labels[i]:
        test_errors[i] = 1

test_errors

array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0

## Model evaluation

In [10]:
data = pickle.load(open('diabetes.pkl'))

In [12]:
X = data['data']
Y = data['labels']


X = scale(X)