In [68]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
src_breast_cancer = 'breast_cancer/wdbc.csv'
src_ionosphere = 'ionosphere/ionosphere.csv'

bc_data = pd.read_csv(src_breast_cancer, delimiter=',')
io_data = pd.read_csv(src_ionosphere, delimiter=',')

In [69]:
# Get data as np array and split bc_classes/bc_features
bc_classes = bc_data[bc_data.columns[1]].values
bc_features = bc_data[bc_data.columns[2:]].values
print(bc_classes.shape, bc_features.shape)
io_classes = io_data[io_data.columns[-1]].values
io_features = io_data[io_data.columns[:-1]].values
print(io_classes.shape, io_features.shape)

(568,) (568, 30)
(350,) (350, 34)


In [70]:
# Process bc_features into 0 and 1 class
bc_classes[bc_classes == 'M'] = 1
bc_classes[bc_classes == 'B'] = -1
print('Number of maligne: ', np.count_nonzero(bc_classes == 1))
print('Number of benigne: ', np.count_nonzero(bc_classes == 0))
bc_classes = bc_classes.astype(np.int8)
# Process io_features into 0 and 1 class
io_classes[io_classes == 'b'] = 1
io_classes[io_classes == 'g'] = -1
print('Number of bad: ', np.count_nonzero(io_classes == 1))
print('Number of good: ', np.count_nonzero(io_classes == 0))
io_classes = io_classes.astype(np.int8)

Number of maligne:  211
Number of benigne:  0
Number of bad:  126
Number of good:  0


In [71]:
# 0-center data
bc_features -= np.mean(bc_features, axis=0)
io_features -= np.mean(io_features, axis=0)
# 1-center std
bc_features /= np.std(bc_features, axis=0)
io_features = np.divide(io_features, np.std(io_features, axis=0), where=np.std(io_features, axis=0) != 0.)

In [72]:
print(bc_classes.shape, bc_classes.dtype)
print(bc_features.shape)
print(io_classes.shape, io_classes.dtype)
print(io_features.shape)

(568,) int8
(568, 30)
(350,) int8
(350, 34)


In [73]:
# Loads cross validation framework
from sklearn.model_selection import cross_val_score, ShuffleSplit, train_test_split
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn import cross_validation
import seaborn as sn
bc_xtrain, bc_xtest, bc_ytrain, bc_ytest = train_test_split(bc_features, bc_classes, test_size=.2)
io_xtrain, io_xtest, io_ytrain, io_ytest = train_test_split(io_features, io_classes, test_size=.2)

In [74]:
import numpy as np 

def logistic_loss(features, label, x, l):
    """ Computes the logistic loss for a Labeled point"""
    return np.log(1.+np.exp(-label*np.dot(features, x)))+l*np.linalg.norm(x)

In [75]:
def logistic_grad(features, label, x, l):
    """ Computes the logistic gradient for a Labeled point"""
    return (-label*features*np.exp(-label*np.dot(features, x)))/(1.+np.exp(-label*np.dot(features, x)))+2.*l*x

In [76]:
def hinge_loss(features, label, x, l):
    """ Computes the logistic loss for a Labeled point"""
    return max(0., 1.-label*np.dot(features, x))+l*np.linalg.norm(x)

In [77]:
def hinge_grad(features, label, x, l):
    """ Computes the logistic loss for a Labeled point"""
    if 1.-label*np.dot(features, x) < 0.:
        return np.zeros(features.shape[0])
    else:
        return -label*features+2.*l*x

In [78]:
def hinge_square_loss(features, label, x, l):
    """ Computes the logistic loss for a Labeled point"""
    return max(0., 1.-label*np.dot(features, x))**2+l*np.linalg.norm(x)

In [79]:
def hinge_square_grad(features, label, x, l):
    """ Computes the logistic loss for a Labeled point"""
    if 1.-label*np.dot(features, x) < 0.:
        return np.zeros(features.shape[0])
    else:
        return -2.*label*features*(1.-label*np.dot(features, x))+2.*l*x

In [80]:
def template(x_init, features, labels, loss_function, gradient_loss_function, n_epochs, lamb, learning_rate=1e-3):
    x = x_init
    n_samples = len(features)
    n_print = n_epochs // 10
    for epoch in range(n_epochs):
        ############ A REMPLIR
        grad = XXXXX
        
        x -= learning_rate * grad
        
        # Compute loss of whole dataset
        if epoch % n_print == 0:
            loss = np.mean([loss_function(f, l, x, lamb) for f, l in zip(features, labels)])
            print('Epoch ', epoch+1, ' Loss: ', loss)

In [81]:
def preds(x, tefeatures):
    p = 1. / (1. + np.exp(-np.dot(tefeatures, x)))
    p[p < .5] = -1
    p[p >= .5] = 1
    return p

In [82]:
def accuracy(real, preds):
    return np.sum(real == preds) / float(real.shape[0])

In [137]:
import time

def ggwp(algo_descent, n_epochs, reg, learning_rate=1e-3):
    for loss_type in ((logistic_loss, logistic_grad), (hinge_loss, hinge_grad), (hinge_square_loss, hinge_square_grad)):
        for i, dataset in enumerate(((bc_xtrain, bc_ytrain, bc_xtest, bc_ytest), (io_xtrain, io_ytrain, io_xtest, io_ytest))):
            loss = loss_type[0]
            grad = loss_type[1]
            trfeatures, trlabels = dataset[0], dataset[1]
            tefeatures, telabels = dataset[2], dataset[3]
            x_init = np.zeros(trfeatures[0].shape[0])
            print('XXXXXXXXXXXXXXXXXXXXXXX\nDataset', i, 'Loss type', loss.__name__)
            start = time.time()
            x = algo_descent(x_init=x_init,
                             features=trfeatures,
                             labels=trlabels,
                             loss_function=loss, 
                             gradient_loss_function=grad,
                             n_epochs=n_epochs,
                             lamb=reg,
                             learning_rate=learning_rate)
            print('Time elapsed', time.time() - start)
            print('Accuracy', accuracy(telabels, preds(x, tefeatures)))

In [138]:
# Batch gradient descent
def batch_gd(x_init, features, labels, loss_function, gradient_loss_function, n_epochs, lamb, learning_rate=1e-3, batch_size=None, momentum=None):
    x = x_init
    n_samples = len(features)
    n_print = 10
    for epoch in range(n_epochs):
        ############ A REMPLIR
        grad = np.mean([gradient_loss_function(f, lab, x, lamb) for f, lab in zip(features, labels)], axis=0)
        if momentum:
            x = x*momentum - learning_rate*grad
        else:
            x -= learning_rate * grad
        # Compute loss of whole dataset
        if epoch+1 % n_print == 0:
            loss = np.mean([loss_function(f, l, x, lamb) for f, l in zip(features, labels)])
            print('Epoch ', epoch+1, ' Loss: ', loss)
    
    return x

In [139]:
ggwp(batch_gd, 100, 1e-3)

XXXXXXXXXXXXXXXXXXXXXXX
Dataset 0 Loss type logistic_loss
Time elapsed 1.8593499660491943
Accuracy 0.938596491228
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 1 Loss type logistic_loss
Time elapsed 1.099128007888794
Accuracy 0.771428571429
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 0 Loss type hinge_loss
Time elapsed 0.8316330909729004
Accuracy 0.938596491228
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 1 Loss type hinge_loss
Time elapsed 0.5320870876312256
Accuracy 0.771428571429
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 0 Loss type hinge_square_loss
Time elapsed 1.4813802242279053
Accuracy 0.947368421053
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 1 Loss type hinge_square_loss
Time elapsed 1.0458521842956543
Accuracy 0.8


In [143]:
#Mini-Batch gradient descent
import random

def minibatch_gd(x_init, features, labels, loss_function, gradient_loss_function, n_epochs, lamb, learning_rate=1e-3, batch_size=5, momentum=None):
    x = x_init
    n_samples = len(features)
    n_print = 101010101010101010
    for epoch in range(n_epochs):
        ############ A REMPLIR
        for i in range (0, features.shape[0], batch_size):
            ids = np.random.choice(n_samples, batch_size)
            features_mini = features[ids]
            labels_mini = labels[ids]
            grad = np.mean([gradient_loss_function(f, lab, x, lamb) for f, lab in zip(features_mini, labels_mini)], axis=0)
            x -= learning_rate * grad
        # Compute loss of whole dataset
        if epoch+1 % n_print == 0:
            loss = np.mean([loss_function(f, l, x, lamb) for f, l in zip(features, labels)])
            print('Epoch ', epoch+1, ' Loss: ', loss)
    
    return x

In [144]:
ggwp(minibatch_gd, n_epochs=100, reg=1e-3, learning_rate=0.1)

XXXXXXXXXXXXXXXXXXXXXXX
Dataset 0 Loss type logistic_loss
Time elapsed 2.7020859718322754
Accuracy 0.964912280702
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 1 Loss type logistic_loss
Time elapsed 1.7534759044647217
Accuracy 0.885714285714
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 0 Loss type hinge_loss
Time elapsed 1.463737964630127
Accuracy 0.973684210526
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 1 Loss type hinge_loss
Time elapsed 0.9808359146118164
Accuracy 0.857142857143
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 0 Loss type hinge_square_loss
Time elapsed 1.5184569358825684
Accuracy 0.947368421053
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 1 Loss type hinge_square_loss
Time elapsed 1.0904130935668945
Accuracy 0.828571428571


In [145]:
# Batch gradient descent
def sgd(x_init, features, labels, loss_function, gradient_loss_function, n_epochs, lamb, learning_rate=1e-3, momentum=None):
    x = x_init
    n_samples = len(features)
    n_print = 10
    for epoch in range(n_epochs):
        for _ in range(n_samples):
            i = np.random.randint(n_samples)
            grad = gradient_loss_function(features[i], labels[i], x, lamb)
            if momentum:
                x = x*momentum - learning_rate*grad
            else:
                x -= learning_rate * grad
        # Compute loss of whole dataset
        if epoch+1 % n_print == 0:
            loss = np.mean([loss_function(f, l, x, lamb) for f, l in zip(features, labels)])
            print('Epoch ', epoch+1, ' Loss: ', loss)
    
    return x

In [146]:
ggwp(sgd, 100, 1e-3)

XXXXXXXXXXXXXXXXXXXXXXX
Dataset 0 Loss type logistic_loss
Time elapsed 2.5395119190216064
Accuracy 0.973684210526
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 1 Loss type logistic_loss
Time elapsed 1.7058229446411133
Accuracy 0.871428571429
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 0 Loss type hinge_loss
Time elapsed 1.2503480911254883
Accuracy 0.973684210526
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 1 Loss type hinge_loss
Time elapsed 0.9155521392822266
Accuracy 0.9
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 0 Loss type hinge_square_loss
Time elapsed 1.4676530361175537
Accuracy 0.956140350877
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 1 Loss type hinge_square_loss
Time elapsed 1.1749019622802734
Accuracy 0.871428571429


In [147]:
import math
# Adagrad
def adagrad(x_init, features, labels, loss_function, gradient_loss_function, n_epochs, lamb, learning_rate=1e-3, momentum=None, epsilon=1e-8):
    x = x_init
    n_samples = len(features)
    n_print = 101010101010101010
    Gt = np.zeros((features[0].shape[0], features[0].shape[0]))
    for epoch in range(n_epochs):
        for _ in range(n_samples):
            i = np.random.randint(n_samples)
            grad = gradient_loss_function(features[i], labels[i], x, lamb)
            for j, g in enumerate(grad):
                Gt[j, j] += g*g
            x = np.asarray([xi - learning_rate / math.sqrt(Gt[v, v] + epsilon) * g for xi, v, g in zip(x, range(Gt.shape[0]), grad)])
        # Compute loss of whole dataset
        if epoch+1 % n_print == 0:
            loss = np.mean([loss_function(f, l, x, lamb) for f, l in zip(features, labels)])
            print('Epoch ', epoch+1, ' Loss: ', loss)
    
    return x

In [None]:
ggwp(adagrad, 100, 1e-3)

XXXXXXXXXXXXXXXXXXXXXXX
Dataset 0 Loss type logistic_loss
Time elapsed 5.657454967498779
Accuracy 0.938596491228
XXXXXXXXXXXXXXXXXXXXXXX
Dataset 1 Loss type logistic_loss
