In [1969]:
%pylab inline
import numpy as np
import matplotlib.pyplot as plt
import sys

Populating the interactive namespace from numpy and matplotlib


### This is the implementation of the Fisher LDA learning algorithm (question 1)

In [1970]:
class fisher_lda:
    """Two-class Fisher LDA: Gaussian class conditionals sharing one covariance.

    After ``train`` the instance exposes ``pi`` (fraction of class-1 rows),
    ``mu0`` / ``mu1`` (class means), ``Sigma`` (prior-weighted covariance)
    and ``Sigmainv`` (its inverse).
    """

    def __init__(self):
        pass

    def train(self, train_data):
        """Fit the parameters by maximum likelihood.

        Every row of ``train_data`` is read as the features followed by the
        class label (0 or 1) in the last position.
        """
        rows0, rows1 = [], []
        for sample in train_data:
            if sample[-1] == 0:
                rows0.append(sample[:-1])
            elif sample[-1] == 1:
                rows1.append(sample[:-1])
        class0 = np.array(rows0)
        class1 = np.array(rows1)

        n0 = len(class0)
        n1 = len(class1)

        self.pi = n1 / (n0 + n1)
        self.mu0 = (1 / n0) * class0.sum(axis=0)
        self.mu1 = (1 / n1) * class1.sum(axis=0)

        def scatter(points, mean):
            # Sum of outer products of the deviations from the class mean.
            return np.matmul((points - mean).T, (points - mean))

        cov0 = (1 / n0) * scatter(class0, self.mu0)
        cov1 = (1 / n1) * scatter(class1, self.mu1)

        # Shared covariance: the class covariances weighted by the class priors.
        self.Sigma = (1 - self.pi) * cov0 + self.pi * cov1
        self.Sigmainv = np.linalg.inv(self.Sigma)

    def compute_predictions(self, test_data):
        """Return the class-1 posterior of each row of ``test_data`` as a column."""
        proj0 = np.matmul(self.mu0, self.Sigmainv)
        proj1 = np.matmul(self.mu1, self.Sigmainv)

        # The log-odds are affine in x: slope (proj1 - proj0), plus this offset.
        offset = (0.5 * np.matmul(proj0, self.mu0.T) -
                  0.5 * np.matmul(proj1, self.mu1.T) +
                  np.log(self.pi / (1 - self.pi)))
        log_odds = np.matmul(proj1 - proj0, test_data.T) + offset

        return np.array([sigmoid(log_odds)]).T

### This is the implementation of the logistic regression learning algorithm (question 2)

In [1971]:
class logistic_regression:
    """Binary logistic regression fitted with IRLS (Newton-Raphson).

    Attributes:
        X: design matrix, the features followed by a constant column of ones.
        y: labels stored as an (n_samples, 1) column.
        params: (n_features + 1, 1) column; the last entry is the bias.
    """

    def __init__(self, data, labels):
        """Store the training set and start from all-zero parameters.

        Args:
            data: 2-D array-like of features, one row per sample.
            labels: 0/1 labels, one per sample (1-D or column shaped).
        """
        n_samples, n_features = np.shape(data)[0], np.shape(data)[1]
        self.X = np.ones((n_samples, n_features + 1))      # last column of ones carries the bias
        self.X[:, :-1] = data
        # Force a column vector: 1-D labels would make `mu - y` broadcast to (n, n).
        self.y = np.reshape(labels, (-1, 1))
        self.params = np.zeros((n_features + 1, 1))

    def train(self, train_data, max_iter=5, tol=0.001):
        """Learn the parameters with the IRLS algorithm (a second-order method).

        Args:
            train_data: unused; the data given to the constructor is used. Kept
                so every model shares the ``train(train_data)`` call.
            max_iter: maximum number of Newton steps.
            tol: stop once the absolute values of the last step sum to at most this.
        """
        for _ in range(max_iter):
            mu = sigmoid(np.matmul(self.X, self.params))
            irls_weights = mu * (1 - mu)
            hessian = np.matmul((self.X * irls_weights).T, self.X)
            gradient = np.matmul(self.X.T, mu - self.y)

            # Solve H @ update = gradient rather than forming the inverse explicitly.
            update = np.linalg.solve(hessian, gradient)
            self.params -= update

            # Measure the step by magnitude: a signed sum can be negative or
            # cancel out and would end training long before convergence.
            if np.abs(update).sum() <= tol:
                break

    def compute_predictions(self, test_data):
        """Return the class-1 probability of each row of ``test_data`` as a column."""
        weights, bias = self.params[:-1], self.params[-1:]
        return sigmoid(np.matmul(test_data, weights) + bias)

### This is the implementation of the linear regression learning algorithm (question 3)

In [1972]:
class linear_regression:
    """Least-squares linear regression on the labels, with a bias term."""

    def __init__(self, data, labels):
        """Build the design matrix (features plus a trailing column of ones)."""
        n_rows = np.shape(data)[0]
        n_cols = np.shape(data)[1]

        design = np.empty((n_rows, n_cols + 1))
        design[:, :-1] = data
        design[:, -1] = 1.0      # constant column multiplying the bias

        self.X = design
        self.y = labels
        self.params = np.zeros((n_cols + 1, 1))

    def train(self, train_data):
        """Solve the normal equations; ``train_data`` is not used here."""
        gram_inverse = np.linalg.inv(self.X.T @ self.X)
        self.params = (gram_inverse @ self.X.T) @ self.y

    def compute_predictions(self, test_data):
        """Return the fitted linear score of each row of ``test_data``."""
        weights = self.params[:-1]
        bias = self.params[-1:]
        return test_data @ weights + bias

### This is the implementation of the QDA learning algorithm (question 5)

In [1973]:
class qda:
    """Two-class QDA: Gaussian class conditionals, one covariance per class.

    After ``train`` the instance exposes ``pi``, ``mu0`` / ``mu1``,
    ``sigmatilde0`` / ``sigmatilde1`` together with their determinants
    (``detsigma0`` / ``detsigma1``) and inverses (``sigma0inv`` / ``sigma1inv``).
    """

    def __init__(self):
        pass

    def train(self, train_data):
        """Fit the parameters by maximum likelihood.

        Every row of ``train_data`` is read as the features followed by the
        class label (0 or 1) in the last position.
        """
        rows0, rows1 = [], []
        for sample in train_data:
            if sample[-1] == 0:
                rows0.append(sample[:-1])
            elif sample[-1] == 1:
                rows1.append(sample[:-1])
        class0 = np.array(rows0)
        class1 = np.array(rows1)

        n0 = len(class0)
        n1 = len(class1)

        self.pi = n1 / (n0 + n1)
        self.mu0 = (1 / n0) * class0.sum(axis=0)
        self.mu1 = (1 / n1) * class1.sum(axis=0)

        def scatter(points, mean):
            # Sum of outer products of the deviations from the class mean.
            return np.matmul((points - mean).T, (points - mean))

        self.sigmatilde0 = (1 / n0) * scatter(class0, self.mu0)
        self.sigmatilde1 = (1 / n1) * scatter(class1, self.mu1)

        self.detsigma0 = np.linalg.det(self.sigmatilde0)
        self.detsigma1 = np.linalg.det(self.sigmatilde1)

        self.sigma1inv = np.linalg.inv(self.sigmatilde1)
        self.sigma0inv = np.linalg.inv(self.sigmatilde0)

    def compute_predictions(self, test_data):
        """Return the class-1 posterior of each row of ``test_data`` as a column."""
        # Quadratic part of the log-odds: -1/2 * x^T (Sigma1^-1 - Sigma0^-1) x, row by row.
        precision_gap = self.sigma1inv - self.sigma0inv
        quadratic = -0.5 * np.sum(np.matmul(test_data, precision_gap) * test_data, axis=1)

        proj0 = np.matmul(self.mu0, self.sigma0inv)
        proj1 = np.matmul(self.mu1, self.sigma1inv)

        # Terms that do not depend on x: means, class prior and determinant ratio.
        offset = (0.5 * np.matmul(proj0, self.mu0.T) -
                  0.5 * np.matmul(proj1, self.mu1.T) +
                  np.log(self.pi / (1 - self.pi)) +
                  0.5 * np.log(self.detsigma0 / self.detsigma1))

        log_odds = np.matmul(proj1 - proj0, test_data.T) + offset
        log_odds = log_odds + quadratic

        return np.array([sigmoid(log_odds)]).T

In [1974]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise.

    Only exp(-|x|) is evaluated, so large-magnitude inputs cannot overflow
    (the direct formula overflows in exp(-x) for very negative x).

    Args:
        x: real scalar or array-like.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``x``.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))          # always in (0, 1]
    # For x >= 0 this is the textbook formula; for x < 0 the algebraically
    # equal form exp(x) / (1 + exp(x)) is used.
    values = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result into a scalar and leaves arrays untouched.
    return values[()]

### Function used to plot the data and the decision boundary for all models

In [1975]:
def plot_data(fit, data, dataset, model):
    """Scatter both classes, draw the 0.5 decision boundary and save the figure.

    Args:
        fit: trained model exposing ``compute_predictions(points)``.
        data: 2-D array whose first two columns are the plotted features and
            whose last column is the class label (0 or 1).
        dataset: dataset tag, used only in the output file name.
        model: model name, used only in the output file name.

    The figure is written to ``latex/figures/fig_<model>_dataset<dataset>.png``
    and the current figure is cleared afterwards.
    """
    data = np.asarray(data)
    labels = data[:, -1]
    data0 = data[labels == 0][:, :-1]
    data1 = data[labels == 1][:, :-1]

    # Build the grid from the data passed in, not from a notebook-level global.
    domainx = np.linspace(np.min(data[:, 0]), np.max(data[:, 0]))
    domainy = np.linspace(np.min(data[:, 1]), np.max(data[:, 1]))
    X, Y = np.meshgrid(domainx, domainy)

    # One (x, y) row per grid node, in the same row-major order as X and Y, so
    # the predictions reshape straight back onto the grid.
    domain = np.column_stack((X.ravel(), Y.ravel()))
    prediction = fit.compute_predictions(domain)

    plt.plot(data0[:, 0], data0[:, 1], 'r*', label='class 0')
    plt.plot(data1[:, 0], data1[:, 1], 'bo', label='class 1')

    # The decision boundary is the level set where the model output equals 0.5.
    plt.contour(X, Y, np.reshape(prediction, X.shape), levels=[0.5])

    plt.xlabel('feature 1')
    plt.ylabel('feature 2')
    plt.legend(fancybox=True)
    plt.savefig('latex/figures/fig_%s_dataset%s.png' % (model, dataset))

    plt.clf()

### Function used to compute misclassification error for all models

In [1976]:
def compute_error(fit, train_data, test_data, dataset):
    """Print and return the misclassification rates on both splits.

    A sample is assigned to class 1 when the model output is strictly above
    0.5. Thresholding (instead of rounding the output and taking an absolute
    difference) keeps the count right for models whose output is not confined
    to [0, 1], such as the linear regression.

    Args:
        fit: trained model exposing ``compute_predictions(features)``.
        train_data: 2-D array, features followed by the 0/1 label in the last column.
        test_data: same layout as ``train_data``.
        dataset: unused; kept so existing calls keep working.

    Returns:
        ``(error_train, error_test)`` as percentages.
    """
    def error_rate(samples):
        scores = np.reshape(fit.compute_predictions(samples[:, :-1]), (-1, 1))
        predicted = (scores > 0.5).astype(float)
        return float(np.mean(predicted != samples[:, -1:]) * 100)

    error_train = error_rate(train_data)
    error_test = error_rate(test_data)

    print("Misclassification error (training): %.2f%%" %error_train)
    print("Misclassification error (test): %.2f%%\n" %error_test)

    return error_train, error_test

In [1977]:
if __name__ == '__main__':
    # Train every model on every dataset, print the learned parameters, save
    # the decision-boundary figure and report the misclassification errors.

    models = ['fisher_lda','logistic_regression','linear_regression','qda']
    datasets = ['A','B','C']

    for model in models:
        print('Model : %s'%model)

        for dataset in datasets:
            print('Dataset : %s\n'%dataset)

            # The code below reads every column but the last as features and
            # the last column as the class label.
            train_data = np.loadtxt('data/classification%s.train'%dataset)
            test_data = np.loadtxt('data/classification%s.test'%dataset)

            features, labels = train_data[:,:-1], train_data[:,-1:]

            if model == 'fisher_lda':
                fit = fisher_lda()
            elif model == 'logistic_regression':
                fit = logistic_regression(features, labels)
            elif model == 'linear_regression':
                fit = linear_regression(features, labels)
            elif model == 'qda':
                fit = qda()
            else:
                raise Exception ('model not supported')

            fit.train(train_data)

            if model in ['logistic_regression','linear_regression']:
                # Pull the bias out as a plain float: formatting a (1, 1) array
                # with %f relies on an implicit array-to-scalar conversion that
                # NumPy has deprecated.
                bias = float(np.ravel(fit.params)[-1])
                print('The parameters learned from the dataset %s for the %s model are\nw = \n%s \nand \nb = %f\n'
                        %(dataset,model,np.array_str(fit.params[:-1]),bias))
            elif model in ['fisher_lda','qda']:
                print('The parameters learned from the dataset %s for the %s model are the following:'%(dataset,model))
                print('pi = %f'%fit.pi)
                print('mu_0 = %s'%np.array_str(fit.mu0))
                print('mu_1 = %s'%np.array_str(fit.mu1))
                if model == 'fisher_lda':
                    print('Sigma = \n%s'%np.array_str(fit.Sigma))
                else:
                    print('Sigma_0 = \n%s'%np.array_str(fit.sigmatilde0))
                    print('Sigma_1 = \n%s'%np.array_str(fit.sigmatilde1))

            plot_data(fit,train_data, dataset, model)
            compute_error(fit, train_data, test_data, dataset)
            

Model : fisher_lda
Dataset : A

The parameters learned from the dataset A for the fisher_lda model are the following:
pi = 0.333333
mu_0 = [ 2.89970947 -0.893874  ]
mu_1 = [-2.69232004  0.866042  ]
Sigma = 
[[ 2.44190897 -1.13194024]
 [-1.13194024  0.61375465]]
Misclassification error (training): 1.33%
Misclassification error (test): 2.00%

Dataset : B

The parameters learned from the dataset B for the fisher_lda model are the following:
pi = 0.500000
mu_0 = [ 3.34068896 -0.83546333]
mu_1 = [-3.21670734  1.08306733]
Sigma = 
[[ 3.34623467 -0.13516489]
 [-0.13516489  1.73807475]]
Misclassification error (training): 3.00%
Misclassification error (test): 4.15%

Dataset : C

The parameters learned from the dataset C for the fisher_lda model are the following:
pi = 0.625000
mu_0 = [ 2.79304824 -0.83838667]
mu_1 = [-2.94232885 -0.9578284 ]
Sigma = 
[[ 2.88039225 -0.63405081]
 [-0.63405081  5.19952435]]
Misclassification error (training): 5.50%
Misclassification error (test): 4.23%

Model : l

<matplotlib.figure.Figure at 0x106ed2438>