## Import libraries

In [1]:
from data_loader import DataLoader
from logistic_regression import LogisticRegression
import numpy as np

## Load data

In [2]:
# Root folder of the dataset; the later loading cells reuse this path.
dataset_root_directory = './dataset'

# Load every subset. Each subset is indexed below as subset[0] to reach its
# training examples.
full_dataset = DataLoader.load_full_dataset(dataset_root_directory)

# Add up the real size of every subset instead of multiplying the number of
# subsets by the size of the first one, so the total stays correct even when
# the subsets do not all have the same size.
full_example_count = sum(len(subset[0]) for subset in full_dataset)

print('%d subsets of %d training examples with labels' % (len(full_dataset), len(full_dataset[0][0])))
print('Total %d training examples and labels' % full_example_count)

10 subsets of 111 training examples with labels
Total 1110 training examples and labels


## Initialize parameters

In [3]:
# Size of each input vector; passed to LogisticRegression as its M argument.
input_vector_size = 64

## Create and learn a model

In [4]:
# Create the model, with M set to the input vector size.
model = LogisticRegression(M=input_vector_size)

# Train the model on the full dataset
# (report_acc=True presumably makes learn() report the training accuracy
# shown in this cell's output -- confirm in logistic_regression.py).
model.learn(full_dataset, report_acc=True)

# Model parameters and stats
model.summary()

Training Accuracy 86.75675675675674
Training Accuracy = 86.757 %

=====Model Summary=====
w0 = [0.04729453]

w of size (64, 1) :
array([[-0.00017858],
       [ 0.00719516],
       [ 0.01357879],
       [ 0.00577427],
       [ 0.02376583],
       [ 0.07107198],
       [ 0.01014586],
       [ 0.01618142],
       [ 0.03662329],
       [-0.01094573],
       [ 0.0521205 ],
       [ 0.00543148],
       [ 0.0094478 ],
       [ 0.07003225],
       [ 0.00489671],
       [-0.02263623],
       [ 0.03234231],
       [ 0.02862491],
       [ 0.00438958],
       [-0.00900111],
       [-0.00526394],
       [-0.02526586],
       [-0.01513025],
       [ 0.01133964],
       [ 0.02345222],
       [ 0.00527371],
       [-0.00931745],
       [ 0.00806537],
       [ 0.03951118],
       [-0.00307993],
       [-0.02989851],
       [ 0.00098481],
       [ 0.03202586],
       [-0.00446742],
       [-0.02888178],
       [-0.01797764],
       [-0.01489359],
       [-0.03668074],
       [-0.01723574],
       [-0.01

## Test the model

In [5]:
# The model trained above can classify unseen test data in the future.
#
# Since no unseen test data is available, part of the training data is held
# out as test data and a newly created model is trained on the remainder.

# To generate test data, split the training data.
train_dataset, test_attrs, test_labels = DataLoader.load_with_test_data(
    dataset_root_directory,
    split_ratio=0.1)
print(test_attrs.shape, 'testing examples')
print(test_labels.shape, 'testing labels')

# Add up the real size of every subset instead of multiplying the number of
# subsets by the size of the first one, so the total stays correct even when
# the subsets do not all have the same size.
train_example_count = sum(len(subset[0]) for subset in train_dataset)
print('%d subsets of %d training examples with labels' % (len(train_dataset), len(train_dataset[0][0])))
print('Total %d training examples and labels' % train_example_count)

# Train a separate model on the remaining training data only, then classify
# the held-out examples; classify() returns the predictions and the accuracy.
test_model = LogisticRegression(M=input_vector_size)
test_model.learn(train_dataset)
predicted_labels, accuracy = test_model.classify(test_attrs, true_labels=test_labels)

print('\nTest Accuracy = %.3f %%' % accuracy)
print('\n=====Result( True label , Predicted label )=====')
# column_stack puts the two label columns side by side, giving one
# (true, predicted) row per test example that can be unpacked directly.
for true_label, predicted_label in np.column_stack((test_labels, predicted_labels)):
    print('(', true_label, ',', predicted_label, ')', end=' ')
    if true_label != predicted_label:
        # Flag every misclassified example on its own output line.
        print(' <--------Incorrect', end=' ')
    print()


(111, 64) testing examples
(111, 1) testing labels
9 subsets of 111 training examples with labels
Total 999 training examples and labels

Test Accuracy = 88.288 %

=====Result( True label , Predicted lalue )=====
( 6.0 , 6.0 ) 
( 5.0 , 5.0 ) 
( 6.0 , 5.0 )  <--------Incorrect 
( 5.0 , 5.0 ) 
( 6.0 , 6.0 ) 
( 6.0 , 6.0 ) 
( 6.0 , 6.0 ) 
( 6.0 , 5.0 )  <--------Incorrect 
( 5.0 , 5.0 ) 
( 6.0 , 6.0 ) 
( 6.0 , 6.0 ) 
( 5.0 , 6.0 )  <--------Incorrect 
( 5.0 , 5.0 ) 
( 6.0 , 6.0 ) 
( 6.0 , 6.0 ) 
( 6.0 , 6.0 ) 
( 6.0 , 6.0 ) 
( 5.0 , 5.0 ) 
( 5.0 , 5.0 ) 
( 5.0 , 5.0 ) 
( 6.0 , 5.0 )  <--------Incorrect 
( 6.0 , 6.0 ) 
( 6.0 , 6.0 ) 
( 5.0 , 5.0 ) 
( 6.0 , 6.0 ) 
( 6.0 , 6.0 ) 
( 6.0 , 6.0 ) 
( 5.0 , 5.0 ) 
( 5.0 , 5.0 ) 
( 5.0 , 5.0 ) 
( 5.0 , 5.0 ) 
( 6.0 , 6.0 ) 
( 5.0 , 5.0 ) 
( 5.0 , 5.0 ) 
( 5.0 , 5.0 ) 
( 5.0 , 5.0 ) 
( 5.0 , 5.0 ) 
( 5.0 , 5.0 ) 
( 5.0 , 5.0 ) 
( 6.0 , 6.0 ) 
( 6.0 , 6.0 ) 
( 5.0 , 5.0 ) 
( 6.0 , 6.0 ) 
( 5.0 , 5.0 ) 
( 5.0 , 5.0 ) 
( 6.0 , 6.0 ) 
( 5.0 , 5.0 ) 
( 

## Linear Separability experiment

In [7]:
X, y = DataLoader.load_merged_dataset(dataset_root_directory)

# Use the model that was trained on the whole dataset. The two results are
# treated below as the true labels of the points lying below and above the
# model's hyperplane.
below_hyperplane, above_hyperplane = model.do_experiment(X, true_labels=y)

# Get count of points of each class above and below hyperplane
class5_below = below_hyperplane.count(5.0)
class6_below = below_hyperplane.count(6.0)
class5_above = above_hyperplane.count(5.0)
class6_above = above_hyperplane.count(6.0)

# The hyperplane separates the classes only when each class lies entirely on
# one side AND the two classes lie on opposite sides. Checking only that each
# class has an empty side would also accept both classes on the same side.
separated_by_hyperplane = (
    (class5_above == 0 and class6_below == 0)
    or (class5_below == 0 and class6_above == 0)
)

# NOTE(review): a negative result only shows that this learned hyperplane does
# not split the classes; it does not by itself prove that no separating
# hyperplane exists.
if separated_by_hyperplane:
    print('The data is linearly separable')
else:
    print('The data is not linearly separable')

print('='*10 + 'Experiment Summary' + 10*'=')
print('Total No. of Data points =', len(below_hyperplane) + len(above_hyperplane))
print('Class 5 points above hyperplane =', class5_above)
print('Class 5 points below hyperplane =', class5_below)
print('Class 6 points below hyperplane =', class6_below)
print('Class 6 points above hyperplane =', class6_above)

The data is not linearly seperable
Total No. of Data points = 1110
Class 5 points above hyperplane = 493
Class 5 points below hyperplane = 58
Class 6 points below hyperplane = 494
Class 6 points above hyperplane = 65
