# AI 201 Programming Assignment 4
## Perceptron Classifier boosted with Adaboost vs. SVM

Submitted by: 
Jan Lendl R. Uy, 2019-00312

## Install and import necessary libraries
- numpy
- scikit-learn

In [29]:
!pip install numpy scikit-learn



In [30]:
import time
import random
import numpy as np
from sklearn.svm import SVC

In [31]:
# Locations of the two comma-separated dataset files loaded further down.
BANANA_DATASET: str = "banana_data.csv"
SPLICE_DATASET: str = "splice_data.csv"

## Read the banana and splice datasets

In [32]:
def read_dataset(filepath):
    """Load a labelled dataset from a comma-separated text file.

    The first column of every row is taken as the label and all remaining
    columns are taken as the features.

    Parameters
    ----------
    filepath : str, path-like or file-like
        Source handed directly to ``np.loadtxt``.

    Returns
    -------
    X : np.ndarray of shape (n_samples, n_features)
        Feature columns (every column after the first).
    y : np.ndarray of shape (n_samples,)
        Label column (the first column).
    """
    # ndmin=2 keeps the result two-dimensional even when the file holds a
    # single row; without it np.loadtxt returns a 1-D array and the column
    # slicing below raises IndexError.
    data = np.loadtxt(filepath, delimiter=",", ndmin=2)

    # Separate features and labels
    y = data[:, 0]  # First column contains the labels
    X = data[:, 1:]  # Succeeding columns contain the features

    return X, y

In [33]:
# Load both datasets into feature matrices and label vectors.
X_banana, y_banana = read_dataset(BANANA_DATASET)
X_splice, y_splice = read_dataset(SPLICE_DATASET)

# Report the array shapes, one per line, as a quick sanity check of the load.
print(
    f"X_banana: {X_banana.shape}",
    f"y_banana: {y_banana.shape}",
    f"X_splice: {X_splice.shape}",
    f"y_splice: {y_splice.shape}",
    sep="\n",
)

X_banana: (5300, 2)
y_banana: (5300,)
X_splice: (2991, 60)
y_splice: (2991,)


## Split the datasets into training and test sets
- Banana dataset
    - Training: 400
    - Test: 4900
- Splice dataset:
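    - Training: 1000 (note: the loaded file has only 2991 rows, so 816 samples remain for training once 2175 are reserved for testing)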
    - Test: 2175

In [34]:
def train_test_split(X, y, test_size=0.3, random_state=None):
    """Shuffle the samples and split them into a training and a test set.

    Parameters
    ----------
    X : array-like
        Samples, indexed along the first axis.
    y : array-like
        Labels; must have the same length as ``X``.
    test_size : float, default=0.3
        Fraction of the samples, in ``[0, 1]``, to place in the test set.
        The training set receives ``int(n_samples * (1 - test_size))``
        samples (truncated toward zero) and the test set receives the rest,
        so a fraction that lands exactly on a sample boundary can be shifted
        by one sample through floating-point rounding.
    random_state : int or None, default=None
        Seed for the shuffle. When given, a private generator is seeded so
        the split is reproducible and the global ``random`` state is left
        untouched. When ``None``, the global ``random`` generator is used.

    Returns
    -------
    X_train, X_test, y_train, y_test : np.ndarray

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length or ``test_size`` is outside
        ``[0, 1]``.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    n_samples = len(X)
    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same length, got {n_samples} and {len(y)}"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be within [0, 1], got {test_size}")

    # A dedicated Random instance yields the same permutation as seeding the
    # module-level generator, without reseeding it for the rest of the notebook.
    rng = random if random_state is None else random.Random(random_state)

    # Create list of indices and shuffle it
    indices = list(range(n_samples))
    rng.shuffle(indices)

    # Calculate split point
    split = int(n_samples * (1 - test_size))

    # Split the data
    train_indices = indices[:split]
    test_indices = indices[split:]

    # Integer-array indexing gathers all rows in one vectorised step.
    X_train = X[train_indices]
    X_test = X[test_indices]
    y_train = y[train_indices]
    y_test = y[test_indices]

    return X_train, X_test, y_train, y_test

In [35]:
# Number of banana samples to keep for training; the rest become the test set.
BANANA_TRAIN_SIZE = 400

# train_test_split() truncates n_samples * (1 - test_size) to an integer, so a
# fraction that lands exactly on a sample boundary can be pushed to the wrong
# side by floating-point rounding. Aiming half a sample past the target makes
# the truncation land on BANANA_TRAIN_SIZE regardless of rounding.
banana_test_fraction = 1 - (BANANA_TRAIN_SIZE + 0.5) / X_banana.shape[0]

X_banana_train, X_banana_test, y_banana_train, y_banana_test = train_test_split(
    X_banana,
    y_banana,
    test_size=banana_test_fraction,
    random_state=0,
)

# Fail loudly if the split sizes ever drift from the intended ones.
assert X_banana_train.shape[0] == BANANA_TRAIN_SIZE
assert X_banana_train.shape[0] + X_banana_test.shape[0] == X_banana.shape[0]

print(f"X_banana_train: {X_banana_train.shape}")
print(f"X_banana_test: {X_banana_test.shape}")
print(f"y_banana_train: {y_banana_train.shape}")
print(f"y_banana_test: {y_banana_test.shape}")

X_banana_train: (400, 2)
X_banana_test: (4900, 2)
y_banana_train: (400,)
y_banana_test: (4900,)


In [36]:
# Number of splice samples to reserve for testing; the rest are used for training.
# NOTE(review): the section header lists 1000 training samples, but the loaded
# file has 2991 rows, so only 816 remain after reserving 2175 -- confirm the
# dataset file is the intended one.
SPLICE_TEST_SIZE = 2175
splice_train_size = X_splice.shape[0] - SPLICE_TEST_SIZE

# train_test_split() truncates n_samples * (1 - test_size) to an integer, so a
# fraction that lands exactly on a sample boundary can be pushed to the wrong
# side by floating-point rounding. Aiming half a sample past the target makes
# the truncation land on splice_train_size regardless of rounding.
splice_test_fraction = 1 - (splice_train_size + 0.5) / X_splice.shape[0]

X_splice_train, X_splice_test, y_splice_train, y_splice_test = train_test_split(
    X_splice,
    y_splice,
    test_size=splice_test_fraction,
    random_state=0,
)

# Fail loudly if the split sizes ever drift from the intended ones.
assert X_splice_test.shape[0] == SPLICE_TEST_SIZE
assert X_splice_train.shape[0] == splice_train_size

print(f"X_splice_train: {X_splice_train.shape}")
print(f"X_splice_test: {X_splice_test.shape}")
print(f"y_splice_train: {y_splice_train.shape}")
print(f"y_splice_test: {y_splice_test.shape}")

X_splice_train: (816, 60)
X_splice_test: (2175, 60)
y_splice_train: (816,)
y_splice_test: (2175,)


## Performance metric: Accuracy

In [37]:
def accuracy_score(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels; must have the same shape as ``y_true``.

    Returns
    -------
    numpy floating scalar
        Mean of the element-wise equality between the two inputs.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Coerce to arrays so plain Python lists are compared element-wise rather
    # than as whole objects (which would collapse the score to 0.0 or 1.0).
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Mismatched shapes would otherwise broadcast or compare as a single
    # boolean and silently produce a meaningless score.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )

    # Calculate accuracy
    return np.mean(y_true == y_pred)

## Perceptron Classifier
- classify()
- predict()

In [38]:
class PerceptronClassifier:
    """Skeleton of the perceptron classifier; none of its methods is implemented yet."""

    def __init__(self):
        """Create the classifier. No attributes are set at this point."""

    def classify(self, max_iter_cnt=10000):
        """Placeholder that does nothing and returns ``None``.

        ``max_iter_cnt`` is accepted but currently unused; presumably it is
        meant to cap the number of training iterations -- TODO confirm once
        the method is implemented.
        """
        return None

    def predict(self):
        """Placeholder that does nothing and returns ``None``. TODO: implement."""
        return None

## Adaboost
- adabtrain()
- adabpredict()

In [39]:
class AdaBoost:
    """Skeleton of the AdaBoost ensemble; training and prediction are not implemented yet."""

    def __init__(self, k=10):
        """Store ``k`` on the instance.

        ``k`` is presumably the number of boosted classifiers -- TODO confirm
        once ``adabtrain`` is implemented.
        """
        self.k = k

    def adabtrain(self):
        """Placeholder that does nothing and returns ``None``. TODO: implement."""
        return None

    def adabpredict(self):
        """Placeholder that does nothing and returns ``None``. TODO: implement."""
        return None

## SVM

In [40]:
# Initialize an SVM for classification then train
svc_banana = SVC()

start_time = time.time()
svc_banana.fit(X_banana_train, y_banana_train)
svc_banana_training_time = time.time() - start_time

# Report the accuracy, training and test speeds of the 
# trained SVC on the banana dataset
start_time = time.time()
y_banana_pred = svc_banana.predict(X_banana_test)
svc_banana_testing_time = time.time() - start_time

svc_banana_accuracy = accuracy_score(y_banana_pred, y_banana_test)

print(f"SVM Accuracy on Banana Dataset: {svc_banana_accuracy:.5f}")
print(f"SVM Training Time: {svc_banana_training_time:.5f} s")
print(f"SVM Testing Time: {svc_banana_testing_time:.5f} s")

SVM Accuracy on Banana Dataset: 0.88000
SVM Training Time: 0.00221 s
SVM Testing Time: 0.03052 s


In [41]:
# Initialize an SVM for classification then train
svc_splice= SVC()
start_time = time.time()
svc_splice.fit(X_splice_train, y_splice_train)
svc_splice_training_time = time.time() - start_time

# Report the accuracy, training and test speeds of the 
# trained SVC on the splice dataset
start_time = time.time()
y_splice_pred = svc_splice.predict(X_splice_test)
svc_splice_testing_time = time.time() - start_time

svc_splice_accuracy = accuracy_score(y_splice_pred, y_splice_test)

print(f"SVM Accuracy on Splice Dataset: {svc_splice_accuracy:.5f}")
print(f"SVM Training Time: {svc_splice_training_time:.5f} s")
print(f"SVM Testing Time: {svc_splice_testing_time:.5f} s")

SVM Accuracy on Splice Dataset: 0.89149
SVM Training Time: 0.01625 s
SVM Testing Time: 0.08796 s
