# Classification

### Group 8
- Rachit Ranjan 
- Jun Deng
- Zhitao Liu
- Pu Fang 
- Ashutosh 

### Imports

In [34]:
import scipy.io as scio
import matplotlib.pyplot as plt
import numpy as np
import random
from sklearn.svm import SVC 
from sklearn.model_selection import train_test_split
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from skbayes.rvm_ard_models import RVC

In [35]:
# Load the feature vectors and the target outputs from the project .mat files.
# loadmat returns a dict, so each array is looked up by its variable name.
train_data = scio.loadmat('Proj2FeatVecsSet1.mat')['Proj2FeatVecsSet1']
output_labels = scio.loadmat('Proj2TargetOutputsSet1.mat')['Proj2TargetOutputsSet1']

In [36]:
# Sanity check: features and targets should have the same number of rows.
print(train_data.shape, output_labels.shape)

(25000, 60) (25000, 5)


### Data Preprocessing
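- Decode the one-hot target vectors into single integer class labels
- Add an extra "unknown" class (label 5, i.e. Nc+1) made of randomly generated feature vectors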

In [37]:
def encode_output_labels(output_labels=output_labels):
    """Collapse each target row into the index of its first entry equal to 1.

    Parameters
    ----------
    output_labels : iterable of 1-D arrays
        One row per sample. The default is the notebook-level
        ``output_labels`` array as it existed when this cell was executed.

    Returns
    -------
    numpy.ndarray
        One integer index per input row.

    Raises
    ------
    IndexError
        If a row contains no entry equal to 1.
    """
    # np.nonzero(cond) is the same operation as single-argument np.where(cond).
    first_hot_index = [np.nonzero(target_row == 1)[0][0] for target_row in output_labels]
    return np.array(first_hot_index)

def generate_unknown_class_data(entries=10000, n_features=None, label=5):
    """Return data and output labels for the unknown class (Nc+1).

    Every feature value is drawn independently with ``random.random()``,
    i.e. from [0, 1), row by row.

    Parameters
    ----------
    entries : int
        Number of synthetic samples (rows) to generate.
    n_features : int or None
        Number of features per sample. When None, the column count of the
        notebook-level ``train_data`` array is used.
    label : int
        Integer class label assigned to every generated sample.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Data of shape ``(entries, n_features)`` and an integer label vector
        of shape ``(entries,)``.
    """
    if n_features is None:
        # Fall back to the width of the real feature matrix so the generated
        # rows can be concatenated with it.
        n_features = train_data.shape[1]

    rows = [[random.random() for _ in range(n_features)] for _ in range(entries)]
    # The explicit reshape keeps the result 2-D even when entries == 0;
    # np.array([]) alone would be 1-D and could not be concatenated with
    # the real feature matrix.
    unknown_class_data = np.array(rows, dtype=float).reshape(entries, n_features)

    # The label vector must follow `entries`; a fixed length of 10000 would
    # mismatch the data for any other value.
    unknown_class_label = np.full(entries, label, dtype=int)
    return unknown_class_data, unknown_class_label

In [38]:
# Encode the output labels to a single integer value per sample
out_labels = encode_output_labels(output_labels=output_labels)

# Generate data for training the unknown class Nc+1
unknown_class_data, unknown_class_labels = generate_unknown_class_data(entries=10000)

# Merge the real and the generated (class Nc+1) training data and labels.
# NOTE: this rebinds train_data to the concatenated array, so re-running this
# cell without first re-running the load cell appends another batch of
# unknown-class rows.
train_data = np.concatenate((train_data, unknown_class_data))
out_labels = np.concatenate((out_labels, unknown_class_labels))

# Sanity check: data and labels should have the same number of rows.
print(train_data.shape, out_labels.shape)

(35000, 60) (35000,)


### Classification Algorithms

In [42]:
# Default train and test sets.
# random_state is pinned so that Restart & Run All reproduces the same split
# (and therefore the same default arguments for the classifiers below).
x_train_def, x_test_def, y_train_def, y_test_def = train_test_split(train_data, out_labels,
                                                                   test_size=0.33,
                                                                   shuffle=True,
                                                                   random_state=42)
# Keep only the first 1000 samples of each split
# (presumably to keep training time manageable -- TODO confirm).
x_train_def = x_train_def[:1000]
x_test_def = x_test_def[:1000]
y_train_def = y_train_def[:1000]
y_test_def = y_test_def[:1000]




def svm_classify(x_train=x_train_def,y_train=y_train_def,
                 x_test=x_test_def, y_test=y_test_def,
                 kernel='rbf',gamma='auto', probability=True, decision_function_shape='ovr', degree=3,
                 tol=1e-3):
    """Fit a Support Vector Machine classifier and score it on the test data.

    The data arguments default to the notebook-level ``*_def`` arrays as they
    existed when this cell was executed. The remaining arguments are passed
    straight through to ``SVC``; ``random_state`` is fixed at 42.

    Returns
    -------
    tuple
        ``(fitted SVC model, model.score(x_test, y_test))``.
    """
    hyperparams = {
        'kernel': kernel,
        'gamma': gamma,
        'degree': degree,  # only relevant for a polynomial kernel
        'probability': probability,
        'decision_function_shape': decision_function_shape,
        'tol': tol,
        'random_state': 42,
    }
    model = SVC(**hyperparams)

    # Fit on the training split, then evaluate on the held-out split.
    model.fit(x_train, y_train)
    test_accuracy = model.score(x_test, y_test)

    return model, test_accuracy



def rvm_classify(x_train=x_train_def,y_train=y_train_def,
                 x_test=x_test_def, y_test=y_test_def,
                 kernel='rbf', degree=3,n_iter=100,tol=0.001):
    """Train a Relevance Vector Machine classifier and score it on the test data.

    The data arguments default to the notebook-level ``*_def`` arrays as they
    existed when this cell was executed. ``kernel``, ``degree``, ``n_iter``
    and ``tol`` are passed straight through to ``RVC``.

    Returns
    -------
    tuple
        ``(fitted RVC model, accuracy)`` where ``accuracy`` is the fraction
        of test samples predicted correctly, in [0, 1].
    """
    rvm = RVC(kernel=kernel,
              degree=degree,
              n_iter=n_iter,
              tol=tol)

    # Train Model
    rvm.fit(x_train, y_train)

    # Accuracy is the mean of the per-sample "prediction is correct" flags.
    # Dividing the number of correct predictions by a fixed 100.0 is only a
    # valid score for one specific test-set size, so the mean is used instead.
    correct = rvm.predict(x_test) == np.asarray(y_test)
    accuracy = float(np.mean(correct))

    # Return Trained Model and Accuracy on Test Data
    return rvm, accuracy



def gp_classify(x_train=x_train_def,y_train=y_train_def,
                x_test=x_test_def, y_test=y_test_def,
                kernel=1.0 * RBF(1.0), optimizer='fmin_l_bfgs_b',
                n_restarts_optimizer=0, max_iter_predict=100,
                warm_start=True,multi_class='one_vs_one'):
    """Fit a Gaussian Process classifier and score it on the test data.

    The data arguments default to the notebook-level ``*_def`` arrays as they
    existed when this cell was executed. The remaining arguments are passed
    straight through to ``GaussianProcessClassifier``; ``n_jobs`` is fixed
    at -1.

    Returns
    -------
    tuple
        ``(fitted GaussianProcessClassifier, model.score(x_test, y_test))``.
    """
    settings = dict(
        kernel=kernel,
        optimizer=optimizer,
        n_restarts_optimizer=n_restarts_optimizer,
        max_iter_predict=max_iter_predict,
        warm_start=warm_start,
        multi_class=multi_class,
        n_jobs=-1,
    )
    classifier = GaussianProcessClassifier(**settings)

    # Fit on the training split, then evaluate on the held-out split.
    classifier.fit(x_train, y_train)
    test_accuracy = classifier.score(x_test, y_test)

    return classifier, test_accuracy



## TrainMyClassifer

In [77]:
def TrainMyClassifer():
    """Placeholder for the classifier training entry point; not implemented yet."""
    return None