In [1]:
#Load packages
import random
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.io as sio
from imblearn.over_sampling import ADASYN
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.utils.class_weight import compute_class_weight

In [2]:
#This function is used to encode labels since labels are categorical.
def encode_labels(labels):
    """Encode categorical labels with a freshly fitted ``LabelEncoder``.

    Parameters
    ----------
    labels : sequence of class labels to fit the encoder on and to encode.

    Returns
    -------
    tuple
        ``(encoded_labels, le)``: the result of ``le.transform(labels)`` and
        the fitted encoder itself, so callers can reuse it on other data.
    """
    # fit() returns the encoder, so construction and fitting can be chained.
    le = LabelEncoder().fit(labels)

    return le.transform(labels), le

def decode_labels(encoded_predict_labels, le):
    """Convert encoded predictions back to their original labels.

    Parameters
    ----------
    encoded_predict_labels : encoded labels to convert back.
    le : fitted encoder exposing ``inverse_transform`` (e.g. the one returned
        by ``encode_labels``).

    Returns
    -------
    Whatever ``le.inverse_transform(encoded_predict_labels)`` returns.
    """
    return le.inverse_transform(encoded_predict_labels)

#Load data
# Read the three MATLAB .mat files (the test file name suggests it carries no labels).
train = sio.loadmat('train.mat')
validation = sio.loadmat('validation.mat')
test = sio.loadmat('test_wolabels.mat')

# --- Training split ---
train_classid = np.squeeze(train['classid'])
# The label is taken from the first element of every classid entry.
train_class_labels = [entry[0] for entry in train_classid]
train_features = train['features']
train_imid = train['imid']
train_sampleid = train['sampleid']
print(train_features.shape)
train_unique_labels = sorted(np.unique(train_class_labels))
train_unique_labels_count = len(train_unique_labels)
print(train_unique_labels_count)

# --- Validation split (same extraction steps as the training split) ---
validation_classid = np.squeeze(validation['classid'])
validation_class_labels = [entry[0] for entry in validation_classid]
validation_features = validation['features']
validation_imid = validation['imid']
validation_sampleid = validation['sampleid']
print(validation_features.shape)
validation_unique_labels = sorted(np.unique(validation_class_labels))
validation_unique_labels_count = len(validation_unique_labels)
print(validation_unique_labels_count)

# Fit the label encoder on the training labels and encode them.
train_labels, le = encode_labels(train_class_labels)
print(len(train_labels))

# Encode the validation labels with the encoder fitted on the training labels.
validation_labels = le.transform(validation_class_labels)
print(len(validation_labels))

(7849, 2048)
1013
(1379, 2048)
1013
7849
1379


In [3]:
#0-1 normalization
# The scaler is fitted on the training features only; the validation features
# are then transformed with those same fitted parameters rather than refitted.
scalar = MinMaxScaler()
train_features_norm = scalar.fit_transform(train_features)
validation_features_norm = scalar.transform(validation_features)

Ridge Classifier

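An alpha (regularization strength) of 1, which is the RidgeClassifier default, was found to be the optimum, so alpha is not set explicitly below.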

In [5]:
# Fit a class-balanced ridge classifier on the normalized training features
# and predict the (encoded) labels of the validation set.
clf = RidgeClassifier(class_weight = 'balanced', random_state=0)
clf.fit(train_features_norm, train_labels)
RidgeClassifier_predictions = clf.predict(validation_features_norm)
# clf.score() is the overall fraction of correctly classified samples, not an
# average over classes, so it is reported as plain accuracy.
score = clf.score(validation_features_norm, validation_labels)
print("accuracy scores:", score)

Mean class accuracy scores: 0.8042059463379261


In [6]:
# Save the predictions to a CSV file, one value per row, without index or header.
# NOTE(review): the values written are the encoded labels produced by
# clf.predict; if original class names are wanted, decode_labels() may be
# needed first -- confirm the expected submission format.
output_df = pd.DataFrame(data=RidgeClassifier_predictions)
output_df.to_csv('Task1_RidgeClassifier_predictions.csv', header=False, index=False)

In [7]:
# Mean class accuracy: the diagonal of the confusion matrix (correct
# predictions per true class) divided by the row sums (samples per true class)
# gives one accuracy per class; these are then averaged with equal weight.
matrix = confusion_matrix(validation_labels, RidgeClassifier_predictions)
acc = matrix.diagonal()/matrix.sum(axis=1)
print(sum(acc)/len(acc))

0.7829643209702438


In [8]:
#z-score normalization
# Fitted on the training features only, then applied unchanged to validation.
# NOTE: this rebinds scalar, train_features_norm and validation_features_norm,
# replacing the 0-1 normalized versions computed earlier in the notebook.
scalar = StandardScaler()
train_features_norm = scalar.fit_transform(train_features)
validation_features_norm = scalar.transform(validation_features)

In [9]:
# Train a class-balanced ridge classifier on the current normalized features.
clf = RidgeClassifier(random_state=0, class_weight='balanced').fit(
    train_features_norm, train_labels
)
# Overall validation accuracy, then the per-sample predictions used later.
score = clf.score(validation_features_norm, validation_labels)
RidgeClassifier_predictions = clf.predict(validation_features_norm)
print("accuracy scores:", score)

accuracy scores: 0.8063814358230602


In [10]:
# Per-class accuracy = diagonal entry of the confusion matrix divided by its
# row total; the mean over classes is printed.
matrix = confusion_matrix(validation_labels, RidgeClassifier_predictions)
class_totals = matrix.sum(axis=1)
acc = np.diag(matrix) / class_totals
print("Mean class accuracy scores:", sum(acc)/len(acc))

Mean class accuracy scores: 0.782750434823485
