In [1]:
import numpy as np
import pandas as pd
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsOneClassifier

In [2]:
# --- Training data ---------------------------------------------------------
# Read the training metadata; its `target` column supplies the class labels.
dataframes_training = pd.read_csv('data/training_set_metadata.csv')
y = dataframes_training.target.values
# Positional column picks, chosen so the training features line up with the
# first nine columns of the test file read below. np.nan_to_num turns the
# selection into a NumPy array with NaNs replaced by 0.
training_feature_columns = [0, 1, 2, 5, 6, 7, 8, 9, 10]
X = np.nan_to_num(dataframes_training.iloc[:, training_feature_columns])

# --- Test data (baseline: only the first half a million rows are used) -----
dataframes_test = pd.read_csv('data/plasticc_test_metadata.csv')
test_rows = slice(500000)
X_test = np.nan_to_num(dataframes_test.iloc[test_rows, :9])
# The list selector [10] keeps the labels as a single-column 2-D array (n, 1).
y_test = dataframes_test.iloc[test_rows, [10]].values

In [None]:
# Sanity check (currently disabled): making sure that we extracted the correct columns.
# The whole check is wrapped in a triple-quoted string, so none of it executes;
# remove the surrounding quotes to run it. When enabled it reads the first three
# rows of 'data/test_set_metadata.csv' with the same column positions used for the
# training features X, then prints the nine values of the first row of X_test and
# of X_test2 one after the other for a manual side-by-side comparison.
"""dataframes2_test = pd.read_csv('data/test_set_metadata.csv')
X_test2 = np.nan_to_num(dataframes2_test.iloc[:3,[0,1,2,5,6,7,8,9,10]])
for i in range(9):
    print(X_test[0,i])
    print(X_test2[0,i])"""

In [None]:
# Running a linear SVC classifier using the Crammer-Singer strategy for
# multi-class classification.
# NOT recommended according to the scikit-learn documentation.

# random_state is pinned (as in the One-Vs-One cell) so re-runs are reproducible.
SVC_CS = LinearSVC(multi_class="crammer_singer", random_state=0)
SVC_CS.fit(X, y)
Predictions_SVC_CS = SVC_CS.predict(X_test)

# Compute the class sets once and reuse them in the report below.
classes_true_SVC_CS = np.unique(y_test)
classes_pred_SVC_CS = np.unique(Predictions_SVC_CS)
# Accuracy is the fraction of matching labels. Reusing the predictions avoids the
# second full prediction pass that SVC_CS.score(X_test, y_test) would trigger.
# np.ravel makes the comparison element-wise whether y_test is 1-D or (n, 1).
accuracy_SVC_CS = np.mean(Predictions_SVC_CS == np.ravel(y_test))

print("The original classes in the test set are", classes_true_SVC_CS, "\n")
print("The predicted classes are", classes_pred_SVC_CS, "\n")
print("Missing the following classes in the prediction", np.setdiff1d(classes_true_SVC_CS, classes_pred_SVC_CS), "\n")
print("The accuracy for Linear SVC Crammer Singer classifier is " + "{:.2%}".format(accuracy_SVC_CS))

In [19]:
# Running a linear SVC classifier using the One-Vs-Rest strategy for
# multi-class classification.
# random_state is pinned (as in the One-Vs-One cell) so re-runs are reproducible.
SVC_OVR = LinearSVC(multi_class="ovr", random_state=0)
SVC_OVR.fit(X, y)
Predictions_SVC_OVR = SVC_OVR.predict(X_test)

# Compute the class sets once and reuse them in the report below.
classes_true_SVC_OVR = np.unique(y_test)
classes_pred_SVC_OVR = np.unique(Predictions_SVC_OVR)
# Accuracy is the fraction of matching labels. Reusing the predictions avoids the
# second full prediction pass that SVC_OVR.score(X_test, y_test) would trigger.
# np.ravel makes the comparison element-wise whether y_test is 1-D or (n, 1).
accuracy_SVC_OVR = np.mean(Predictions_SVC_OVR == np.ravel(y_test))

print("The original classes in the test set are", classes_true_SVC_OVR, "\n")
print("The predicted classes are", classes_pred_SVC_OVR, "\n")
print("Missing the following classes in the prediction", np.setdiff1d(classes_true_SVC_OVR, classes_pred_SVC_OVR), "\n")
print("The accuracy for Linear SVC One-Vs-Rest classifier is " + "{:.2%}".format(accuracy_SVC_OVR))



The original classes in the test set are [  6  15  16  42  52  53  62  64  65  67  88  90  92  95 991 992 993 994] 

The predicted classes are [ 6 16 42 65 90 92] 

Missing the following classes in the prediction [ 15  52  53  62  64  67  88  95 991 992 993 994] 

The accuracy for Linear SVC One-Vs-Rest classifier is 37.34%


In [3]:
# Running a OneVsOneClassifier using LinearSVC as the base estimator.
# n_jobs must be passed by keyword: recent scikit-learn releases declare it
# keyword-only, so the positional form `OneVsOneClassifier(estimator, -1)`
# raises a TypeError. n_jobs=-1 asks for all available processors.
SVC_OVO = OneVsOneClassifier(LinearSVC(random_state=0), n_jobs=-1)
SVC_OVO.fit(X, y)
Predictions_SVC_OVO = SVC_OVO.predict(X_test)

# Compute the class sets once and reuse them in the report below.
classes_true_SVC_OVO = np.unique(y_test)
classes_pred_SVC_OVO = np.unique(Predictions_SVC_OVO)
# Accuracy is the fraction of matching labels. Reusing the predictions avoids the
# second full prediction pass (one per pairwise estimator) that
# SVC_OVO.score(X_test, y_test) would trigger.
# np.ravel makes the comparison element-wise whether y_test is 1-D or (n, 1).
accuracy_SVC_OVO = np.mean(Predictions_SVC_OVO == np.ravel(y_test))

print("The original classes in the test set are", classes_true_SVC_OVO, "\n")
print("The predicted classes are", classes_pred_SVC_OVO, "\n")
print("Missing the following classes in the prediction", np.setdiff1d(classes_true_SVC_OVO, classes_pred_SVC_OVO), "\n")
print("The accuracy for Linear SVC One-Vs-One classifier is " + "{:.2%}".format(accuracy_SVC_OVO))

The original classes in the test set are [  6  15  16  42  52  53  62  64  65  67  88  90  92  95 991 992 993 994] 

The predicted classes are [16 42 64 65 88 90 92] 

Missing the following classes in the prediction [  6  15  52  53  62  67  95 991 992 993 994] 

The accuracy for Linear SVC One-Vs-One classifier is 52.27%
