In [33]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC

import competition_helpers

In [34]:
# I/O configuration: input CSV files and the submission destination.
# Features and labels are loaded through the project helper; only the label
# file is read with remove_header=True.
X_train = competition_helpers.read_csv("train_features.csv")
X_test = competition_helpers.read_csv("test_features.csv")
y_train = competition_helpers.read_csv("train_label.csv", remove_header=True)

# First column of the test file (read without a header row), flattened to 1-D.
# It is written out as the "id" column of the submission.
test_first_column = pd.read_csv("test_features.csv", header=None).iloc[:, 0]
submission_col = np.array(test_first_column).ravel()

submission_file_name = "results/SVM_submission.csv"

# Sanity check: shapes of the three loaded arrays.
print(*(loaded.shape for loaded in (X_train, y_train, X_test)))

(418, 100) (418, 1) (378, 100)


In [35]:
# Build the 5-fold cross-validation splits once, in two variants that differ
# only in the helper's final flag.
n_folds = 5

# Flag off: features used as loaded.
train_test_split = competition_helpers.kfold_stratified_split(X_train, y_train, n_folds, False)

# Flag on: with standardization.
standardized_train_test_split = competition_helpers.kfold_stratified_split(X_train, y_train, n_folds, True)

In [36]:
# 5 fold train test split results
def evaluate_svc_cv(folds):
    """Fit a fresh RBF-kernel SVC on every fold and average the scores.

    Parameters
    ----------
    folds : iterable
        Each item must unpack as
        ``[(X_train_cv, y_train_cv), (X_test_cv, y_test_cv)]``.

    Returns
    -------
    numpy.ndarray
        Fold-averaged ``[accuracy, precision, recall, f1]``.

    Raises
    ------
    ValueError
        If ``folds`` yields no splits, so there is nothing to average.
    """
    fold_scores = []
    for (X_train_cv, y_train_cv), (X_test_cv, y_test_cv) in folds:
        # `degree` is only used by the 'poly' kernel, so it is not passed for
        # 'rbf'; the fitted model is unchanged.
        clf = SVC(C=1.0, kernel='rbf', gamma='auto')
        clf.fit(X_train_cv, y_train_cv.ravel())

        y_true = y_test_cv.ravel()
        y_pred = clf.predict(X_test_cv).ravel()

        fold_scores.append([
            accuracy_score(y_true, y_pred),
            precision_score(y_true, y_pred),
            recall_score(y_true, y_pred),
            f1_score(y_true, y_pred),
        ])

    if not fold_scores:
        raise ValueError("no cross-validation folds to evaluate")

    # Column-wise mean over folds.
    return np.mean(np.array(fold_scores), axis=0)


# Without standardization
measures = evaluate_svc_cv(train_test_split)
print(measures)

# With standardization
measures = evaluate_svc_cv(standardized_train_test_split)
print(measures)

[0.74404711 0.74404711 1.         0.85323673]
[0.8278428  0.81892379 0.98725038 0.89513694]


In [37]:
# Fit the final model on the full training set and predict the test set.
#
# The recorded cross-validation scores show that on unstandardized features
# the SVC reaches recall 1.0 with precision equal to accuracy, which is
# consistent with predicting the positive class for every sample, while the
# standardized variant scores higher on accuracy, precision and F1. The final
# model is therefore trained on standardized features.
#
# NOTE(review): the call form of standardize_data is taken from the line that
# was previously commented out here; confirm it returns
# (standardized_train, standardized_test) and scales the test set with
# statistics derived from the training set.
X_train_standard, X_test_standard = competition_helpers.standardize_data(X_train, X_test)

# `degree` is only used by the 'poly' kernel, so it is not passed for 'rbf'.
clf = SVC(C=1.0, kernel='rbf', gamma='auto')
clf.fit(X_train_standard, y_train.ravel())
prediction = clf.predict(X_test_standard)

In [38]:
# Write the submission file: one row per test sample with its id and the
# predicted label. The output directory is created first because to_csv
# raises when asked to write into a directory that does not exist.
Path(submission_file_name).parent.mkdir(parents=True, exist_ok=True)
pd.DataFrame({"id": submission_col, "label": prediction}).to_csv(submission_file_name, encoding='utf-8', index=False)