In [1]:
# Load Library
import random
import math
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as io
import sklearn
import libsvm
from libsvm.svmutil import *
import pandas as pd
import seaborn as sns

%matplotlib inline
# Seeds only Python's built-in `random` module; NumPy's generator is not seeded here.
random.seed(1)

In [2]:
# Training set: read the CSV and cast the Neighbourhood column to str
Data = pd.read_csv("Train.csv").astype({'Neighbourhood': str})
# Target column on one side, every remaining column on the other
Y = Data['Decision']
X = Data.drop(columns=['Decision'])
# Test set: read the CSV and apply the same str cast to Neighbourhood
Test = pd.read_csv("Test.csv").astype({'Neighbourhood': str})

In [3]:
def Encode(airbnb, reference=None):
    """Integer-encode the categorical columns of ``airbnb`` in place.

    Only the columns 'Room_type', 'Month' and 'Neighbourhood' are touched,
    and only those that are actually present in ``airbnb``.

    Parameters
    ----------
    airbnb : pandas.DataFrame
        Frame to encode. It is modified in place, so pass a copy if the
        original must be preserved.
    reference : pandas.DataFrame, optional
        Frame whose label-to-code mapping is reused. Each label receives the
        code it would get from factorizing the same column of ``reference``,
        so two frames encoded against the same reference share one mapping.
        When omitted (or when ``reference`` lacks the column), codes are
        assigned by order of first appearance in ``airbnb`` itself.

    Returns
    -------
    pandas.DataFrame
        The same ``airbnb`` object with the categorical columns replaced by
        integer codes. Missing values, and labels that do not occur in
        ``reference``, are encoded as -1.
    """
    categorical = airbnb.columns[airbnb.columns.isin(['Room_type', 'Month', 'Neighbourhood'])]
    for column in categorical:
        if reference is not None and column in reference.columns:
            # factorize() numbers labels by first appearance; looking labels up
            # in the reference's unique values reproduces exactly those codes.
            _, labels = reference[column].factorize()
            airbnb[column] = labels.get_indexer(airbnb[column])
        else:
            airbnb[column] = airbnb[column].factorize()[0]
    return airbnb


X_data = Encode(X.copy())
# Encode the test frame with the training frame's mapping. Factorizing it on
# its own would number the categories by their order in Test.csv, so the same
# label could get a different integer than the one the model was trained on.
Test_data = Encode(Test.copy(), reference=X)

In [4]:
# Hold out 20% of the encoded rows for evaluation; fit on the remaining 80%
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    Y,
    test_size=0.2,   # share of rows kept for evaluation
    random_state=1,  # fixed seed so the split is reproducible
)

# Base Model

In [5]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,f1_score

# Baseline: an SVC left at its default settings, fitted on the training split
svc = SVC().fit(X_train, y_train)

# Predictions for the held-out split
y_pred = svc.predict(X_test)

# Report the F1 score on the held-out split
print(
    'Model f1 score with default hyperparameters: {0:0.4f}'.format(
        f1_score(y_true=y_test, y_pred=y_pred)
    )
)

Model f1 score with default hyperparameters: 0.7742


# Run SVM with RBF kernel and different C values


In [15]:
# RBF-kernel SVC (the kernel SVC uses by default) with C=100, fitted on the training split
svc = SVC(C=100.0, kernel='rbf').fit(X_train, y_train)

# Predictions for the held-out split
y_pred = svc.predict(X_test)

# Report the F1 score on the held-out split
print(
    'Model f1 score with C=100 and rbf kernel hyperparameters: {0:0.4f}'.format(
        f1_score(y_true=y_test, y_pred=y_pred)
    )
)

Model f1 score with C=100 and rbf kernel hyperparameters: 0.7811


In [16]:
# Accuracy on the held-out split for the predictions currently stored in y_pred
accuracy_score(y_true=y_test, y_pred=y_pred)

0.6622073578595318

In [13]:
# RBF-kernel SVC (the kernel SVC uses by default) with C=1000, fitted on the training split
svc = SVC(C=1000, kernel='rbf').fit(X_train, y_train)

# Predictions for the held-out split
y_pred = svc.predict(X_test)

# Report the F1 score on the held-out split
print(
    'Model f1 score with C=1000 and rbf kernel hyperparameters: {0:0.4f}'.format(
        f1_score(y_true=y_test, y_pred=y_pred)
    )
)

Model f1 score with C=1000 and rbf kernel hyperparameters: 0.7892


In [14]:
# Accuracy on the held-out split for the predictions currently stored in y_pred
accuracy_score(y_true=y_test, y_pred=y_pred)

0.6809364548494983

In [10]:
# RBF-kernel SVC (the kernel SVC uses by default) with C=10000, fitted on the training split
svc = SVC(C=10000, kernel='rbf').fit(X_train, y_train)

# Predictions for the held-out split
y_pred = svc.predict(X_test)

# Report the F1 score on the held-out split
print(
    'Model f1 score with C=10000 and rbf kernel hyperparameters: {0:0.4f}'.format(
        f1_score(y_true=y_test, y_pred=y_pred)
    )
)

Model f1 score with C=10000 and rbf kernel hyperparameters: 0.7916


In [11]:
# Accuracy on the held-out split for the predictions currently stored in y_pred
accuracy_score(y_true=y_test, y_pred=y_pred)

0.6882943143812709

In [17]:
# RBF-kernel SVC (the kernel SVC uses by default) with C=100000, fitted on the training split
svc = SVC(C=100000, kernel='rbf').fit(X_train, y_train)

# Predictions for the held-out split
y_pred = svc.predict(X_test)

# Report the F1 score on the held-out split
print(
    'Model f1 score with C=100000 and rbf kernel hyperparameters: {0:0.4f}'.format(
        f1_score(y_true=y_test, y_pred=y_pred)
    )
)

Model f1 score with C=100000 and rbf kernel hyperparameters: 0.7908


In [18]:
# Accuracy on the held-out split for the predictions currently stored in y_pred
accuracy_score(y_true=y_test, y_pred=y_pred)

0.6903010033444816

# Run SVM with linear kernel and C=100.0

In [None]:
import libsvm
from libsvm.svmutil import *

In [None]:
# svm_problem accepts lists/tuples or NumPy arrays (or SciPy sparse matrices)
# but rejects pandas objects with a TypeError, so pass the underlying float
# arrays of the training split instead of the Series/DataFrame.
problem = svm_problem(y_train.to_numpy(dtype=float), X_train.to_numpy(dtype=float))
# -s 0: C-SVC, -t 0: linear kernel, -c 1: cost parameter C = 1
# NOTE(review): the section heading says C=100.0 but '-c 1' is used here; confirm which is intended.
parameter = svm_parameter('-c 1 -t 0 -s 0')

## Check for overfitting and underfitting