# Explore the Settings of SVM

## 0. Load data

In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score

# Input locations -- edit these if the data lives somewhere else.
# CSV files holding the train / test splits (read with pandas further down):
TRAIN_SET_PATH, TEST_SET_PATH = (
    './StanceDataset/train.csv',
    './StanceDataset/test.csv',
)
# .npy files holding the train / test feature arrays (read with np.load further down):
TRAIN_FEATURE_PATH, TEST_FEATURE_PATH = (
    'clean_train_vec.npy',
    'clean_test_vec.npy',
)

In [2]:
# dictionaries for label encoding and decoding
encode_dict = {'Hillary Clinton': 0, 'Climate Change is a Real Concern': 1, 'Legalization of Abortion': 2, 'Atheism': 3, 'Feminist Movement': 4, 'Donald Trump': 5}
decode_dict = {0: 'Hillary Clinton', 1: 'Climate Change is a Real Concern', 2: 'Legalization of Abortion', 3: 'Atheism', 4: 'Feminist Movement', 5: 'Donald Trump'}

# Load the labels and features
# Yuhui: There is no 'Donald Trump' in training data
# Both CSVs are read with every column as str and decoded as latin1.
df_train = pd.read_csv(TRAIN_SET_PATH, engine='python', dtype='str', encoding='latin1')
df_test = pd.read_csv(TEST_SET_PATH, engine='python', dtype='str', encoding='latin1')

# Turn the 'Target' column into integer class ids. A target name missing from
# encode_dict raises KeyError instead of being silently dropped.
Y_train = np.array([encode_dict[t] for t in df_train['Target']])
Y_test = np.array([encode_dict[t] for t in df_test['Target']])
# Fix: the second line reports Y_test, so it must say "targets" (it used to say "features").
print("Load {} targets of training data,\n\t{} targets of test data.\n".format(Y_train.shape, Y_test.shape))

X_train = np.load(TRAIN_FEATURE_PATH)
X_test = np.load(TEST_FEATURE_PATH)
print("Load {} features of training data,\n\t{} features of test data.\n".format(X_train.shape, X_test.shape))

# Sanity check: every feature row needs exactly one label, otherwise the
# later fit / scoring calls would be fed misaligned data.
assert X_train.shape[0] == Y_train.shape[0], "train features/labels row count mismatch"
assert X_test.shape[0] == Y_test.shape[0], "test features/labels row count mismatch"

Load (2914,) targets of training data,
	(1956,) features of test data.

Load (2914, 13459) features of training data,
	(1956, 13459) features of test data.



## 1. Default Settings

In [4]:
# Fit -- baseline SVC with the main hyper-parameters written out explicitly.
print("Training the SVM...")
clf = svm.SVC(
    C=1.0,
    kernel='rbf',
    decision_function_shape='ovr',
    class_weight=None,
).fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.40081799591002043


## 2. Regularization parameter

**C: float, default=1.0**

Refer: [Intuition for the regularization parameter in SVM](https://datascience.stackexchange.com/questions/4943/intuition-for-the-regularization-parameter-in-svm)

The regularization parameter (lambda) serves as a degree of importance that is given to misclassifications. SVM poses a quadratic optimization problem that looks for maximizing the margin between both classes and minimizing the number of misclassifications. However, for non-separable problems, in order to find a solution, the misclassification constraint must be relaxed, and this is done by setting the mentioned "regularization". 

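In scikit-learn's `SVC`, this trade-off is controlled by `C`, the penalty applied to misclassified training samples. If `C` is too small, misclassifications are barely penalized and the model tends to underfit. If `C` is too large, the model tries to classify every training sample correctly and tends to overfit.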

### 2.1 C = 0.1

In [5]:
# Fit -- only C is changed (0.1); every other SVC argument keeps its default.
print("Training the SVM...")
clf = svm.SVC(C=0.1).fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.21063394683026584


### 2.2 C = 10

In [6]:
# Fit -- only C is changed (10); every other SVC argument keeps its default.
print("Training the SVM...")
clf = svm.SVC(C=10).fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.4212678936605317


### 2.3 C = 100

In [7]:
# Fit -- only C is changed (100); every other SVC argument keeps its default.
print("Training the SVM...")
clf = svm.SVC(C=100).fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.4212678936605317


## 3. Kernel type

**kernel: {‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’}, default=’rbf’**

Related parameters: 

- **degree: int, default=3**
- **gamma: {‘scale’, ‘auto’} or float, default=’scale’**
- **coef0: float, default=0.0**

### 3.1 kernel = 'linear'

$$K(x_i,x_j)=x_i^Tx_j$$

As the equation above shows, the linear kernel needs no additional parameters. In this experiment, the feature vector of each sentence is already high-dimensional (13,459 features), so the data may well be linearly separable without mapping it to an even higher-dimensional space. Intuitively, the linear kernel should therefore give good results.

In [8]:
# Fit -- switch to the linear kernel; every other SVC argument keeps its default.
print("Training the SVM...")
clf = svm.SVC(kernel='linear').fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.42484662576687116


### 3.2 kernel = 'poly'

$$K(x_i,x_j)=(\gamma x_i^Tx_j + r)^d, d>1$$

As the equation above shows, the polynomial kernel needs three parameters: $d$, $\gamma$, and $r$. In this experiment, the feature vector of each sentence is high-dimensional and the linear kernel is sufficient to classify the data, so we skip the polynomial kernel, which is slow to train.

### 3.3 kernel = 'rbf'

$$K(x_i,x_j)=exp(-\gamma ||x_i-x_j||^2),\gamma>0$$

As the equation above shows, the radial basis function (RBF) kernel needs one parameter, $\gamma$. Let's adjust $\gamma$.

The larger the gamma, the fewer support vectors, and the smaller the gamma value, the more support vectors. More support vectors could fit the model better for the training data, however, may lead to a bad performance on test data. Here, we only try 'scale' and 'auto' of $\gamma$ rather than other float numbers. 

In [10]:
# Fit -- RBF kernel with gamma set to 'auto' rather than the default 'scale'.
print("Training the SVM...")
clf = svm.SVC(
    kernel='rbf',
    gamma='auto',
).fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.15081799591002046


### 3.4 kernel = 'sigmoid'

$$K(x_i,x_j)=tanh(\gamma x_i^Tx_j + r ), \gamma>0, r<0$$

As the equation above shows, the sigmoid kernel needs two parameters, $\gamma$ and $r$.

In [15]:
# Fit -- switch to the sigmoid kernel; every other SVC argument keeps its default.
print("Training the SVM...")
clf = svm.SVC(kernel='sigmoid').fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.4120654396728016


In [12]:
# Fit -- sigmoid kernel with gamma set to 'auto' rather than the default 'scale'.
print("Training the SVM...")
clf = svm.SVC(
    kernel='sigmoid',
    gamma='auto',
).fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.15081799591002046


## 4. Class weight of regularization parameter

**class_weight: dict or ‘balanced’, default=None**

This parameter sets the regularization parameter `C` of class `i` to `class_weight[i] * C` for SVC. If not given, all classes are assumed to have weight one. The ‘balanced’ mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as `n_samples / (n_classes * np.bincount(y))`. Let's try the ‘balanced’ mode.

In [7]:
# Fit -- enable 'balanced' class weights; every other SVC argument keeps its default.
print("Training the SVM...")
clf = svm.SVC(class_weight='balanced').fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.401840490797546


## 5. Best settings

From the experiments above, we know that the linear kernel and balanced class weights both have a positive effect on the results. However, the best regularization parameter depends on the kernel, and it can be neither too large nor too small. We therefore check several values of the regularization parameter (`C=10`, `C=1`, `C=5`, `C=2.5`, `C=1.25`, `C=0.625`). In the end, we get the best settings as:

In [8]:
# Fit -- linear kernel + balanced class weights, with C = 1.
print("Training the SVM...")
clf = svm.SVC(
    C=1,
    kernel='linear',
    class_weight='balanced',
).fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.42535787321063395


In [17]:
# Fit -- linear kernel + balanced class weights, with C = 10.
print("Training the SVM...")
clf = svm.SVC(
    C=10,
    kernel='linear',
    class_weight='balanced',
).fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.42484662576687116


In [3]:
# Fit -- linear kernel + balanced class weights, with C = 5.
print("Training the SVM...")
clf = svm.SVC(
    C=5,
    kernel='linear',
    class_weight='balanced',
).fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.42484662576687116


In [4]:
# Fit -- linear kernel + balanced class weights, with C = 2.5.
print("Training the SVM...")
clf = svm.SVC(
    C=2.5,
    kernel='linear',
    class_weight='balanced',
).fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.42484662576687116


In [5]:
# Fit -- linear kernel + balanced class weights, with C = 1.25.
print("Training the SVM...")
clf = svm.SVC(
    C=1.25,
    kernel='linear',
    class_weight='balanced',
).fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.42484662576687116


In [6]:
# Fit -- linear kernel + balanced class weights, with C = 0.625.
print("Training the SVM...")
clf = svm.SVC(
    C=0.625,
    kernel='linear',
    class_weight='balanced',
).fit(X_train, Y_train)
print("Done!")

# Evaluate -- predict on the test features and score against the test labels.
print("Let's test the SVM!")
Y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=Y_test, y_pred=Y_pred)
print("Accuracy score: {}".format(acc))

Training the SVM...
Done!
Let's test the SVM!
Accuracy score: 0.4279141104294479
