#### Applied Machine Learning- Mini Project 2 (Tasnim Ahmed - ta1743)

## Support Vector Machines

### Exploring the Data

In [1]:
import numpy as np
import sklearn
import pandas as pd
import matplotlib.pyplot as plt
import warnings

In [2]:
from sklearn.datasets import load_breast_cancer  # we will be working with breast cancer dataset

In [3]:
data_cancer = load_breast_cancer()          #load the data 
df_cancer = pd.DataFrame(data_cancer.data, columns=data_cancer.feature_names) 

In [4]:
df_cancer['Target'] = data_cancer.target    #loading the target of the dataset
df_cancer.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [5]:
X = df_cancer.drop('Target', axis = 'columns') #independent variables
y = df_cancer['Target']  #dependent variable

In [6]:
print(X.shape)
print(y.shape)

(569, 30)
(569,)


### Splitting and Scaling the Data

In [7]:
# importing the required modules to split and scale the data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [127]:
# Splitting the data into training (80%) and testing set (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 10, test_size = 0.2)

X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

In [9]:
# Scaling the training data
sc = StandardScaler()
X_train_tr = sc.fit_transform(X_train)
# Adding the bias feature
X_train_tr = np.append(arr = np.ones([X_train_tr.shape[0], 1]).astype(int), values = X_train_tr, axis= 1)

In [10]:
# Scaling the testing data
X_test_tr = sc.transform(X_test)
# Adding the bias feature
X_test_tr = np.append(arr = np.ones([X_test_tr.shape[0], 1]).astype(int), values = X_test_tr, axis= 1)

### Labelling the y Values

In [14]:
#relabel the y values of the training set
y_train_labelled = np.array([(2*y_train[i] - 1) for i in range(len((y_train)))])

### Implementing the SVM Primal Problem

In [120]:
def svm_gradient(X, y, theta, C):
    gradient = np.zeros(X.shape[1])
    for i in range(len(X)):
        if max(0, 1 - y[i]*(np.dot(X[i], theta))) == 0:
            gradient += theta
        else: 
            gradient += (theta - (C*y[i]*X[i]))
    return (gradient/len(X))

def svm_theta(X, y, lr, n_iters, C):
    theta = np.random.randn(X.shape[1])    #starting with a random theta
    for i in range(n_iters):
        sample_indices = np.random.choice(len(X), size = 100, replace = False)
        X_sample = X[sample_indices, :]
        y_sample = y[sample_indices]
        gradient = svm_gradient(X_sample, y_sample, theta, C)
        theta = theta - lr*gradient
    return theta

In [121]:
#initial hyperparameters                     
lr = 0.5                                       # learning rate
n_iters = 1000                                 # number of iterations
C = 0.9                                        # lower value of C prevent the model from overfitting
#model_theta = svm_gradient(X_train_tr, theta, y_train, C)
model_theta = svm_theta(X_train_tr, y_train_labelled, lr, n_iters, C)  
print("Theta value (w):", model_theta)

Theta value (w): [ 0.07136031 -0.09446261 -0.11773046 -0.09450721 -0.09417908  0.02987496
 -0.02706763 -0.06966478 -0.06807671 -0.0524938   0.05919726 -0.06199505
 -0.00353264 -0.05717399 -0.05895971  0.04315824  0.00451667 -0.00571681
 -0.02615108  0.00364898  0.02460134 -0.09843924 -0.11419171 -0.09762726
 -0.09525686  0.00476032 -0.05555936 -0.07630493 -0.08623195 -0.10148702
 -0.01962416]


#### Which training points are closest to the decision boundary?

In [122]:
distance = np.absolute(np.dot(X_train_tr, model_theta))/np.sqrt(np.dot(model_theta, model_theta))
min_distance = min(distance)
min_index = np.argmin(distance)

print("The minimum distance from the decision boundary is:", min_distance)
print("Which corresponds to the following observation from X: \n", X_train[min_index, :])

The minimum distance from the decision boundary is: 0.017419329951858124
Which corresponds to the following observation from X: 
 [1.742e+01 2.556e+01 1.145e+02 9.480e+02 1.006e-01 1.146e-01 1.682e-01
 6.597e-02 1.308e-01 5.866e-02 5.296e-01 1.667e+00 3.767e+00 5.853e+01
 3.113e-02 8.555e-02 1.438e-01 3.927e-02 2.175e-02 1.256e-02 1.807e+01
 2.807e+01 1.204e+02 1.021e+03 1.243e-01 1.793e-01 2.803e-01 1.099e-01
 1.603e-01 6.818e-02]


#### The decision function 

In [136]:
random_index = np.random.choice(len(X_train_tr), size = 1, replace = False)

In [135]:
mult = X_train_tr[random_index]*model_theta
print(mult)

[[ 0.07136031  0.00906609 -0.00049644  0.01556608  0.016511   -0.03631761
   0.02719185  0.05274312  0.05124622  0.04622128 -0.05861315  0.01747146
   0.00184575  0.02367727  0.01604921 -0.04719766 -0.00384646  0.00382324
   0.02443464 -0.00466171 -0.01554512  0.00029911 -0.01593439  0.01157965
   0.01087957 -0.00572885  0.04147204  0.03992407  0.04642337  0.095058
   0.00895083]]


#### Predicting the y-values

In [125]:
y_pred = np.dot(X_test_tr, model_theta)
y_predicted = [1 if i >= 0 else 0 for i in y_pred]

In [114]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

In [126]:
print("Precision:", precision_score(y_test, y_predicted))
print("Recall:", recall_score(y_test, y_predicted))
print("F1-Score", f1_score(y_test, y_predicted))
print("Confusion Matrix: \n", confusion_matrix(y_test, y_predicted))

Precision: 1.0
Recall: 0.9733333333333334
F1-Score 0.9864864864864865
Confusion Matrix: 
 [[39  0]
 [ 2 73]]


### Testing with blob generated data

#### Generating the blob data

In [128]:
from sklearn.datasets import make_blobs
X_blob, y_blob = make_blobs(n_samples=1000, centers=2, random_state=0)

In [129]:
# Splitting the blob data into training (80%) and testing set (20%)
Xb_train, Xb_test, yb_train, yb_test = train_test_split(X_blob, y_blob, random_state = 10, test_size = 0.2)

In [131]:
# Scaling the blob training data
scb = StandardScaler()
Xb_train_tr = scb.fit_transform(Xb_train)
# Adding the bias feature
Xb_train_tr = np.append(arr = np.ones([Xb_train_tr.shape[0], 1]).astype(int), values = Xb_train_tr, axis= 1)

In [133]:
# Scaling the blob testing data
Xb_test_tr = scb.transform(Xb_test)
# Adding the bias feature
Xb_test_tr = np.append(arr = np.ones([Xb_test_tr.shape[0], 1]).astype(int), values = Xb_test_tr, axis= 1)

In [138]:
yb_train_labelled = np.array([(2*yb_train[i] - 1) for i in range(len((yb_train)))])

In [139]:
#initial hyperparameters                     
lr = 0.5                                       # learning rate
n_iters = 1000                                 # number of iterations
C = 0.9                                        # lower value of C prevent the model from overfitting
theta = 1
model_theta_blob = svm_theta(Xb_train_tr, yb_train_labelled, lr, n_iters, C)  
print("Theta value (w):", model_theta_blob)

Theta value (w): [ 0.08494489  0.23154898 -0.63483156]


In [140]:
yb_pred = np.dot(Xb_test_tr, model_theta_blob)
yb_predicted = [1 if i >= 0 else 0 for i in yb_pred]

In [141]:
print("Precision:", precision_score(yb_test, yb_predicted))
print("Recall:", recall_score(yb_test, yb_predicted))
print("F1-Score", f1_score(yb_test, yb_predicted))
print("Confusion Matrix: \n", confusion_matrix(yb_test, yb_predicted))

Precision: 0.9433962264150944
Recall: 0.9523809523809523
F1-Score 0.9478672985781991
Confusion Matrix: 
 [[ 89   6]
 [  5 100]]
