# SVM Implementation

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
%matplotlib inline

# Load the encounter records from the CSV in the working directory.
data = pd.read_csv("diabetic_data.csv")

# Columns to be ignored by the model.
drop_features = ['encounter_id', 'patient_nbr', 'weight',
                 'payer_code', 'medical_specialty']
data = data.drop(columns=drop_features)

# Drop unusable rows: keep only encounters that have an A1C result.
# Depending on the pandas version, read_csv either keeps the text "None" as a
# string or parses it as a missing value (NaN). A plain `!= "None"` test lets
# NaN rows through, so both representations are filtered out here.
has_a1c_result = data["A1Cresult"].notna() & (data["A1Cresult"] != "None")
data = data[has_a1c_result]

# Features: every remaining column except the target.
x = data.drop(columns="A1Cresult")
# Target: the A1C result category of each encounter.
y = data["A1Cresult"]

In [2]:
from sklearn import preprocessing

# A single LabelEncoder instance is reused for every column: it is refit on
# each feature in turn, so after the loop it only remembers the classes of the
# last column processed.
encoder = preprocessing.LabelEncoder()

# Overwrite each feature column with its integer codes.
for feature in list(x.columns):
    codes = encoder.fit(x[feature]).transform(x[feature])
    x[feature] = codes

In [8]:
import time
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

# Split data: hold out 25% of the rows for testing. The seed is fixed so that
# a Restart & Run All reproduces the same split (and therefore the same scores).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42)

# Standardise the features. The scaler is fitted on the training split only
# and those same statistics are then applied to the test split; refitting on
# the test data would scale the two splits differently and leak test-set
# statistics into the evaluation.
scaler = preprocessing.StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [9]:
# Linear-kernel SVM.
# The measured runtime covers both fitting on the training split and
# predicting the test split.
start_time = time.time()
lin_model = svm.SVC(cache_size=7000, kernel="linear").fit(x_train, y_train)
lin_model_yhat = lin_model.predict(x_test)
lin_model_time = time.time() - start_time

In [10]:
# RBF (radial basis function) kernel SVM.
# The measured runtime covers both fitting on the training split and
# predicting the test split.
start_time = time.time()
rbf_model = svm.SVC(cache_size=7000, kernel="rbf").fit(x_train, y_train)
rbf_model_yhat = rbf_model.predict(x_test)
rbf_model_time = time.time() - start_time

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline

# Cross validation, 20 folds.
# Each model is wrapped in a pipeline with its own StandardScaler so that the
# scaling is fitted on the training part of every fold. This matches how the
# timed models were trained (on standardised features) without letting the
# held-out part of a fold influence the scaling. cross_val_score works on
# clones, so the already-fitted lin_model / rbf_model are left untouched.
CV_FOLDS = 20
lin_model_cv_score = cross_val_score(
    make_pipeline(preprocessing.StandardScaler(), lin_model), x, y, cv=CV_FOLDS)
rbf_model_cv_score = cross_val_score(
    make_pipeline(preprocessing.StandardScaler(), rbf_model), x, y, cv=CV_FOLDS)

# Mean squared error.
# mean_squared_error needs numeric input, but the targets and predictions are
# class labels, so both are mapped to integer codes with one shared encoder.
# NOTE(review): the codes follow the encoder's sorted label order, not a
# clinical ordering, so this MSE is only a rough disagreement measure --
# consider reporting accuracy for this classification task instead.
label_codes = preprocessing.LabelEncoder().fit(y)
y_test_codes = label_codes.transform(y_test)
lin_model_mse = mean_squared_error(
    y_test_codes, label_codes.transform(lin_model_yhat))
rbf_model_mse = mean_squared_error(
    y_test_codes, label_codes.transform(rbf_model_yhat))


def _print_report(title, mse, cv_score, runtime):
    """Print the MSE, per-fold cross-validation scores and runtime of one model."""
    print(title + " MSE")
    print(mse)
    print(title + " Cross Validation")
    print(cv_score)
    print("runtime (sec): ")
    print(runtime)


_print_report("Linear Model", lin_model_mse, lin_model_cv_score, lin_model_time)

print("\n**********\n")

_print_report("Radial Basis Function", rbf_model_mse, rbf_model_cv_score,
              rbf_model_time)