# Boston Housing Classification SVM Cross Validation

In [None]:
import sys
import pandas as pd
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline, Pipeline
sys.path.append("..")

In [None]:
# Location of the Boston housing CSV, given relative to the notebook's
# working directory.
inputFile = "../data/Boston_Housing_Data.csv"

## Read the data into a DataFrame

In [None]:
# Load the semicolon-delimited CSV into a DataFrame.
df = pd.read_csv(inputFile, delimiter=";")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
df.info()

## Feature selection 

In [None]:
# Labels: the CAT column, kept as a single-column DataFrame (double brackets)
# and copied so it is independent of df.
df_labels = df[["CAT"]].copy()
# Features: every remaining column except MEDV and the label CAT.
# NOTE(review): MEDV is presumably the raw value CAT was derived from, which
# would explain why it is excluded too -- confirm against the data set.
df_features = df.drop(columns=["MEDV", "CAT"])
display(df_features)
display(df_labels)

## Train test split

In [None]:
# Hold out 30 % of the rows for testing; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_features,
    df_labels,
    test_size=0.3,
    random_state=1234,
)
# Show each partition so the split can be inspected.
display(X_train)
display(X_test)
display(y_train)
display(y_test)

## Pipelining

Support Vector Machine Classifier

In [None]:
# Linear support vector classifier used as the final pipeline step.
# random_state pins the data shuffling done by the dual coordinate-descent
# solver, so repeated fits with the same parameters produce the same model
# (the train/test split is seeded as well).
lsvc = LinearSVC(random_state=1234)

Build the pipeline

In [None]:

# Standardise the features, then classify with the linear SVM.
# The step names are spelled out explicitly; they are the lower-cased class
# names that make_pipeline would generate for these two estimators.
scaler = StandardScaler()
pipeline = Pipeline(
    steps=[
        ("standardscaler", scaler),
        ("linearsvc", lsvc),
    ]
)
print(pipeline)

Build the parameter grid

In [None]:
# Hyper-parameter grid for the LinearSVC step of the pipeline.
# Keys follow scikit-learn's "<step name>__<parameter>" convention, where
# "linearsvc" is the name of the classifier step.
# 3 x 3 x 2 = 18 parameter combinations in total.
param_grid = {
    "linearsvc__max_iter": [8000, 10000, 15000],
    "linearsvc__C": [1.0, 0.5, 0.1],
    "linearsvc__loss": ["hinge", "squared_hinge"],
}

Build the cross-validated grid search

In [None]:
# Exhaustive search over param_grid with 5-fold cross-validation, ranking
# candidates by accuracy; training-fold scores are recorded as well.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring="accuracy",
    cv=5,
    return_train_score=True,
)
# Fit on the CAT column selected from the label frame.
grid_search.fit(X_train, y_train["CAT"])

## Show the best model

In [None]:
# Report, one per line: the winning parameter combination, the estimator
# built with it, and its best cross-validation score.
print(
    grid_search.best_params_,
    grid_search.best_estimator_,
    grid_search.best_score_,
    sep="\n",
)

## Test the model

In [None]:
# Predict on the held-out rows with the fitted grid search object.
y_pred = grid_search.predict(X_test)
# Score against the CAT column as a 1-D Series -- the same form the model was
# fitted on -- instead of the single-column label DataFrame.
accuracy = accuracy_score(y_test["CAT"], y_pred)
# Error rate is the complement of accuracy.
print("Test Error = ", 1.0 - accuracy)

## Verify the best model

In [None]:
# Cross-check the search result: apply the best parameters to the original
# pipeline, fit it on the training data, and evaluate it on the test rows.
pipeline.set_params(**grid_search.best_params_)
pipeline.fit(X_train, y_train["CAT"])
y_pred_opt = pipeline.predict(X_test)
# Score against the CAT column as a 1-D Series -- the same form used for
# fitting -- instead of the single-column label DataFrame.
accuracy = accuracy_score(y_test["CAT"], y_pred_opt)
print("Test Error = ", 1.0 - accuracy)