# Support Vector Machines

In [28]:
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [29]:
# Read the one-hot encoded train/test splits.
df_train = pd.read_csv('../train_insurance_one_hot.csv')
df_test = pd.read_csv('../test_insurance_one_hot.csv')

# Derive the binary target `expensive` (True when charges exceed 25000), then
# drop the two charge columns so only the derived label and the remaining
# columns are kept.
df_train = (
    df_train
    .assign(expensive=df_train['charges'] > 25000)
    .drop(columns=['charges', 'chargeGroup'])
)
df_test = (
    df_test
    .assign(expensive=df_test['charges'] > 25000)
    .drop(columns=['charges', 'chargeGroup'])
)

## a)

In [48]:
# Part a): fit a linear SVM (C=10) on a 3-feature subset and on all features,
# then compare the test-set confusion matrices and the learned weights.
features_3 = ["age", "bmi", "smoker_yes"]

# --- 3-feature model ---
x_train_3 = df_train[features_3]
y_train_3 = df_train["expensive"]

x_test_3 = df_test[features_3]
y_test_3 = df_test["expensive"]

# `gamma` only affects the 'rbf', 'poly' and 'sigmoid' kernels, so it is not
# passed to the linear SVM.
SVM_3 = SVC(C=10, kernel='linear')
SVM_3.fit(x_train_3, y_train_3)
SVM_3_predict = SVM_3.predict(x_test_3)

# --- all-feature model: every column except `id` and the target ---
x_train_all = df_train.drop(columns=['id', 'expensive'])
y_train_all = df_train["expensive"]
x_test_all = df_test.drop(columns=['id', 'expensive'])
y_test_all = df_test["expensive"]

SVM_all = SVC(C=10, kernel='linear')
SVM_all.fit(x_train_all, y_train_all)
SVM_all_predict = SVM_all.predict(x_test_all)

# Confusion matrices: rows are true classes, columns are predicted classes.
print("Confusion matrix (3 features):")
print(confusion_matrix(y_test_3, SVM_3_predict))

print("\nConfusion matrix (all features, except id):")
print(confusion_matrix(y_test_all, SVM_all_predict))

# Learned weights of each linear decision function, one row per feature.
# The features are not standardized in this cell, so the coefficient
# magnitudes also reflect each feature's scale.
coef_3 = pd.DataFrame({
    "feature": x_train_3.columns,
    "coefficient": SVM_3.coef_.ravel(),
})

coef_all = pd.DataFrame({
    "feature": x_train_all.columns,
    "coefficient": SVM_all.coef_.ravel(),
})

display(coef_3)
display(coef_all)

Confusion matrix (3 features):
[[220   3]
 [  9  36]]

Confusion matrix (all features, except id):
[[220   3]
 [  9  36]]


Unnamed: 0,feature,coefficient
0,age,0.026161
1,bmi,0.152631
2,smoker_yes,3.405739


Unnamed: 0,feature,coefficient
0,age,0.022779
1,bmi,0.160733
2,children,-0.020815
3,sex_female,-0.023882
4,sex_male,0.023882
5,smoker_no,-1.705397
6,smoker_yes,1.705397
7,region_northeast,0.174317
8,region_northwest,0.016349
9,region_southeast,-0.181235


## b)

In [44]:
# Part b): test-set accuracy and precision of the two models fitted in part a).
# `accuracy_score` / `precision_score` are imported in the notebook's top
# import cell, so this cell (and the later ones that reuse them) does not
# depend on a mid-notebook import.
# Precision is reported for the positive class, i.e. `expensive == True`.
acc_3 = accuracy_score(y_test_3, SVM_3_predict)
prec_3 = precision_score(y_test_3, SVM_3_predict)

acc_all = accuracy_score(y_test_all, SVM_all_predict)
prec_all = precision_score(y_test_all, SVM_all_predict)

print("3-feature SVM:  accuracy =", acc_3, ", precision =", prec_3)
print("All-feature SVM: accuracy =", acc_all, ", precision =", prec_all)

3-feature SVM:  accuracy = 0.9552238805970149 , precision = 0.9230769230769231
All-feature SVM: accuracy = 0.9552238805970149 , precision = 0.9230769230769231


## c)

In [50]:
# Part c): repeat part a) on standardized features. Each scaler is fitted on
# the training split only and then applied to the test split, so no test-set
# statistics enter the preprocessing.

# --- 3 features, scaled ---
scaler_3 = StandardScaler()
x_train_3_scaled = scaler_3.fit_transform(x_train_3)
x_test_3_scaled = scaler_3.transform(x_test_3)

# `gamma` only affects the 'rbf', 'poly' and 'sigmoid' kernels, so it is not
# passed to the linear SVM.
SVM_3_scaled = SVC(C=10, kernel='linear')
SVM_3_scaled.fit(x_train_3_scaled, y_train_3)
SVM_3_scaled_predict = SVM_3_scaled.predict(x_test_3_scaled)

print("Confusion matrix (3 features, scaled):")
cm_3_scaled = confusion_matrix(y_test_3, SVM_3_scaled_predict)
print(cm_3_scaled)

acc_3_scaled = accuracy_score(y_test_3, SVM_3_scaled_predict)
prec_3_scaled = precision_score(y_test_3, SVM_3_scaled_predict)
print("\nAccuracy  (3 features, scaled):", acc_3_scaled)
print("Precision (3 features, scaled):", prec_3_scaled)

# --- all features, scaled ---
scaler_all = StandardScaler()
x_train_all_scaled = scaler_all.fit_transform(x_train_all)
x_test_all_scaled = scaler_all.transform(x_test_all)

SVM_all_scaled = SVC(C=10, kernel='linear')
SVM_all_scaled.fit(x_train_all_scaled, y_train_all)
SVM_all_scaled_predict = SVM_all_scaled.predict(x_test_all_scaled)

print("\nConfusion matrix (all features, scaled):")
cm_all_scaled = confusion_matrix(y_test_all, SVM_all_scaled_predict)
print(cm_all_scaled)

acc_all_scaled = accuracy_score(y_test_all, SVM_all_scaled_predict)
prec_all_scaled = precision_score(y_test_all, SVM_all_scaled_predict)

print("Accuracy  (all features, scaled):", acc_all_scaled)
print("Precision (all features, scaled):", prec_all_scaled)

Confusion matrix (3 features, scaled):
[[220   3]
 [  9  36]]

Accuracy  (3 features, scaled): 0.9552238805970149
Precision (3 features, scaled): 0.9230769230769231

Confusion matrix (all features, scaled):
[[221   2]
 [  9  36]]
Accuracy  (all features, scaled): 0.9589552238805971
Precision (all features, scaled): 0.9473684210526315


## d)

In [53]:
# Part d): sweep the regularization parameter C for the linear SVM trained on
# the unscaled all-feature data and report the accuracy for each value.
# NOTE(review): the accuracy is measured on the test split; if C is to be
# *selected* from this sweep, use a validation split or cross-validation so the
# test split stays untouched.
for C in [0.01, 0.1, 1, 10, 100]:
    # `gamma` only affects the 'rbf', 'poly' and 'sigmoid' kernels, so it is
    # not passed to the linear SVM.
    svm_temp = SVC(C=C, kernel='linear')
    svm_temp.fit(x_train_all, y_train_all)
    predict_temp = svm_temp.predict(x_test_all)
    acc_temp = accuracy_score(y_test_all, predict_temp)
    print(f"C={C}      test accuracy ={acc_temp}")

C=0.01      test accuracy =0.8694029850746269
C=0.1      test accuracy =0.9514925373134329
C=1      test accuracy =0.9552238805970149
C=10      test accuracy =0.9552238805970149
C=100      test accuracy =0.9589552238805971
