In [159]:
'''
Name: Lauren Fisher
Class:  CPSC323
Date:   09/29/2021
Desc:   Trains and tests SVM models with 3 different kernel types and varying C values on breast cancer data
        via a grid search, then tests the accuracy of the top performer for each kernel
Notes:

C was searched over the grid 0.1 -> 1000 to show how the trade-off between a soft margin (small C)
and making few training mistakes (large C) is reflected in the final accuracies

RBF Kernel
rbf(x, y) = e^(-gamma|x-y|^2)
Given the above setup, we can control individual points' influence on the overall algorithm.
The larger gamma is, the closer other points must be to affect the model.



Polynomial Kernel
Poly(x, y) = (gamma * <x, y> + coef0)^d
Gamma was set to 'auto', i.e. 1 / n_features, so the kernel scale follows the number of input features.
Use polynomial combinations of features up to a certain degree to theoretically provide a better fit



Sigmoid Kernel
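Sigmoid(x, y) = tanh(gamma * <x, y> + coef0)
(coef0 is the kernel's constant offset; it is not the regularization parameter C searched above.)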
Hyperbolic tangent function mostly preferred for neural networks. This kernel function is 
similar to a two-layer perceptron model of the neural network, which works as an activation function for neurons.

'''

import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report, average_precision_score
from statistics import mean



# Presumably the fraction of samples held out for testing.
# NOTE(review): not referenced by the code in this cell; main() passes test_size=0.4 to
# train_test_split directly -- confirm which value is intended.
TEST_TO_TRAIN_RATIO = 0.25
# NOTE(review): not referenced by the code in this cell; presumably a repeat count left over
# from an earlier experiment -- confirm before relying on or removing it.
NUM_TESTS = 100


def train__and_test_models(x1, y1, x2, y2):
    '''
    Grid-searches SVM hyperparameters on the training data and prints the mean
    cross-validation score of every candidate, then refits one hand-picked
    configuration per kernel and prints its classification report on the test data.

    Parameters:
    ----------
    x1: feature values used to train the models
    y1: training labels (binary classification of target: malignant or benign)
    x2: feature values used to test the models
    y2: test labels, compared against the predictions made for x2

    Returns:
    -------
    None. All results are printed.
    '''
    param_grid = {'C': [0.1,1, 10, 100, 1000], 'kernel': ['rbf', "poly", "sigmoid"], 'gamma': ['auto']}
    grid = GridSearchCV(SVC(), param_grid, refit=True)
    grid.fit(x1, y1)

    print('MODEL RESULTS')
    print('--------------------')

    cv_results = grid.cv_results_
    for candidate, score in zip(cv_results['params'], cv_results['mean_test_score']):
        print('Model:', candidate, 'Accuracy: ',int(score*100))
        print('--------------------')

    # Top performer for each kernel, picked by hand from the grid-search table above.
    # NOTE(review): these are tied to one particular train/test split; re-check them
    # against the printed table if the split or the grid changes.
    models = [
        {'C': 10, 'gamma': 'auto', 'kernel': 'rbf'},
        {'C': 1000, 'gamma': 'auto', 'kernel': 'poly'},
        {'C': 100, 'gamma': 'auto', 'kernel': 'sigmoid'},
    ]
    print()
    print()
    for param in models:
        # Unpack by keyword so the refit model uses exactly the configuration that was
        # grid-searched. SVC's constructor is keyword-only, and the previous positional
        # call also silently dropped gamma='auto' in favour of the default.
        model = SVC(**param)
        model.fit(x1, y1)
        predict = model.predict(x2)
        print(param)
        print('--------------------')
        print("RESULTS:")
        print(classification_report(y2, predict))
        print()
    


def main():
    '''
    Loads the breast cancer data set, min-max normalizes the features, splits the
    data into train and test sets and runs the SVM kernel comparison on them.
    '''
    cancer = load_breast_cancer()

    feat = pd.DataFrame(cancer['data'],columns=cancer['feature_names'])
    #min max norm
    # NOTE(review): the min/max are taken over the full data set before the split, so the
    # test rows influence the scaling of the training rows -- consider fitting the scaling
    # on X_train only.
    norm_feat=(feat-feat.min())/(feat.max()-feat.min())
    target_feat = pd.DataFrame(cancer['target'],columns=['Cancer'])
    # NOTE(review): test_size is hard-coded to 0.4 while TEST_TO_TRAIN_RATIO is 0.25 --
    # confirm which split is intended. random_state pins the split so runs are repeatable.
    X_train, X_test, y_train, y_test = train_test_split(norm_feat, np.ravel(target_feat), test_size=0.4, random_state=101)
    # Call the function this notebook actually defines; the previous name (train_svm)
    # does not exist on a fresh kernel and raised NameError.
    train__and_test_models(X_train, y_train, X_test, y_test)

In [160]:
main()

MODEL RESULTS
--------------------
Model: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'} Accuracy:  65
--------------------
Model: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'} Accuracy:  62
--------------------
Model: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'} Accuracy:  62
--------------------
Model: {'C': 1, 'gamma': 'auto', 'kernel': 'rbf'} Accuracy:  95
--------------------
Model: {'C': 1, 'gamma': 'auto', 'kernel': 'poly'} Accuracy:  62
--------------------
Model: {'C': 1, 'gamma': 'auto', 'kernel': 'sigmoid'} Accuracy:  93
--------------------
Model: {'C': 10, 'gamma': 'auto', 'kernel': 'rbf'} Accuracy:  98
--------------------
Model: {'C': 10, 'gamma': 'auto', 'kernel': 'poly'} Accuracy:  81
--------------------
Model: {'C': 10, 'gamma': 'auto', 'kernel': 'sigmoid'} Accuracy:  97
--------------------
Model: {'C': 100, 'gamma': 'auto', 'kernel': 'rbf'} Accuracy:  98
--------------------
Model: {'C': 100, 'gamma': 'auto', 'kernel': 'poly'} Accuracy:  94
--------------------
M

