In [33]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.model_selection import train_test_split #for splitting the data into train and test sets
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.impute import SimpleImputer

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC #for SVMs

from statistics import mean
from hyperopt import Trials, hp, fmin, tpe, STATUS_OK, space_eval #for hyperparameter tuning and minimizing

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the read-only input directory and print the full path of
# every file found, so the available dataset files are visible in the output.
for folder, _, files in os.walk('/kaggle/input'):
    for file_name in files:
        full_path = os.path.join(folder, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/heart-attack-analysis-prediction-dataset/o2Saturation.csv
/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv


In [34]:
# Load the dataset; the 'output' column is used as the prediction target below.
df = pd.read_csv('/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv')
df.info()

# Hold out a test set (default 75/25 split).
# - random_state pins the shuffle so the split, and every score computed from
#   it, is reproducible under Restart & Run All.
# - stratify keeps the class proportions of 'output' the same in the train and
#   test partitions, which matters on a dataset this small.
x_train, x_test, y_train, y_test = train_test_split(
    df.drop('output', axis = 1),
    df['output'],
    stratify = df['output'],
    random_state = 0,
)

# Preview the first rows; kept as the last expression so the notebook renders it
# (mid-cell, the result of head() was silently discarded).
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trtbps    303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalachh  303 non-null    int64  
 8   exng      303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slp       303 non-null    int64  
 11  caa       303 non-null    int64  
 12  thall     303 non-null    int64  
 13  output    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [35]:
# Search space for the SVC hyperparameters. Every entry is a discrete
# hp.choice, so each trial draws exactly one candidate value per key.
space = dict(
    C = hp.choice('C', [0.5, 1, 10, 100]),
    gamma = hp.choice('gamma', [1, 0.1, 0.01, 0.001, 0.0001, 'auto', 'scale']),
    kernel = hp.choice('kernel', ['rbf', 'sigmoid', 'linear', 'poly']),
    probability = hp.choice('probability', [False]),  # single option: always False
    degree = hp.choice('degree', [1, 2]),
)

# Stratified 3-fold splitter used for cross-validation; shuffling is seeded so
# the folds are identical on every run.
kfold = StratifiedKFold(random_state = 0, shuffle = True, n_splits = 3)

def objective(params):
    """Evaluate one hyperparameter combination for hyperopt.

    Builds an SVC from ``params``, scores it with cross-validated accuracy on
    the training split (``x_train`` / ``y_train``) using the ``kfold``
    splitter, and returns the negated mean accuracy as the loss, because
    hyperopt minimizes its objective.

    Args:
        params: dict of SVC keyword arguments drawn from the search space.

    Returns:
        dict with 'loss' (negative mean CV accuracy), 'params' (the input
        dict echoed back) and 'status' (STATUS_OK).
    """
    classifier = SVC(**params)
    print(params)  # log the combination being evaluated
    fold_accuracies = cross_val_score(
        classifier,
        x_train,
        y_train,
        scoring = 'accuracy',
        cv = kfold,
        n_jobs = -1,  # -1 asks for all available cores
    )
    return {
        'loss': -mean(fold_accuracies),
        'params': params,
        'status': STATUS_OK,
    }
    
# Trials collects the result dict returned by every evaluation of `objective`.
num_trials = Trials()

# Run 30 evaluations guided by TPE. `best` is returned in hyperopt's own
# encoding of the search space; pass it through space_eval(space, best) to get
# the concrete parameter values.
best = fmin(
    objective,
    space,
    algo = tpe.suggest,
    max_evals = 30,
    trials = num_trials,
)

{'C': 1, 'degree': 1, 'gamma': 0.001, 'kernel': 'poly', 'probability': False}
{'C': 10, 'degree': 1, 'gamma': 0.01, 'kernel': 'poly', 'probability': False}
{'C': 100, 'degree': 1, 'gamma': 1, 'kernel': 'linear', 'probability': False}    
{'C': 100, 'degree': 1, 'gamma': 'scale', 'kernel': 'linear', 'probability': False}
{'C': 10, 'degree': 1, 'gamma': 'scale', 'kernel': 'linear', 'probability': False}
{'C': 100, 'degree': 1, 'gamma': 1, 'kernel': 'linear', 'probability': False}    
{'C': 1, 'degree': 1, 'gamma': 1, 'kernel': 'sigmoid', 'probability': False}     
{'C': 0.5, 'degree': 2, 'gamma': 'scale', 'kernel': 'poly', 'probability': False}
{'C': 0.5, 'degree': 1, 'gamma': 'auto', 'kernel': 'rbf', 'probability': False}  
{'C': 10, 'degree': 2, 'gamma': 0.001, 'kernel': 'rbf', 'probability': False}    
{'C': 1, 'degree': 1, 'gamma': 0.01, 'kernel': 'linear', 'probability': False}    
{'C': 100, 'degree': 2, 'gamma': 0.0001, 'kernel': 'rbf', 'probability': False}   
{'C': 0.5, 'degree'

In [37]:
# Decode hyperopt's result into concrete SVC keyword arguments once, instead of
# re-running space_eval for each of the five parameters.
best_params = space_eval(space, best)

# Build the final model the same way objective() does, then fit it on the
# training split.
svc = SVC(**best_params)
svc.fit(x_train, y_train)

# Mean accuracy on the held-out test split (displayed as the cell output).
svc.score(x_test, y_test)

0.8289473684210527