In [1]:
import pandas as pd
from sqlalchemy import create_engine

In [2]:
csv_file2 = "./resources/heart.csv"
heart_df = pd.read_csv(csv_file2)
heart_df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
# Set features
# selected_features = df[['names', 'of', 'selected', 'features', 'here']]
selected_features = heart_df

# Create a Train Test Split

In [4]:
#Use location_desc for the y values
# Assign x and y values
X = heart_df.drop("ca", axis=1)
y = heart_df["ca"]

print(X.shape, y.shape)

(303, 13) (303,)


In [5]:
# Import Dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

# Split data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=115, stratify=y)

In [6]:
X_train.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,thal,target
58,34,1,3,118,182,0,0,174,0,0.0,2,2,1
285,46,1,0,140,311,0,1,120,1,1.8,1,3,0
270,46,1,0,120,249,0,0,144,0,0.8,2,3,0
189,41,1,0,110,172,0,0,158,0,0.0,2,3,0
184,50,1,0,150,243,0,0,128,0,2.6,1,3,0


# 

# Pre-processing¶
Scale the data using the MinMaxScaler and perform some feature selection



In [8]:
# Scale the data
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Train Model - SVM¶
Use Support Vector Machine (SVM) model

In [9]:
# Create the SVC Model
from sklearn.svm import SVC 
model = SVC(kernel='linear')
model.fit(X_train_scaled, y_train)

SVC(kernel='linear')

In [11]:
print(f"Training Data Score: {model.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {model.score(X_test_scaled, y_test)}")

Training Data Score: 0.6740088105726872
Testing Data Score: 0.5921052631578947


# 

# Hyperparameter Tuning¶
Use GridSearchCV to tune the SVM model's parameters


In [12]:
# Create the GridSearchCV model
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 5, 10],
              'gamma': [0.0001, 0.001, 0.01]}
grid = GridSearchCV(model, param_grid, verbose=3)

In [13]:
# Fit the SVM model using the grid search estimator. 
# This will take the SVM model and try each combination of parameters
grid.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.609, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.565, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.600, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.578, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.600, total=   0.0s
[CV] C=1, gamma=0.001 ................................................
[CV] .................... C=1, gamma=0.001, score=0.609, total=   0.0s
[CV] C=1, gamma=0.001 ................................................
[CV] ............

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:    0.2s finished


GridSearchCV(estimator=SVC(kernel='linear'),
             param_grid={'C': [1, 5, 10], 'gamma': [0.0001, 0.001, 0.01]},
             verbose=3)

In [14]:
# List the best parameters for this dataset
print(f"Best Parameters: {grid.best_params_}")

# List the best score
print(f"Best Score: {grid.best_score_}")

Best Parameters: {'C': 10, 'gamma': 0.0001}
Best Score: 0.5904347826086956


In [15]:
print(f"Training Grid Score: {grid.score(X_train_scaled, y_train)}")
print(f"Testing Grid Score: {grid.score(X_test_scaled, y_test)}")


Training Grid Score: 0.6607929515418502
Testing Grid Score: 0.6578947368421053


In [16]:
#Make predictions with the hypertuned model
predictions = grid.predict(X_test_scaled)

In [17]:
# Calculate classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.78      0.86      0.82        44
           1       0.47      0.56      0.51        16
           2       0.43      0.30      0.35        10
           3       0.00      0.00      0.00         5
           4       0.00      0.00      0.00         1

    accuracy                           0.66        76
   macro avg       0.34      0.35      0.34        76
weighted avg       0.61      0.66      0.63        76



  _warn_prf(average, modifier, msg_start, len(result))


# 

# Save the Model

In [18]:
# Save the model 
import joblib
filename = 'MYCROFTXTREEM_SVM.sav'
joblib.dump(model, filename)

['MYCROFTXTREEM_SVM.sav']