In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import pickle

# Load the heart disease dataset (path is relative to the notebook's working directory).
heart_dataset = pd.read_csv("../datasets/heart.csv")

# Exploratory Data Analysis: preview rows, dimensions, summary statistics,
# class balance of the label, and per-class feature means.
print(heart_dataset.head())
print(heart_dataset.shape)
print(heart_dataset.describe())
print(heart_dataset['target'].value_counts())
print(heart_dataset.groupby('target').mean())

# Splitting features and target: 'target' is the label column, every other column is a feature.
# (`columns=` already names the axis, so no separate `axis` argument is needed.)
X_heart = heart_dataset.drop(columns=['target'])
y_heart = heart_dataset['target']

# Splitting dataset into training and testing sets (80/20); the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X_heart, y_heart, test_size=0.2, random_state=42)

# Training the SVM model with a linear kernel on the unscaled features.
heart_model = SVC(kernel='linear')
heart_model.fit(X_train, Y_train)

# Making predictions on the held-out test set.
y_pred = heart_model.predict(X_test)

# Evaluating model accuracy
print("Heart Disease Prediction Accuracy:", accuracy_score(Y_test, y_pred))

# Saving model. The context manager guarantees the file is flushed and closed
# even if pickling raises, instead of leaving the handle to the garbage collector.
filename = 'heart_disease_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(heart_model, model_file)


   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  
(303, 14)
              age         sex          cp    trestbps        chol         fbs  \
count  303.000000  303.000000  303.000000  303.000000  303.000000  303.000000   
mean    54.366337    0.683168    0.966997  131.623762  246.264026    0.148515   
std      9.082101    0.466011    1.032052   17.538143   51.830751    0.356198   
min     29.000000    0.000000    0.000000   94.

In [3]:
# Show the heading followed by every feature column name, one per line.
print("Feature Columns:", *X_heart.columns, sep="\n")


Feature Columns:
age
sex
cp
trestbps
chol
fbs
restecg
thalach
exang
oldpeak
slope
ca
thal


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
import pickle

# Load the heart disease dataset (path is relative to the notebook's working directory).
heart_dataset = pd.read_csv("../datasets/heart.csv")

# Exploratory Data Analysis: preview rows, dimensions, summary statistics,
# class balance of the label, and per-class feature means.
print(heart_dataset.head())
print(heart_dataset.shape)
print(heart_dataset.describe())
print(heart_dataset['target'].value_counts())
print(heart_dataset.groupby('target').mean())

# Splitting features and target: 'target' is the label column, every other column is a feature.
# (`columns=` already names the axis, so no separate `axis` argument is needed.)
X_heart = heart_dataset.drop(columns=['target'])
y_heart = heart_dataset['target']

# Splitting dataset into training and testing sets (80/20); the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X_heart, y_heart, test_size=0.2, random_state=42)

# Standardizing the data for the models that are trained on scaled features below.
# The scaler is fitted on the training split only and then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler next to the models: the models trained on scaled
# features need the same transformation applied to any input at prediction time.
with open('heart_scaler.sav', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Models to train, keyed by the display name used in log output and file names.
models = {
    'SVM': SVC(kernel='linear'),
    'Logistic Regression': LogisticRegression(max_iter=200),
    # Seeded so repeated runs produce the same forest and the same reported accuracy.
    'Random Forest': RandomForestClassifier(random_state=42),
    'K-Nearest Neighbors': KNeighborsClassifier()
}

# Names of the models that are fitted and evaluated on the standardized features.
scaled_model_names = {'SVM', 'Logistic Regression', 'K-Nearest Neighbors'}

# Loop over models to train, evaluate, and save each one.
for model_name, model in models.items():
    print(f"Training {model_name}...")

    # Use the standardized features for the scale-sensitive models, raw features otherwise.
    if model_name in scaled_model_names:
        model.fit(X_train_scaled, Y_train)
        y_pred = model.predict(X_test_scaled)
    else:
        model.fit(X_train, Y_train)
        y_pred = model.predict(X_test)

    # Accuracy score on the held-out test set.
    accuracy = accuracy_score(Y_test, y_pred)
    print(f"{model_name} Accuracy: {accuracy * 100:.2f}%")

    # Confusion matrix on the held-out test set.
    conf_matrix = confusion_matrix(Y_test, y_pred)
    print(f"Confusion Matrix for {model_name}:\n{conf_matrix}\n")

    # Save the model with the prefix 'heart_'. The context manager closes the
    # file on every iteration instead of leaking one open handle per model.
    filename = f'heart_{model_name.lower().replace(" ", "_")}_model.sav'
    with open(filename, 'wb') as model_file:
        pickle.dump(model, model_file)


   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  
(303, 14)
              age         sex          cp    trestbps        chol         fbs  \
count  303.000000  303.000000  303.000000  303.000000  303.000000  303.000000   
mean    54.366337    0.683168    0.966997  131.623762  246.264026    0.148515   
std      9.082101    0.466011    1.032052   17.538143   51.830751    0.356198   
min     29.000000    0.000000    0.000000   94.