In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import RobustScaler # or StandardScaler, if preferred
import pickle
import numpy as np

# Data Collection and Processing
heart_data = pd.read_csv('Datasets/heart_disease_data.csv')


# heart_data.info()
# heart_data.describe()

# Feature Engineering (if needed, add new features here)
# ...

# Splitting Features and Target
X = heart_data.drop(columns='target', axis=1)
Y = heart_data['target']  

# Data Scaling using RobustScaler (or StandardScaler if preferred)
scaler = RobustScaler()  
X = scaler.fit_transform(X)

# Splitting Data into Training and Testing Sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

# Model Training (Logistic Regression)
model = LogisticRegression()

# Hyperparameter Tuning using GridSearchCV
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],  
    'penalty': ['l1', 'l2'],  
    'solver': ['liblinear', 'saga'] 
}
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')  
grid_search.fit(X_train, Y_train)
best_model = grid_search.best_estimator_

# Model Evaluation
Y_train_pred = best_model.predict(X_train)
training_accuracy = accuracy_score(Y_train_pred, Y_train)
print('Accuracy on Training data:', training_accuracy)

Y_test_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(Y_test_pred, Y_test)
print('Accuracy on Test data:', test_accuracy)

# Building a Predictive System
def predict_heart_disease(input_data):
    """Predicts heart disease based on input data."""
    input_data_as_numpy_array = np.asarray(input_data)
    input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)
    prediction = best_model.predict(input_data_reshaped) # Using best_model
    if prediction[0] == 0:
        return 'The Person does not have a Heart Disease'
    else:
        return 'The Person has Heart Disease'


input_data = (57, 0, 0, 120, 354, 0, 1, 163, 1, 0.6, 2, 0, 2)
result = predict_heart_disease(input_data)
print(result)

# Saving the Trained Model
filename = 'heart_disease_model.sav'
pickle.dump(best_model, open(filename, 'wb')) # Saving best_model

Accuracy on Training data: 0.8553719008264463
Accuracy on Test data: 0.819672131147541
The Person has Heart Disease
