In [2]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report
import joblib

# ---------------------------------------------------------------------------
# Heart-disease classifier: load the CSV, fit a logistic regression on a
# fixed subset of columns, report cross-validation and hold-out scores, then
# persist the fitted estimator to disk.
# ---------------------------------------------------------------------------

# 1. Read the raw table (point this path at wherever your copy of the CSV lives).
data = pd.read_csv('/content/heart.csv')

# 2. Columns used as predictors; the label is read from the 'target' column.
important_features = [
    'cp', 'ca', 'thalach', 'oldpeak', 'age',
    'thal', 'trestbps', 'chol', 'exang', 'slope',
]
X_important, y = data[important_features], data['target']

# 3. Hold out 20% of the rows for the final evaluation. random_state pins
#    the shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_important,
    y,
    test_size=0.2,
    random_state=42,
)

# 4. Fit on the training portion only. max_iter is raised to 1000 here
#    (presumably to give the solver room to converge on unscaled inputs --
#    TODO confirm).
logreg = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# 5. Cross-validation with cv=5. Note that this is run on the full
#    feature matrix and labels, not just on the training portion.
logreg_cv_scores = cross_val_score(logreg, X_important, y, cv=5)
print(f'Logistic Regression CV Accuracy: {logreg_cv_scores.mean() * 100:.2f}%')

# 6. Score the fitted model on the held-out rows and print a per-class report.
final_predictions = logreg.predict(X_test)
final_accuracy = accuracy_score(y_test, final_predictions)
print(f'Final Logistic Regression Accuracy: {final_accuracy * 100:.2f}%')
print(classification_report(y_test, final_predictions))

# 7. Persist the fitted estimator. This is intentionally the last expression
#    in the cell so that the value returned by joblib.dump is displayed.
joblib.dump(logreg, 'final_logreg_model.pkl')


Logistic Regression CV Accuracy: 81.81%
Final Logistic Regression Accuracy: 86.89%
              precision    recall  f1-score   support

           0       0.86      0.86      0.86        29
           1       0.88      0.88      0.88        32

    accuracy                           0.87        61
   macro avg       0.87      0.87      0.87        61
weighted avg       0.87      0.87      0.87        61



['final_logreg_model.pkl']