In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif

In [None]:
# Read the raw dataset from the CSV file
df = pd.read_csv('heart_attack_prediction_dataset.csv')

# Drop the geography and patient-identifier columns
df = df.drop(columns=['Country', 'Continent', 'Hemisphere', 'Patient ID'])

# Binary-encode 'Sex' (Male -> 0, Female -> 1); values outside the mapping become NaN
sex_codes = {'Male': 0, 'Female': 1}
df['Sex'] = df['Sex'].map(sex_codes)

# 'Blood Pressure' is split on '/' into two float columns, then the original is removed
bp_parts = df['Blood Pressure'].str.split('/', expand=True).astype(float)
df[['BP_hi', 'BP_lo']] = bp_parts
df = df.drop(columns='Blood Pressure')

# Ordinal-encode 'Diet' (Unhealthy -> 0, Average -> 1, Healthy -> 2)
diet_codes = {'Healthy': 2, 'Average': 1, 'Unhealthy': 0}
df['Diet'] = df['Diet'].map(diet_codes)

df.head()

In [None]:
# Summarize column dtypes and non-null counts of the preprocessed frame
df.info()

In [None]:
# Every column except the target 'Heart Attack Risk' is rescaled
features_to_scale = df.columns[df.columns != 'Heart Attack Risk'].tolist()

# Min-max scale each feature linearly onto [-1, +1]
for feature in features_to_scale:
    min_val, max_val = df[feature].min(), df[feature].max()
    if max_val == min_val:
        # Constant column: the range is zero, so leave the values untouched
        continue
    df[feature] = -1 + 2 * (df[feature] - min_val) / (max_val - min_val)

# Sanity check: min/max of each scaled column should be -1 and +1
print(df[features_to_scale].describe())

In [None]:
# Preview the first rows to eyeball the rescaled values
df.head()

In [None]:
# Pairwise correlations between all columns, drawn as an annotated heatmap
corr_matrix = df.corr()

fig, ax = plt.subplots(figsize=(20, 10))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, fmt='.3f', ax=ax)
ax.set_title('Feature Correlation Matrix')
fig.tight_layout()
plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# Target vector, and the predictor matrix without the target and 'Previous Heart Problems'
y = df['Heart Attack Risk']
X = df.drop(columns=['Heart Attack Risk', 'Previous Heart Problems'])

# Hold out 20% of the rows for testing; fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression baseline; class 1 is weighted 1.8x relative to class 0
lr_class_weights = {0: 1.0, 1: 1.8}
model_lr = LogisticRegression(class_weight=lr_class_weights, max_iter=1000)
model_lr.fit(X_train, y_train)

# Hard class predictions on the held-out test set
y_pred_lr = model_lr.predict(X_test)

In [None]:
# Peek at the logistic-regression predictions. The original cell referenced
# `y_pred`, which is not assigned until a later cell and therefore raised a
# NameError on a fresh kernel; `y_pred_lr` is the array that exists here.
y_pred_lr[:10]

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Logistic-regression predictions on the test set
y_pred = model_lr.predict(X_test)

# Confusion matrix (rows: true labels, columns: predicted labels) as a heatmap
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='coolwarm', cbar=False, ax=ax)
ax.set_title('Confusion Matrix for Logistic Regression')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
plt.show()



In [None]:
# Accuracy = correctly classified samples (diagonal of the 2x2 matrix) / all samples
correct = cm[0, 0] + cm[1, 1]
accuracy = correct / cm.sum()
print(f"Accuracy: {accuracy:.2f}")

In [None]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# Oversample the training split only (the test split is left untouched);
# the fixed seed makes the resampling reproducible
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X_train, y_train)
X_train_res, y_train_res = resampled

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping  # was used below without being imported
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

def create_model(learning_rate=0.001, dropout_rate=0.4):
    """Build and compile the binary-classification network.

    Architecture: Dense(32, relu, L2) -> Dropout -> Dense(16, relu, L2)
    -> Dropout (half rate) -> Dense(1, sigmoid). The input width is read
    from the notebook-level ``X_train_res`` at call time.

    Args:
        learning_rate: Step size passed to the Adam optimizer.
        dropout_rate: Dropout fraction after the first hidden layer; the
            second dropout layer uses half of this value.

    Returns:
        A compiled Keras ``Sequential`` model using binary cross-entropy
        loss and tracking accuracy.
    """
    model = Sequential([
        Dense(32, activation='relu', input_shape=(X_train_res.shape[1],), kernel_regularizer='l2'),
        Dropout(dropout_rate),
        Dense(16, activation='relu', kernel_regularizer='l2'),
        Dropout(dropout_rate / 2),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Carve out an explicit, shuffled and stratified validation set.
# Keras' `validation_split` takes the LAST fraction of the rows before any
# shuffling; on resampled data that tail is likely to be dominated by the
# rows the resampler added (NOTE(review): confirm row order for the installed
# imblearn version), which would make `val_loss` a one-class signal.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train_res, y_train_res,
    test_size=0.2, stratify=y_train_res, random_state=42,
)

# Train model; stop once val_loss has not improved for 10 epochs and roll
# back to the best weights seen
model = create_model()
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(
    X_fit, y_fit,
    epochs=100, batch_size=32,
    validation_data=(X_val, y_val),
    class_weight={0: 1.0, 1: 1.5},
    verbose=1,
    callbacks=[early_stopping],
)

In [None]:
# Raw model outputs for the test set, inspected before any thresholding
y_prob = model.predict(X_test, verbose=0)

print("Sample Probabilities:", y_prob[:10])
print("Min Probability:", y_prob.min())
print("Max Probability:", y_prob.max())


In [None]:
# Choose the decision threshold that maximizes TPR - FPR along the ROC curve
from sklearn.metrics import roc_curve, auc

fpr, tpr, thresholds = roc_curve(y_test, y_prob)
tpr_minus_fpr = tpr - fpr
optimal_idx = tpr_minus_fpr.argmax()
optimal_threshold = thresholds[optimal_idx]
print(f"Optimal Threshold: {optimal_threshold:.3f}")



In [None]:
# `accuracy_score` was called below without ever being imported (NameError);
# `confusion_matrix` is re-imported so this cell does not rely on an earlier one
from sklearn.metrics import accuracy_score, confusion_matrix

# Apply optimized threshold; flatten so the metrics receive a 1-D label
# vector rather than the column-shaped array produced by thresholding y_prob
y_pred = (y_prob >= optimal_threshold).astype(int).ravel()

# Confusion matrix (rows: true labels, columns: predicted labels)
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='coolwarm', cbar=False)
plt.title('Confusion Matrix for Neural Network')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Fraction of test samples classified correctly at the chosen threshold
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")