<a href="https://colab.research.google.com/github/shahrooz-coder/freelancer/blob/main/heart_failure.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim

# Data loading

In [None]:
# The dataset is read directly from a raw GitHub URL, so this cell needs network access.
URL='https://raw.githubusercontent.com/shahrooz-coder/freelancer/main/heart.csv'
df=pd.read_csv(URL)

In [None]:
# Preview the first rows of the raw data.
df.head()

In [None]:
# Summary statistics of the numeric columns.
df.describe()


In [None]:
# Column dtypes and non-null counts.
df.info()

# Preprocessing

In [None]:
# Separate the label column from the feature columns
label_name = 'HeartDisease'
df_label = df[label_name]
df_features = df.drop(columns=label_name)

# Separate numerical and categorical feature names by dtype
numeric_cols = df_features.select_dtypes(include=['number']).columns
categorical_cols = df_features.select_dtypes(include=['object']).columns


In [None]:
# Rescale every numeric feature to the [0, 1] range and keep the result as a
# DataFrame so it can be concatenated with the encoded categorical columns.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split further down, so test rows influence the scaling. Consider fitting on
# the training split only.
scaler = MinMaxScaler(feature_range=(0, 1))
df_features_numeric_cols = pd.DataFrame(
    scaler.fit_transform(df_features[numeric_cols]),
    columns=numeric_cols,
)

In [None]:
# One-hot encode the categorical features. The encoder output is densified with
# .toarray() and wrapped in a DataFrame whose columns carry the encoder's own
# generated feature names, ready to be combined with the numeric columns.
ohe = OneHotEncoder()
encoded_categories = ohe.fit_transform(df_features[categorical_cols])
df_features_categorical_cols = pd.DataFrame(
    encoded_categories.toarray(),
    columns=ohe.get_feature_names_out(),
)

In [None]:
# Constructing final dataframe: scaled numeric columns followed by the one-hot columns.
# NOTE(review): this rebinds df_features to the transformed frame, so re-running
# the encoding cell afterwards fails because the raw categorical columns are gone.
df_features=pd.concat([df_features_numeric_cols,df_features_categorical_cols],axis=1)

In [None]:
# Inspect the last rows of the combined feature frame.
df_features.tail()

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# shuffled split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_features,
    df_label,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

# Finding best model
1.Logistic Regression\
2.SVM\
3.KNN\
4.Random Forest\
5.Naive Bayes\
6.Neural Network

# 1.Logistic Regression

In [None]:
# Grid-search C and max_iter for logistic regression, scoring each candidate
# by 5-fold cross-validated accuracy on the training split.
lr = LogisticRegression()
param_grid = dict(
    C=[0.01, 0.1, 1, 10, 100],
    max_iter=[100, 500, 1000],
)
lr_grid_search = GridSearchCV(
    estimator=lr,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
lr_grid_search.fit(X_train, y_train)

print("Best parameters for Logistic Regression:", lr_grid_search.best_params_)
print("Best accuracy for Logistic Regression:", lr_grid_search.best_score_)


In [None]:
# Score the best logistic-regression estimator on the held-out test split.
best_model_lr = lr_grid_search.best_estimator_
y_pred = best_model_lr.predict(X_test)
lr_test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", lr_test_accuracy)

# 2.SVM

In [None]:
# Grid-search C and the kernel type for the support vector classifier, scoring
# each candidate by 5-fold cross-validated accuracy on the training split.
svm = SVC()
param_grid = dict(
    C=[0.1, 1, 10, 100],
    kernel=['linear', 'rbf', 'poly'],
)
svm_grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
svm_grid_search.fit(X_train, y_train)

print("Best parameters for SVM:", svm_grid_search.best_params_)
print("Best accuracy for SVM:", svm_grid_search.best_score_)


In [None]:
# Score the best SVM estimator on the held-out test split.
best_model_svm = svm_grid_search.best_estimator_
y_pred = best_model_svm.predict(X_test)
svm_test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", svm_test_accuracy)

# 3.KNN

In [None]:
# Grid-search the neighbourhood size, vote weighting and distance metric for
# k-nearest neighbours, scored by 5-fold cross-validated accuracy.
knn=KNeighborsClassifier()
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11],
    'weights': ['uniform', 'distance'],
    # 'minkowski' is intentionally not listed: with KNeighborsClassifier's
    # default p=2 it is the same metric as 'euclidean', so it only added
    # duplicate candidates. Ties went to the earlier 'euclidean' entry anyway,
    # so the selected model is unaffected.
    'metric': ['euclidean', 'manhattan']
}
knn_grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
knn_grid_search.fit(X_train, y_train)
print("Best parameters for KNN:", knn_grid_search.best_params_)
print("Best accuracy for KNN:", knn_grid_search.best_score_)

In [None]:
# Score the best KNN estimator on the held-out test split.
best_model_knn = knn_grid_search.best_estimator_
y_pred = best_model_knn.predict(X_test)
knn_test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", knn_test_accuracy)

# 4.Random forest

In [None]:
# Grid-search the forest size and tree-shape limits for the random forest,
# scored by 5-fold cross-validated accuracy.
# The forest is seeded (same seed as the train/test split) so the selected
# parameters and reported scores are reproducible after a kernel restart.
rf = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
# 108 candidates x 5 folds = 540 fits; n_jobs=-1 spreads them over all cores.
rf_grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
rf_grid_search.fit(X_train, y_train)
print("Best parameters for RF:", rf_grid_search.best_params_)
print("Best accuracy for RF:", rf_grid_search.best_score_)

In [None]:
# Score the best random-forest estimator on the held-out test split.
best_model_rf = rf_grid_search.best_estimator_
y_pred = best_model_rf.predict(X_test)
rf_test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", rf_test_accuracy)

# 5.Naive Bayes

In [None]:
# GaussianNB is fitted with its default settings; no hyperparameter search is
# run for this model.
gnb = GaussianNB().fit(X_train, y_train)
y_pred = gnb.predict(X_test)
nb_test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", nb_test_accuracy)

# 6.Neural Network

In [None]:
#turn dataframe to tensor+(.values)
# Features become float32 matrices; labels become float32 column vectors of
# shape (n, 1) so they line up with the network's single output unit.
X_train_tensor = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.to_numpy(), dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32).reshape(-1, 1)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32).reshape(-1, 1)

In [None]:
# Sanity check: display the shapes of the four tensors.
X_train_tensor.shape,X_test_tensor.shape,y_train_tensor.shape,y_test_tensor.shape

In [None]:
# Number of feature columns; used as the width of the network's input layer.
input_dim = X_train_tensor.shape[1]
input_dim

In [None]:
class AdvancedNN(nn.Module):
    """Feed-forward binary classifier with two hidden layers.

    Each hidden stage applies Linear -> BatchNorm1d -> ReLU -> Dropout. The
    output stage is a single-unit Linear layer followed by a sigmoid, so the
    network returns one value in (0, 1) per input row.

    Args:
        input_dim: number of input features.
        hidden1: width of the first hidden layer.
        hidden2: width of the second hidden layer.
        dropout_rate: dropout probability used after both hidden stages.
    """

    def __init__(self, input_dim, hidden1=64, hidden2=32, dropout_rate=0.3):
        super().__init__()
        # First hidden stage
        self.fc1 = nn.Linear(input_dim, hidden1)
        self.bn1 = nn.BatchNorm1d(hidden1)
        # Second hidden stage
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.bn2 = nn.BatchNorm1d(hidden2)
        # Output stage
        self.fc3 = nn.Linear(hidden2, 1)
        # Stateless layers shared by both hidden stages / the output
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid output for a batch ``x`` as a column of shape (batch, 1)."""
        first_hidden = self.dropout(self.relu(self.bn1(self.fc1(x))))
        second_hidden = self.dropout(self.relu(self.bn2(self.fc2(first_hidden))))
        return self.sigmoid(self.fc3(second_hidden))

In [None]:
# Manual grid search over the neural network's hyperparameters.
#
# Candidates are compared on a validation split carved out of the training
# data. The test set is not used for selection, so the test accuracy reported
# later stays an unbiased estimate.
param_grid = {
    'hidden1': [32, 64],
    'hidden2': [16, 32],
    'dropout_rate': [0.2, 0.3],
    'lr': [0.001, 0.01]
}
SEARCH_SEED = 42
SEARCH_EPOCHS = 200

# Stratified 80/20 split of the training rows into fit / validation parts.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, stratify=y_train, random_state=SEARCH_SEED
)
X_fit_tensor = torch.tensor(X_fit.values, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32)
y_fit_tensor = torch.tensor(y_fit.values, dtype=torch.float32).view(-1, 1)

# Start below any reachable accuracy so the first candidate is always recorded
# and best_params can never be left as None.
best_acc = -1.0
best_params = None
best_model = None

for h1 in param_grid['hidden1']:
    for h2 in param_grid['hidden2']:
        for dr in param_grid['dropout_rate']:
            # Named learning_rate (not lr) so the LogisticRegression object
            # bound to `lr` earlier in the notebook is not overwritten.
            for learning_rate in param_grid['lr']:
                # Same seed for every candidate: identical weight-init and
                # dropout random streams make the comparison repeatable.
                torch.manual_seed(SEARCH_SEED)
                model = AdvancedNN(input_dim, hidden1=h1, hidden2=h2, dropout_rate=dr)
                criterion = nn.BCELoss()
                optimizer = optim.Adam(model.parameters(), lr=learning_rate)

                # Full-batch training on the fit part only.
                model.train()
                for epoch in range(SEARCH_EPOCHS):
                    optimizer.zero_grad()
                    outputs = model(X_fit_tensor)
                    loss = criterion(outputs, y_fit_tensor)
                    loss.backward()
                    optimizer.step()

                # Threshold the sigmoid output at 0.5 and score on validation rows.
                model.eval()
                with torch.no_grad():
                    y_val_pred = (model(X_val_tensor) > 0.5).float()
                acc = accuracy_score(y_val, y_val_pred.numpy().ravel())

                if acc > best_acc:
                    best_acc = acc
                    best_params = {'hidden1': h1, 'hidden2': h2, 'dropout_rate': dr, 'lr': learning_rate}
                    best_model = model

print("Best Parameters:", best_params)
print("Best Validation Accuracy:", best_acc)

In [None]:
# Build a fresh network with the selected hyperparameters and train it from
# scratch on the full training tensors.
final_model = AdvancedNN(
    input_dim,
    hidden1=best_params['hidden1'],
    hidden2=best_params['hidden2'],
    dropout_rate=best_params['dropout_rate'],
)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(final_model.parameters(), lr=best_params['lr'])

# Full training
epochs = 300
final_model.train()  # nothing inside the loop leaves training mode, so set it once
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = final_model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()


In [None]:
# Evaluate the retrained network on the test tensors: switch to eval mode,
# run a gradient-free forward pass and threshold the outputs at 0.5.
final_model.eval()
with torch.no_grad():
    test_probabilities = final_model(X_test_tensor)
y_test_pred = (test_probabilities > 0.5).float()

test_acc = accuracy_score(y_test_tensor, y_test_pred)
print("Test Accuracy:", test_acc)


In [None]:
# Collect train / test predictions for all six models, compute their accuracies
# and draw them side by side in a grouped bar chart.

# scikit-learn models: one (train, test) prediction pair per model
y_train_pred_lr, y_test_pred_lr = (best_model_lr.predict(split) for split in (X_train, X_test))
y_train_pred_svm, y_test_pred_svm = (best_model_svm.predict(split) for split in (X_train, X_test))
y_train_pred_knn, y_test_pred_knn = (best_model_knn.predict(split) for split in (X_train, X_test))
y_train_pred_rf, y_test_pred_rf = (best_model_rf.predict(split) for split in (X_train, X_test))
y_train_pred_nb, y_test_pred_nb = (gnb.predict(split) for split in (X_train, X_test))

# Neural network: eval mode, no gradients, sigmoid output thresholded at 0.5
final_model.eval()
with torch.no_grad():
    y_train_pred_nn, y_test_pred_nn = (
        (final_model(batch) > 0.5).float() for batch in (X_train_tensor, X_test_tensor)
    )

# Accuracies are listed in the same order as the labels in `models`.
models = ['LogReg', 'SVM', 'KNN', 'RandomForest', 'NaiveBayes', 'NeuralNet']
train_acc = [
    accuracy_score(y_train, train_pred)
    for train_pred in (
        y_train_pred_lr, y_train_pred_svm, y_train_pred_knn,
        y_train_pred_rf, y_train_pred_nb, y_train_pred_nn,
    )
]
test_acc = [
    accuracy_score(y_test, test_pred)
    for test_pred in (
        y_test_pred_lr, y_test_pred_svm, y_test_pred_knn,
        y_test_pred_rf, y_test_pred_nb, y_test_pred_nn,
    )
]

# Grouped bars: train bar shifted left, test bar shifted right of each tick.
x = range(len(models))
offset = 0.15
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar([pos - offset for pos in x], train_acc, width=0.3, label='Train Accuracy', color='skyblue')
ax.bar([pos + offset for pos in x], test_acc, width=0.3, label='Test Accuracy', color='orange')
ax.set_xticks(list(x))
ax.set_xticklabels(models)
ax.set_ylim(0, 1)
ax.set_ylabel('Accuracy')
ax.set_title('Comparison of Train vs Test Accuracy for 6 Models')
ax.legend()
plt.show()


# So the Random Forest and Neural Network models have the better accuracy results on this data

In [None]:
# Persist only the network's weights (state_dict); the file is written to the
# current working directory.
torch.save(final_model.state_dict(), "best_model.pth")
print("Model weights saved successfully!")

In [None]:
# Recreate the architecture with the same hyperparameters the weights were
# trained with, then restore the saved state_dict and switch to eval mode.
loaded_model = AdvancedNN(
    input_dim,
    hidden1=best_params['hidden1'],
    hidden2=best_params['hidden2'],
    dropout_rate=best_params['dropout_rate'],
)
saved_weights = torch.load("best_model.pth")
loaded_model.load_state_dict(saved_weights)
loaded_model.eval()
print("Model loaded successfully!")


# Prediction on new data

In [None]:
# One hand-written sample to classify, using the raw (untransformed) columns.
new_data = {
    'Age': [55],
    'Sex': ['M'],
    'ChestPainType': ['ATA'],
    'RestingBP': [140],
    'Cholesterol': [250],
    'FastingBS': [0],
    'RestingECG': ['Normal'],
    'MaxHR': [150],
    'ExerciseAngina': ['N'],
    'Oldpeak': [1.5],
    'ST_Slope': ['Up']
}
new_df = pd.DataFrame(new_data)

# Take the column lists from the fitted transformers instead of re-inferring
# them from the new frame's dtypes. This guarantees the same columns in the
# same order as at fit time, regardless of how the dict above is ordered or how
# pandas types a hand-entered value. It also leaves the global
# numeric_cols / categorical_cols untouched.
scaler_cols = list(scaler.feature_names_in_)
encoder_cols = list(ohe.feature_names_in_)

# normalize with the already-fitted scaler (transform only, no refit)
new_numeric = pd.DataFrame(scaler.transform(new_df[scaler_cols]), columns=scaler_cols)

# One-Hot Encode with the already-fitted encoder (transform only, no refit)
new_categorical = pd.DataFrame(
    ohe.transform(new_df[encoder_cols]).toarray(),
    columns=ohe.get_feature_names_out(),
)

# Same layout as the training features: numeric columns first, then one-hot columns.
new_features = pd.concat([new_numeric, new_categorical], axis=1)


In [None]:
# Convert the prepared sample to a float32 tensor and classify it with the
# reloaded network.
new_tensor = torch.tensor(new_features.values, dtype=torch.float32)

# prediction: gradient-free forward pass, sigmoid output thresholded at 0.5
loaded_model.eval()
with torch.no_grad():
    new_probability = loaded_model(new_tensor)
prediction = (new_probability > 0.5).float()

print("Prediction (0=No HeartDisease, 1=HeartDisease):", int(prediction.item()))
