<a href="https://colab.research.google.com/github/mahmoodhamdi/basketball_pointsCounter_app/blob/main/heart_disease.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# Section 1: Read dataset and preprocess

# Read dataset from a CSV file
data = pd.read_csv('heart_disease_health_indicators_BRFSS2015.csv')

# Drop columns with more than 30% missing values
threshold = 0.3
data = data.dropna(thresh=len(data) * (1 - threshold), axis=1)

# One-hot encode categorical variables
data = pd.get_dummies(data, columns=['Sex', 'Education'], drop_first=True)

# Split data into features (X) and target variable (y)
X = data.drop(columns=['HeartDiseaseorAttack'])
y = data['HeartDiseaseorAttack']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
# Section 2: Standardize features using StandardScaler

# Initialize StandardScaler
scaler = StandardScaler()

# Fit and transform the training data
X_train = scaler.fit_transform(X_train)

# Transform the test data using the same scaler
X_test = scaler.transform(X_test)

In [14]:
# Initialize the Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)



In [13]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Display classification report
print(classification_report(y_test, y_pred))

# Train score
train_score = clf.score(X_train, y_train)
print(f'Training Accuracy: {round(train_score*100,2)}')


Accuracy: 0.90
              precision    recall  f1-score   support

         0.0       0.92      0.98      0.95     45968
         1.0       0.45      0.12      0.19      4768

    accuracy                           0.90     50736
   macro avg       0.68      0.55      0.57     50736
weighted avg       0.87      0.90      0.88     50736

Training Accuracy: 99.58
