In [21]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import os

# Load the dataset used for modelling.
# NOTE(review): the file name suggests the columns are already standardized - confirm.
file_path = '../Data_Analysis/stdz_data.csv'
data = pd.read_csv(file_path)

# Model inputs (four columns) and the label column.
features = data[['Sedentary Hours Per Day', 'BMI', 'Exercise Hours Per Week', 'Income']]
target = data['Heart Attack Risk']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Fit the Gradient Boosting classifier on the training portion only.
gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
gb_model.fit(X_train, y_train)

# Score the held-out rows and report accuracy.
predictions = gb_model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy of the Gradient Boosting model: {accuracy:.2f}")

# Predict for every row and store the result next to the original columns.
# These predictions include the rows the model was trained on, so their agreement
# with 'Heart Attack Risk' is not an out-of-sample measure.
all_predictions = gb_model.predict(features)
data['Predicted_Heart_Attack_Risk'] = all_predictions

# Create the output directory if needed; exist_ok avoids the separate
# exists-check and the race between checking and creating.
# NOTE(review): this is 'Data_Analysis' under the current working directory, whereas
# the input is read from '../Data_Analysis' - confirm the two are meant to differ.
directory = 'Data_Analysis'
os.makedirs(directory, exist_ok=True)

# Save the updated dataset (without the index column).
updated_file_path = os.path.join(directory, 'stdz_data_with_predictions.csv')
data.to_csv(updated_file_path, index=False)


Accuracy of the Gradient Boosting model: 0.64


In [23]:
# data.info()

In [17]:
# Columns to keep: three lifestyle measurements, cholesterol, the true label
# and the model's prediction.
selected_columns = [
    'Sedentary Hours Per Day',
    'BMI',
    'Exercise Hours Per Week',
    'Cholesterol',
    'Heart Attack Risk',
    'Predicted_Heart_Attack_Risk',
]
df = data[selected_columns]
# Optional CSV export, currently disabled:
# df.to_csv("Heart_SQL.csv", index = False)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8763 entries, 0 to 8762
Data columns (total 6 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Sedentary Hours Per Day      8763 non-null   float64
 1   BMI                          8763 non-null   float64
 2   Exercise Hours Per Week      8763 non-null   float64
 3   Cholesterol                  8763 non-null   float64
 4   Heart Attack Risk            8763 non-null   int64  
 5   Predicted_Heart_Attack_Risk  8763 non-null   int64  
dtypes: float64(4), int64(2)
memory usage: 410.9 KB


In [18]:
from joblib import dump

# Serialize the fitted Gradient Boosting classifier to a joblib file.
# The call is left as the final expression so the cell still displays the
# list of written file names.
model_file_path = 'gradient_boosting_model.joblib'
dump(value=gb_model, filename=model_file_path)


['gradient_boosting_model.joblib']

In [19]:
import streamlit as st
from joblib import load
import numpy as np
import pandas as pd

# Restore the previously saved Gradient Boosting classifier.
# joblib files are pickle-based: only load files from a trusted source.
model_path = 'gradient_boosting_model.joblib'
gb_model = load(model_path)

# Streamlit app structure
def main():
    """Render the Streamlit page and show the model's prediction for the chosen inputs.

    Reads four feature values from sidebar sliders, passes them to the
    module-level ``gb_model`` as a single-row DataFrame, and writes the
    predicted class to the page.
    """
    st.title('Heart Attack Risk Prediction')
    st.sidebar.title('Features')

    # Sidebar - user input features; slider arguments are
    # (label, min, max, default, step).
    sedentary_hours = st.sidebar.slider('Sedentary Hours Per Day', 0, 24, 8, 1)
    bmi = st.sidebar.slider('BMI', 10, 50, 25, 1)
    exercise_hours = st.sidebar.slider('Exercise Hours Per Week', 0, 40, 3, 1)
    income = st.sidebar.slider('Income', 0, 200000, 50000, 500)

    # The model was fitted on a DataFrame with these column names, in this order.
    # Passing a named one-row frame (rather than a bare array) keeps the feature
    # names attached and avoids scikit-learn's feature-name mismatch warning.
    # NOTE(review): the model was trained on 'stdz_data.csv'; if those columns are
    # standardized, these raw slider values need the same scaling first - confirm.
    user_input = pd.DataFrame(
        [[sedentary_hours, bmi, exercise_hours, income]],
        columns=['Sedentary Hours Per Day', 'BMI', 'Exercise Hours Per Week', 'Income'],
    )
    prediction = gb_model.predict(user_input)

    # Display prediction result (first and only row).
    st.write('Predicted Heart Attack Risk:', prediction[0])

# Build the page only when this code runs as the main module, not when imported.
# NOTE(review): Streamlit apps are normally started with `streamlit run <script>.py`;
# confirm this cell is meant to be exported to a script rather than run in the kernel.
if __name__ == '__main__':
    main()


