<a href="https://colab.research.google.com/github/lekh-ch/Training-models/blob/main/Xgboost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
# NOTE: absolute Colab-style path; the CSV must already exist at this location.
data_path = "/content/heart_rate_data.csv"
df = pd.read_csv(data_path)

# Keep only rows whose values fall within the expected ranges.
# `Series.between` is inclusive on both ends by default, and rows with a
# missing value in any of these columns are dropped (NaN compares as False).
# `.copy()` detaches the filtered frame from the frame returned by read_csv so
# that adding the 'HeartAttack' column below is an unambiguous write and does
# not trigger pandas' SettingWithCopyWarning.
df = df[(df['Age'].between(18, 60)) &
        (df['Sleep Hours'].between(3.0, 10.0)) &
        (df['Exercise Frequency (Days/Week)'].between(1, 7)) &
        (df['Resting Heart Rate Before'].between(50, 100)) &
        (df['Resting Heart Rate After'].between(45, 95)) &
        (df['Max Heart Rate During Exercise'].between(120, 190))].copy()

# Check distribution of heart rate differences (after minus before, per row)
hr_difference = df['Resting Heart Rate After'] - df['Resting Heart Rate Before']
print("Heart rate difference distribution:")
print(hr_difference.describe())

# Define the binary target with a percentile-based threshold: rows whose
# difference is strictly greater than the 75th percentile are labelled 1.
# Because the comparison is strict, rows tied with the threshold are labelled 0,
# so the positive class can be smaller than 25% of the rows.
threshold = np.percentile(hr_difference, 75)
df['HeartAttack'] = (hr_difference > threshold).astype(int)

# Checking updated class distribution
print("Updated Class Distribution:")
print(df['HeartAttack'].value_counts())

# Selecting features and target variable
# NOTE(review): 'HeartAttack' is computed from 'Resting Heart Rate After' minus
# 'Resting Heart Rate Before', and both columns are also used as features here,
# so the target is a deterministic function of the inputs - confirm this is
# intended before interpreting the model's scores.
X = df[['Age', 'Sleep Hours', 'Exercise Frequency (Days/Week)', 'Resting Heart Rate Before', 'Resting Heart Rate After', 'Max Heart Rate During Exercise']]
y = df['HeartAttack']

# Ensure the target can be split with stratification: there must be at least
# two classes, and every class needs at least two samples. With a class of a
# single sample, train_test_split(..., stratify=y) raises ValueError instead of
# reaching the fallback message below.
class_counts = y.value_counts()
if len(class_counts) > 1 and class_counts.min() >= 2:
    # 80/20 split that preserves the class proportions in both parts
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    # Standardizing features: the scaler is fitted on the training part only
    # and then applied unchanged to the test part.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Training XGBoost model with fixed hyperparameters (no search is performed)
    xgb_model = XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, subsample=0.8, colsample_bytree=0.8, random_state=42)
    xgb_model.fit(X_train, y_train)

    # Making predictions
    y_pred = xgb_model.predict(X_test)

    # Evaluating the model
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy: {accuracy:.2f}')
    print('Classification Report:\n', classification_report(y_test, y_pred))

    # Function to test new data
    def test_model(sample):
        """Classify a single observation with the fitted scaler and model.

        Args:
            sample: One observation, e.g. a dict keyed by the feature column
                names in ``X.columns``. The frame is built with
                ``columns=X.columns``, so features are taken in the training
                column order and a key missing from a dict becomes NaN.

        Returns:
            "Heart Attack Risk" if the model predicts class 1, otherwise
            "No Risk".
        """
        sample_df = pd.DataFrame([sample], columns=X.columns)
        # Apply the same scaling that was fitted on the training data
        sample_scaled = scaler.transform(sample_df)
        prediction = xgb_model.predict(sample_scaled)
        return "Heart Attack Risk" if prediction[0] == 1 else "No Risk"

    # Example test case
    test_sample = {
        'Age': 45,
        'Sleep Hours': 6.5,
        'Exercise Frequency (Days/Week)': 3,
        'Resting Heart Rate Before': 75,
        'Resting Heart Rate After': 65,
        'Max Heart Rate During Exercise': 160
    }
    print("Test Sample Prediction:", test_model(test_sample))
else:
    print("Not enough class variation for training.")

Heart rate difference distribution:
count    485.000000
mean      -6.946392
std        1.942673
min      -10.000000
25%       -9.000000
50%       -7.000000
75%       -5.000000
max       -4.000000
dtype: float64
Updated Class Distribution:
HeartAttack
0    418
1     67
Name: count, dtype: int64
Accuracy: 0.86
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.95      0.92        84
           1       0.43      0.23      0.30        13

    accuracy                           0.86        97
   macro avg       0.66      0.59      0.61        97
weighted avg       0.83      0.86      0.84        97

Test Sample Prediction: No Risk
