In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [24]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor


In [4]:
data = pd.read_csv('/content/drive/MyDrive/health_data/health_risk_data.csv')

In [6]:
data = {
    'Age': np.random.randint(20, 80, size=100),
    'BMI': np.random.uniform(18, 35, size=100),
    'Blood_Pressure': np.random.randint(90, 180, size=100),
    'Cholesterol': np.random.randint(150, 250, size=100),
    'Smoking_Status': np.random.choice(['Yes', 'No'], size=100),
    'Physical_Activity': np.random.choice(['Low', 'Medium', 'High'], size=100),
    'Diet_Quality': np.random.choice(['Poor', 'Average', 'Good'], size=100),
    'Health_Risk_Score': np.random.randint(1, 10, size=100)
}

In [7]:
df = pd.DataFrame(data)

In [8]:
label_encoders = {}
for column in ['Smoking_Status', 'Physical_Activity', 'Diet_Quality']:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

In [9]:
df = df.fillna(df.mean())

# Select features and target variable
X = df.drop('Health_Risk_Score', axis=1)
y = df['Health_Risk_Score']

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=20)

# Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [25]:
#svr_model = SVR(kernel='rbf')  # Using the Radial Basis Function kernel
#svr_model.fit(X_train_scaled, y_train)
#rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
#rf_model.fit(X_train_scaled, y_train)
gb_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
gb_model.fit(X_train_scaled, y_train)


In [26]:
#y_pred_svr = svr_model.predict(X_test_scaled)
#y_pred_rf = rf_model.predict(X_test_scaled)
y_pred_gb = gb_model.predict(X_test_scaled)

In [27]:
mae_gb = mean_absolute_error(y_test, y_pred_gb)
rmse_gb = np.sqrt(mean_squared_error(y_test, y_pred_gb))
r2_gb = r2_score(y_test, y_pred_gb)

print(f'Gradient Boosting Regressor - Mean Absolute Error: {mae_gb:.2f}')
print(f'Gradient Boosting Regressor - Root Mean Squared Error: {rmse_gb:.2f}')
print(f'Gradient Boosting Regressor - R-squared: {r2_gb:.2f}')

Gradient Boosting Regressor - Mean Absolute Error: 2.15
Gradient Boosting Regressor - Root Mean Squared Error: 2.48
Gradient Boosting Regressor - R-squared: 0.01


In [18]:
mae_rf = mean_absolute_error(y_test, y_pred_rf)
rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))
r2_rf = r2_score(y_test, y_pred_rf)

print(f'Random Forest Regressor - Mean Absolute Error: {mae_rf:.2f}')
print(f'Random Forest Regressor - Root Mean Squared Error: {rmse_rf:.2f}')
print(f'Random Forest Regressor - R-squared: {r2_rf:.2f}')

Random Forest Regressor - Mean Absolute Error: 2.33
Random Forest Regressor - Root Mean Squared Error: 2.64
Random Forest Regressor - R-squared: -0.12


In [14]:
print(f'Support Vector Regression - Mean Absolute Error: {mae_svr:.2f}')
print(f'Support Vector Regression - Root Mean Squared Error: {rmse_svr:.2f}')
print(f'Support Vector Regression - R-squared: {r2_svr:.2f}')

Support Vector Regression - Mean Absolute Error: 2.42
Support Vector Regression - Root Mean Squared Error: 2.82
Support Vector Regression - R-squared: -0.27
