In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier 
from sklearn.metrics import mean_squared_error , r2_score 
import joblib 

In [3]:
df =pd.read_csv('astronaut_health_dataset.csv')
df.head()

Unnamed: 0,heart_rate,hrv,blood_pressure_sys,blood_pressure_dia,body_temperature,spo2,respiration_rate,co2_level,oxygen_level,cabin_temperature,...,sleep_quality,stress_level,mood_score,exercise_minutes,fatigue_level,water_intake,calories_intake,protein_intake,carbs_intake,health_score
0,80.0,61.8,109.9,60.9,36.57,96.2,12.7,0.679,20.98,23.1,...,4,7,9,45.1,9,2780.1,2185.9,122.8,381.0,57
1,73.6,56.1,117.8,71.4,36.98,96.1,14.1,0.422,20.75,23.6,...,1,4,4,67.3,7,2341.0,2043.4,191.9,246.7,58
2,81.5,45.7,108.1,75.9,37.01,93.4,13.2,0.671,20.91,22.7,...,5,9,5,32.7,7,2310.5,2708.6,188.4,251.6,81
3,90.2,37.2,115.4,98.9,37.24,96.3,14.4,0.577,21.36,19.1,...,3,7,1,52.1,5,3120.8,2727.9,112.0,352.6,63
4,72.7,53.4,91.6,85.6,36.32,98.5,15.4,0.671,21.64,21.8,...,5,1,2,35.1,7,2782.5,2376.1,144.0,356.4,62


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 24 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   heart_rate          1000 non-null   float64
 1   hrv                 1000 non-null   float64
 2   blood_pressure_sys  1000 non-null   float64
 3   blood_pressure_dia  1000 non-null   float64
 4   body_temperature    1000 non-null   float64
 5   spo2                1000 non-null   float64
 6   respiration_rate    1000 non-null   float64
 7   co2_level           1000 non-null   float64
 8   oxygen_level        1000 non-null   float64
 9   cabin_temperature   1000 non-null   float64
 10  radiation           1000 non-null   float64
 11  microgravity_force  1000 non-null   float64
 12  reaction_time       1000 non-null   float64
 13  sleep_hours         1000 non-null   float64
 14  sleep_quality       1000 non-null   int64  
 15  stress_level        1000 non-null   int64  
 16  mood_sc

In [7]:
features = ['respiration_rate', 'carbs_intake', 'water_intake', 'radiation', 
                'blood_pressure_sys', 'oxygen_level', 'sleep_hours', 
                'blood_pressure_dia', 'cabin_temperature', 'mood_score']

target = 'health_score'

In [8]:
x = df[features]
y = df[target]

In [9]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [11]:
model  = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [12]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [13]:
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

Mean Squared Error: 439.005
R^2 Score: -0.7921497387328544


In [16]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from xgboost import XGBRegressor
from catboost import CatBoostRegressor

# Regression Models Dictionary

models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(),
    "Decision Tree": DecisionTreeRegressor(),
    "Gradient Boosting": GradientBoostingRegressor(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "K-Neighbors Regressor": KNeighborsRegressor(),
    "XGB Regressor": XGBRegressor(),
    "CatBoost Regressor": CatBoostRegressor(verbose=False),
    "AdaBoost Regressor": AdaBoostRegressor()
}

In [17]:
# Create a function which can evaluate models and return a report 
def evaluate_models(X, y, models):
    '''
    This function takes in X and y and models dictionary as input
    It splits the data into Train Test split
    Iterates through the given model dictionary and evaluates the metrics
    Returns: Dataframe which contains report of all models metrics with cost
    '''
    # separate dataset into train and test
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
    

    models_list = []
    scores = []
    
    for i in range(len(list(models))):
        model = list(models.values())[i]
        model.fit(X_train, y_train) # Train model

        # Make predictions
        y_pred = model.predict(X_test)

        score = r2_score(y_test,y_pred)
        
        model_name = list(models.keys())[i]
        print(f'---- score for --- {model_name} ----')
        print(f"{score}")
        models_list.append(model_name)
        scores.append(score)
    
    print()
    
    report = pd.DataFrame()
    report['Model_name'] = models_list
    report['Score'] = scores        
    return report

In [19]:
report = evaluate_models(x, y, models)

---- score for --- Linear Regression ----
0.0003024455240657753
---- score for --- Random Forest ----
-0.044648130306989
---- score for --- Decision Tree ----
-0.925518451992162
---- score for --- Gradient Boosting ----
-0.04195636597791319
---- score for --- Ridge Regression ----
0.0005366815116295554
---- score for --- Lasso Regression ----
-0.004634856487471195
---- score for --- K-Neighbors Regressor ----
-0.11844709340300463
---- score for --- XGB Regressor ----
-0.12242920560525272
---- score for --- CatBoost Regressor ----
-0.11618926323588075
---- score for --- AdaBoost Regressor ----
-0.005857007539145975



In [22]:
model  =  DecisionTreeRegressor()
model.fit(X_train, y_train)

In [25]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [26]:
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

Mean Squared Error: 471.49
R^2 Score: -0.9247632266492489


In [27]:
joblib.dump(model, 'asteroid_health_model.pkl')
print("Model saved as 'asteroid_health_model.pkl'")

Model saved as 'asteroid_health_model.pkl'
