In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

# ---------------------------------------------------------------------------
# Injury recovery-period regression: load -> encode -> split -> fit -> score
# -> persist.  Everything runs at top level so the resulting objects (df, X,
# y, the split parts, model, y_pred and the metrics) stay available to any
# later cell.
# ---------------------------------------------------------------------------

# One seed shared by the split and the forest so reruns give the same numbers.
RANDOM_STATE = 42

# Read the raw table and show the first rows *before* any encoding, so the
# printed preview reflects the file as stored on disk.
df = pd.read_csv("injury_augmented.csv")
print(df.head())

# One-hot encode every non-numeric column; numeric columns pass through as-is.
df = pd.get_dummies(df)

# Target is the recovery period; every remaining column is a predictor.
y = df["Recovery_Period"]
X = df.drop("Recovery_Period", axis=1)

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,
)

# Fit a 100-tree random forest on the training portion (fit returns the
# estimator itself, so construction and training can be chained).
model = RandomForestRegressor(
    n_estimators=100,
    random_state=RANDOM_STATE,
).fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report the three metrics, one per line, rounded to two decimals.
print(
    f"MAE: {mae:.2f}",
    f"MSE: {mse:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)

# Persist the fitted estimator to disk, then confirm.
joblib.dump(model, "injury_recovery_model.pkl")

print("Model training complete. Saved as 'injury_recovery_model.pkl'.")


   Callorie  Age  Weight  Fitness_Level      Injury Gender   Type  \
0      2500   28      65            0.8  Toe Injury      M  minor   
1      2400   31      70            0.7  Toe Injury      M  minor   
2      3300   29      67            0.8  Toe Injury      M  minor   
3      2500   30      68            0.8  Toe Injury      M  minor   
4      2400   25      55            0.7  Toe Injury      M  major   

   Recovery_Period  
0                2  
1                2  
2                2  
3                3  
4                6  
MAE: 1.47
MSE: 4.29
R² Score: 0.95
Model training complete. Saved as 'injury_recovery_model.pkl'.
