In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Read the raw records from the CSV export.
df = pd.read_csv('/mnt/data/healthcare_dataset.csv')

# Build the modelling frame without the name and the two date columns.
# `drop` returns a new frame, so `df` keeps every original column.
df_clean = df.drop(
    ['Name', 'Date of Admission', 'Discharge Date'],
    axis=1,
)

# Encode categorical columns
# Every object-dtype column is replaced in place by integer codes. A fresh
# encoder is fitted per column and stored in `encoders`, so any column's codes
# can be mapped back later with `encoders[col].inverse_transform(...)`;
# re-fitting one shared encoder would discard every mapping but the last.
# NOTE(review): scikit-learn documents LabelEncoder as a target (y) encoder;
# confirm integer codes are acceptable for the feature columns here.
label = LabelEncoder()  # stays bound even when there are no object columns
encoders = {}
for col in df_clean.select_dtypes(include=['object']).columns:
    label = LabelEncoder()
    df_clean[col] = label.fit_transform(df_clean[col])
    encoders[col] = label

# Separate the prediction target from the feature matrix.
y = df_clean['Test Results']
X = df_clean.drop('Test Results', axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a 200-tree random forest on the training split (seeded for repeatability).
# NOTE(review): if 'Test Results' holds label-encoded categories rather than a
# numeric measurement, a regressor treats the arbitrary codes as ordered
# values -- confirm that regression is the intended task.
model = RandomForestRegressor(n_estimators=200, random_state=42).fit(X_train, y_train)

# Predict the target for the held-out rows.
y_pred = model.predict(X_test)

# Evaluation metrics
# Compare the held-out targets with the model's predictions using mean
# absolute error, mean squared error and the coefficient of determination.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Model Performance:")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
# The superscript two is written as an escape so the label survives being
# saved or re-read with a mismatched file encoding.
print(f"R\u00b2 Score: {r2:.4f}")

# Scatter the held-out targets (x) against the model's predictions (y).
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred)
ax.set(
    xlabel='Actual Test Results',
    ylabel='Predicted Test Results',
    title='Actual vs Predicted Test Results',
)
fig.tight_layout()
plt.show()
