<a href="https://colab.research.google.com/github/nmansour67/skills-introduction-to-github/blob/main/Random_Forest_ML_Education.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# ============================================
# ER Patient Admission Prediction
# Simple Random Forest Model
# ============================================

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# ============================================
# STEP 1: Load Your Data
# ============================================
# To upload your CSV in Google Colab, uncomment the next 3 lines:
# from google.colab import files
# uploaded = files.upload()
# df = pd.read_csv(list(uploaded.keys())[0])

# Demo-only synthetic patients (remove this section when using your CSV).
# All draws below pull from the same seeded global NumPy stream, so they
# must stay in this exact order to reproduce the same table on every run.
np.random.seed(42)
n_patients = 50
ages = np.random.randint(18, 85, n_patients)
heart_rates = np.random.randint(60, 140, n_patients)
blood_pressures = np.random.randint(90, 180, n_patients)
triage_scores = np.random.randint(1, 6, n_patients)
outcomes = np.random.choice(['Admit', 'Discharge'], n_patients, p=[0.4, 0.6])

sample_data = pd.DataFrame({
    'Age': ages,
    'Heart Rate': heart_rates,
    'Blood Pressure': blood_pressures,
    'Triage Score (1-5)': triage_scores,
    'Outcome': outcomes,
})

# Everything downstream reads `df`; point it at your own CSV data instead
# of the synthetic table once you have loaded it.
df = sample_data

# Quick sanity check: first rows, row count, and class balance.
preview_rows = df.head(10)
print("Data Preview:")
print(preview_rows)
print(f"\nTotal Patients: {len(df)}")
print(f"Outcomes:\n{df['Outcome'].value_counts()}")

# ============================================
# STEP 2: Prepare the Data
# ============================================
# Separate the predictor columns (X) from the outcome column (y)
X = df[['Age', 'Heart Rate', 'Blood Pressure', 'Triage Score (1-5)']]
y = df['Outcome']

# Convert Admit/Discharge to numbers (0 and 1)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split: hold out 20% of the patients for testing and train on the rest.
# For the 50-patient sample this is still 40 for training and 10 for
# testing. A fractional test_size (instead of the fixed counts 40 + 10)
# keeps the split valid when df is replaced by a CSV with a different
# number of rows: fixed counts fail when there are fewer than 50 patients
# and leave the extra patients unused when there are more.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded  # split is stratified on the encoded outcome labels
)

print(f"\nTraining on: {len(X_train)} patients")
print(f"Testing on: {len(X_test)} patients")

# ============================================
# STEP 3: Train the Random Forest Model
# ============================================
# Collect the forest settings in one place, then build the classifier.
forest_params = {
    'n_estimators': 100,  # Number of decision trees in the forest
    'random_state': 42,   # Fixed seed passed to the classifier
}
model = RandomForestClassifier(**forest_params)

# Fit on the training split only; the test split is used in the next step.
print("\nTraining the model...")
model.fit(X_train, y_train)
print("✓ Training complete!")

# ============================================
# STEP 4: Test the Model and Show Results
# ============================================
# Predict an outcome for every patient in the held-out test set
predictions = model.predict(X_test)

# Compare each prediction with the true label once; the result is reused
# for both the headline count and the per-patient breakdown below.
is_correct = [actual == predicted
              for actual, predicted in zip(y_test, predictions)]

# Calculate accuracy (fraction of test patients predicted correctly)
accuracy = accuracy_score(y_test, predictions)

# Count the hits directly rather than with int(accuracy * len(y_test)):
# that float product can land just below the true whole number
# (e.g. 0.29 * 100 -> 28.999999999999996), and int() would then truncate
# it and report one correct prediction too few.
correct_predictions = int(sum(is_correct))

# Display results in plain language
print("\n" + "="*60)
print("RESULTS")
print("="*60)
print(f"Accuracy: {accuracy*100:.0f}% (Correctly predicted {correct_predictions} out of {len(y_test)} admissions)")
print("="*60)

# Show detailed breakdown: decode the numeric labels back to their
# original text and mark each test patient as a hit or a miss.
print("\nDetailed Breakdown:")
test_results = pd.DataFrame({
    'Actual Outcome': label_encoder.inverse_transform(y_test),
    'Predicted Outcome': label_encoder.inverse_transform(predictions),
    'Correct?': ['✓' if hit else '✗' for hit in is_correct]
})
print(test_results.to_string(index=False))

Data Preview:
   Age  Heart Rate  Blood Pressure  Triage Score (1-5)    Outcome
0   69          61             177                   1  Discharge
1   32          65             126                   1  Discharge
2   78         113             133                   1      Admit
3   38          63             175                   3  Discharge
4   41         113             124                   1      Admit
5   20         122             154                   4      Admit
6   39          77             136                   5      Admit
7   70         103             167                   1  Discharge
8   19          93              92                   3  Discharge
9   47         133              90                   3      Admit

Total Patients: 50
Outcomes:
Outcome
Discharge    28
Admit        22
Name: count, dtype: int64

Training on: 40 patients
Testing on: 10 patients

Training the model...
✓ Training complete!

RESULTS
Accuracy: 60% (Correctly predicted 6 out of 10 admissions)

D