## ML model training 

In [6]:
# Import necessary libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib

In [7]:
# Load your symptom data
df = pd.read_csv("../data/sample_symptoms.csv", parse_dates=["date"])


In [8]:
# Map and clean first
flare_mapping = {"low risk": 0, "medium risk": 1, "high risk": 2}

def map_flare(val):
    if isinstance(val, str):
        val = val.lower().strip()
        return flare_mapping.get(val, None)
    return val  # leave numeric values as-is

df['flare'] = df['flare'].apply(map_flare)
df = df.dropna(subset=['flare'])
df['flare'] = df['flare'].astype(int)

# THEN create X and y
X = df[["fatigue", "pain", "brain_fog"]]
y = df["flare"].astype(int)

In [9]:
print(df.shape)
print(df.head())
print(df['flare'].unique())

(10, 5)
        date  fatigue  pain  brain_fog  flare
0 2024-01-01        3     2          1      0
1 2024-01-02        7     6          5      1
2 2024-01-03        2     3          2      0
3 2024-01-04        8     7          6      1
4 2025-05-07       10     1          6      1
[0 1]


In [12]:
# Train a Random Forest
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X, y)

# Evaluate
y_pred = model.predict(X)
print(classification_report(y, y_pred))

# Save the trained model
joblib.dump(model, "../backend/flare_model.pkl")
print("Model saved to backend/flare_model.pkl")

              precision    recall  f1-score   support

           0       0.71      1.00      0.83         5
           1       1.00      0.60      0.75         5

    accuracy                           0.80        10
   macro avg       0.86      0.80      0.79        10
weighted avg       0.86      0.80      0.79        10

Model saved to backend/flare_model.pkl
