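## ML model training

Trains a random-forest classifier that predicts flare risk (0 = low, 1 = medium, 2 = high) from the `fatigue`, `pain` and `brain_fog` scores, then saves the fitted model to `../backend/flare_model.pkl`.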

In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib

In [2]:
# Read the symptom log from disk; the "date" column is parsed as datetimes on load.
df = pd.read_csv(
    filepath_or_buffer="../data/sample_symptoms.csv",
    parse_dates=["date"],
)


In [3]:
# Text labels accepted in the "flare" column and the integer class each one encodes.
flare_mapping = {"low risk": 0, "medium risk": 1, "high risk": 2}

def map_flare(val):
    """Normalize one raw ``flare`` cell to an integer class label.

    Accepted inputs:
      * integers 0, 1 or 2 -- Python ``int`` or any other ``numbers.Integral``
        (e.g. numpy integer scalars);
      * floats whose value is exactly 0.0, 1.0 or 2.0 (an integer column that
        contains missing cells is read as float);
      * strings that, after stripping whitespace and lower-casing, are either
        a key of ``flare_mapping`` or one of "0", "1", "2".

    Returns:
        The label as a plain ``int`` (0, 1 or 2), or ``None`` for anything
        else (bools, NaN, out-of-range numbers, unknown text, other types) so
        the caller can drop the row.
    """
    import numbers  # local import keeps this cell runnable on its own

    # bool is a subclass of int, but True/False are not flare labels.
    if isinstance(val, bool):
        return None

    # Any integer type, not just the built-in int.
    if isinstance(val, numbers.Integral):
        return int(val) if 0 <= val <= 2 else None

    # Integral floats such as 1.0; NaN and inf fail is_integer() and fall to None.
    if isinstance(val, float):
        return int(val) if val.is_integer() and 0 <= val <= 2 else None

    if isinstance(val, str):
        val_clean = val.lower().strip()
        if val_clean in flare_mapping:
            return flare_mapping[val_clean]
        # Numeric labels that arrived as text.
        if val_clean in ("0", "1", "2"):
            return int(val_clean)

    # Not a recognizable label.
    return None

# Normalize every label with map_flare, discard rows whose label came back
# missing (map_flare returns None for values it cannot interpret), and store
# the remaining labels as integers.
df = (
    df.assign(flare=df["flare"].apply(map_flare))
    .dropna(subset=["flare"])
    .astype({"flare": int})
)

# Features and target are taken only after the cleanup above, so both are
# built from the same filtered rows.
X = df.loc[:, ["fatigue", "pain", "brain_fog"]]
y = df["flare"].astype(int)

In [4]:
# Sanity check after cleaning: frame dimensions, the first rows, and the
# distinct label values that remain.
for summary in (df.shape, df.head(), df["flare"].unique()):
    print(summary)

(149, 5)
         date  fatigue  pain  brain_fog  flare
0  2025-01-01        3     1          7      0
1  2025-01-02        8     5          1      0
2  2025-01-03        7     6          1      0
3  2025-01-04        4     1          1      0
4  2025-01-05       10     8          8      2
[0 2 1]


In [5]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Fit a 100-tree random forest (seeded); fit() returns the fitted estimator itself.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows. zero_division=0 reports 0 instead of warning when a
# class receives no predictions.
y_pred = model.predict(X_test)
print(
    classification_report(
        y_test,
        y_pred,
        zero_division=0,
    )
)

              precision    recall  f1-score   support

           0       0.88      0.82      0.85        17
           1       0.64      0.78      0.70         9
           2       1.00      0.75      0.86         4

    accuracy                           0.80        30
   macro avg       0.84      0.78      0.80        30
weighted avg       0.82      0.80      0.81        30



In [6]:
# Serialize the fitted model with joblib into the sibling backend directory.
joblib.dump(value=model, filename="../backend/flare_model.pkl")
print("Model saved to backend/flare_model.pkl")

Model saved to backend/flare_model.pkl
