In [12]:
# Import required libraries
import pandas as pd
import numpy as np
import pickle
import json
import os
import warnings
import joblib
warnings.filterwarnings('ignore')

# Flask and API libraries
from flask import Flask, request, jsonify
import requests
# import threading
# import time

# Import custom functions
import sys
sys.path.append('../')
from functions.data_utils import load_transformers, transform_new_data

print("✅ All libraries imported successfully!")

✅ All libraries imported successfully!


In [13]:
# Configuration parameters
MODELS_PATH = '../models'
API_HOST = '127.0.0.1'
API_PORT = 5000
API_DEBUG = False

# API endpoints
BASE_URL = f'http://{API_HOST}:{API_PORT}'
PREDICT_URL = f'{BASE_URL}/predict'
PREDICT_BATCH_URL = f'{BASE_URL}/predict_batch'
HEALTH_URL = f'{BASE_URL}/health'
MODEL_INFO_URL = f'{BASE_URL}/model_info'

print(f"Models path: {MODELS_PATH}")
print(f"API Base URL: {BASE_URL}")
print(f"API Endpoints:")
print(f"  - Health: {HEALTH_URL}")
print(f"  - Model Info: {MODEL_INFO_URL}")
print(f"  - Single Predict: {PREDICT_URL}")
print(f"  - Batch Predict: {PREDICT_BATCH_URL}")

Models path: ../models
API Base URL: http://127.0.0.1:5000
API Endpoints:
  - Health: http://127.0.0.1:5000/health
  - Model Info: http://127.0.0.1:5000/model_info
  - Single Predict: http://127.0.0.1:5000/predict
  - Batch Predict: http://127.0.0.1:5000/predict_batch


In [14]:
final_model = joblib.load(os.path.join(MODELS_PATH, 'final_model.pkl'))
model_metadata = joblib.load(os.path.join(MODELS_PATH, 'model_metadata.pkl'))
transformers = load_transformers(os.path.join(MODELS_PATH, 'transformers.pkl'))
feature_names = model_metadata.get('features', [])

Transformers loaded from ../models\transformers.pkl


In [15]:
final_model

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [16]:
# Create dummy training data that matches the actual data structure
if feature_names and len(feature_names) > 0:
    # Get the transformers info to understand the expected input shape
    numeric_cols = transformers.get('numeric_cols', [])
    categorical_cols = transformers.get('categorical_cols', [])

In [17]:
def get_single_test_entry(test_set_original):
    """
    Estrae una singola entry (riga) dal test_set_original come dizionario.
    """
    if isinstance(test_set_original, pd.DataFrame):
        entry = test_set_original.sample(1).iloc[0].to_dict()
        return entry
    elif isinstance(test_set_original, list) and len(test_set_original) > 0:
        entry = test_set_original[np.random.randint(0, len(test_set_original))].copy() if isinstance(test_set_original[0], dict) else dict(test_set_original[np.random.randint(0, len(test_set_original))])
        return entry
    else:
        raise ValueError("test_set_original deve essere un DataFrame o una lista non vuota.")

def get_batch_test_entries(test_set_original, batch_size=5):
    """
    Estrae un batch di entry dal test_set_original come lista di dizionari.
    """
    if isinstance(test_set_original, pd.DataFrame):
        return test_set_original.sample(batch_size).to_dict(orient='records')
    elif isinstance(test_set_original, list) and len(test_set_original) >= batch_size:
        idx = np.random.choice(len(test_set_original), batch_size, replace=False)
        return [test_set_original[i] for i in idx]
    elif isinstance(test_set_original, list):
        return test_set_original
    else:
        raise ValueError("test_set_original deve essere un DataFrame o una lista non vuota.")



In [18]:
# Carica i dati di test e il target, assicurandosi che gli indici siano allineati
test_set_original = pd.read_csv('../data/splitted/X_test_raw.csv').reset_index(drop=True)
target = pd.read_csv('../data/splitted/y_test_raw.csv').reset_index(drop=True)

# Usa il nome della colonna originale del target
target_col_name = target.columns[0]

# Concatena il target come nuova colonna al test_set_original
test_set_original[target_col_name] = target[target_col_name]

In [19]:
test_set_original

Unnamed: 0,age,gender,sleep_quality_index,brain_fog_level,physical_pain_score,stress_level,depression_phq9_score,fatigue_severity_scale_score,pem_duration_hours,hours_of_sleep_per_night,pem_present,work_status,social_activity_level,exercise_frequency,meditation_or_mindfulness,diagnosis
0,21,Male,5.5,9.1,7.2,3.9,15.0,7.9,8.0,7.0,0,Partially working,Very low,Sometimes,Yes,Depression
1,47,Female,3.0,7.9,7.4,9.6,10.0,1.5,44.0,4.6,0,Partially working,Very low,Sometimes,No,Depression
2,61,Male,5.6,7.9,2.2,8.5,16.0,7.0,32.0,6.2,1,,Medium,Often,Yes,Both
3,39,Male,1.8,5.9,2.0,8.1,9.0,7.0,9.0,5.7,1,Working,Medium,Rarely,No,ME/CFS
4,38,Male,4.6,,1.5,3.8,26.0,7.8,14.0,4.8,0,Not working,Low,Often,Yes,Depression
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,53,Male,9.6,9.4,1.4,9.6,9.0,7.0,42.0,5.0,1,Partially working,Medium,Daily,Yes,ME/CFS
196,39,Male,6.3,2.2,8.0,5.1,11.0,3.6,20.0,8.7,0,Partially working,Low,Sometimes,Yes,Depression
197,61,Female,8.7,4.3,5.6,4.7,10.0,5.5,1.0,5.3,0,Working,Very high,Never,No,Depression
198,33,Female,,1.9,1.7,6.9,10.0,1.4,31.0,5.5,0,Not working,Very low,,Yes,Depression


In [20]:
# Esempio di utilizzo per testare le API
single_entry = get_single_test_entry(test_set_original)
batch_entries = get_batch_test_entries(test_set_original, batch_size=5)

# Inspect data for NaN values
print("🔍 Inspecting test data:")
print(f"Single entry keys: {list(single_entry.keys())}")
print(f"Single entry sample: {single_entry}")

🔍 Inspecting test data:
Single entry keys: ['age', 'gender', 'sleep_quality_index', 'brain_fog_level', 'physical_pain_score', 'stress_level', 'depression_phq9_score', 'fatigue_severity_scale_score', 'pem_duration_hours', 'hours_of_sleep_per_night', 'pem_present', 'work_status', 'social_activity_level', 'exercise_frequency', 'meditation_or_mindfulness', 'diagnosis']
Single entry sample: {'age': 20, 'gender': 'Female', 'sleep_quality_index': 7.5, 'brain_fog_level': 7.1, 'physical_pain_score': 3.0, 'stress_level': 4.7, 'depression_phq9_score': 22.0, 'fatigue_severity_scale_score': 8.9, 'pem_duration_hours': 33.0, 'hours_of_sleep_per_night': 4.5, 'pem_present': 1, 'work_status': 'Working', 'social_activity_level': nan, 'exercise_frequency': 'Often', 'meditation_or_mindfulness': 'No', 'diagnosis': 'Both'}


In [21]:
# Rimuove la colonna target (diagnosi) senza passare esplicitamente il nome
single_entry = {k: v for k, v in single_entry.items() if k != target_col_name}
print(json.dumps(single_entry, indent=2))

{
  "age": 20,
  "gender": "Female",
  "sleep_quality_index": 7.5,
  "brain_fog_level": 7.1,
  "physical_pain_score": 3.0,
  "stress_level": 4.7,
  "depression_phq9_score": 22.0,
  "fatigue_severity_scale_score": 8.9,
  "pem_duration_hours": 33.0,
  "hours_of_sleep_per_night": 4.5,
  "pem_present": 1,
  "work_status": "Working",
  "social_activity_level": NaN,
  "exercise_frequency": "Often",
  "meditation_or_mindfulness": "No"
}


In [22]:
batch_entries_no_target = [
    {k: v for k, v in entry.items() if k != target_col_name}
    for entry in batch_entries
]
print(json.dumps(batch_entries_no_target, indent=2))

[
  {
    "age": 37,
    "gender": "Female",
    "sleep_quality_index": 9.9,
    "brain_fog_level": 8.7,
    "physical_pain_score": 5.1,
    "stress_level": 7.6,
    "depression_phq9_score": 23.0,
    "fatigue_severity_scale_score": 2.4,
    "pem_duration_hours": 20.0,
    "hours_of_sleep_per_night": 7.1,
    "pem_present": 0,
    "work_status": "Working",
    "social_activity_level": "High",
    "exercise_frequency": "Rarely",
    "meditation_or_mindfulness": "No"
  },
  {
    "age": 35,
    "gender": "Male",
    "sleep_quality_index": 7.5,
    "brain_fog_level": 2.0,
    "physical_pain_score": 1.7,
    "stress_level": 8.1,
    "depression_phq9_score": 10.0,
    "fatigue_severity_scale_score": 2.5,
    "pem_duration_hours": 36.0,
    "hours_of_sleep_per_night": 3.7,
    "pem_present": 0,
    "work_status": "Working",
    "social_activity_level": "High",
    "exercise_frequency": "Sometimes",
    "meditation_or_mindfulness": "No"
  },
  {
    "age": 25,
    "gender": "Female",
    "sle

## 📬 **Guida per testare le API con Postman**

### **Prerequisiti**
1. **Avvia il server API**: `python api_server.py`
2. **Server in esecuzione su**: `http://127.0.0.1:5000`

---

### **🟢 Test 1: Health Check**
**Metodo**: `GET`  
**URL**: `http://127.0.0.1:5000/health`  
**Headers**: Nessun header necessario  
**Body**: Nessun body necessario  

**Risposta attesa**:
```json
{
    "status": "healthy",
    "message": "ML API is running",
    "timestamp": 1752372090.595487
}
```

---

### **🔍 Test 2: Model Info**
**Metodo**: `GET`  
**URL**: `http://127.0.0.1:5000/model_info`  
**Headers**: Nessun header necessario  
**Body**: Nessun body necessario  

**Risposta attesa**:
```json
{
    "model_name": "RandomForest",
    "model_type": "RandomForestClassifier",
    "problem_type": "multiclass_classification",
    "features": ["age", "gender", "sleep_quality_index", "..."],
    "n_features": 15,
    "target_classes": ["Both", "Depression", "ME/CFS"],
    "model_available": true,
    "transformers_available": true
}
```

---

### **🎯 Test 3: Single Prediction**
**Metodo**: `POST`  
**URL**: `http://127.0.0.1:5000/predict`  
**Headers**: 
- `Content-Type: application/json`

**Body (JSON)**:
```json
{
    "age": 45,
    "gender": "Female",
    "sleep_quality_index": 6.5,
    "brain_fog_level": 7.2,
    "physical_pain_score": 5.1,
    "stress_level": 6.8,
    "depression_phq9_score": 12.0,
    "fatigue_severity_scale_score": 6.5,
    "pem_duration_hours": 24.0,
    "hours_of_sleep_per_night": 7.5,
    "pem_present": 1,
    "work_status": "Working",
    "social_activity_level": "Medium",
    "exercise_frequency": "Sometimes",
    "meditation_or_mindfulness": "Yes"
}
```

**Risposta attesa**:
```json
{
    "prediction": "Depression",
    "raw_prediction": 1,
    "prediction_probabilities": [0.02, 0.96, 0.02],
    "class_labels": ["Both", "Depression", "ME/CFS"],
    "features_used": ["age", "gender", "..."],
    "n_features": 15
}
```

---

### **📦 Test 4: Batch Prediction**
**Metodo**: `POST`  
**URL**: `http://127.0.0.1:5000/predict_batch`  
**Headers**: 
- `Content-Type: application/json`

**Body (JSON)**:
```json
{
    "batch_features": [
        {
            "age": 30,
            "gender": "Male",
            "sleep_quality_index": 8.0,
            "brain_fog_level": 9.1,
            "physical_pain_score": 7.5,
            "stress_level": 8.2,
            "depression_phq9_score": 5.0,
            "fatigue_severity_scale_score": 8.5,
            "pem_duration_hours": 36.0,
            "hours_of_sleep_per_night": 6.0,
            "pem_present": 1,
            "work_status": "Not working",
            "social_activity_level": "Low",
            "exercise_frequency": "Never",
            "meditation_or_mindfulness": "No"
        },
        {
            "age": 55,
            "gender": "Female",
            "sleep_quality_index": 4.2,
            "brain_fog_level": 3.1,
            "physical_pain_score": 2.8,
            "stress_level": 5.5,
            "depression_phq9_score": 16.0,
            "fatigue_severity_scale_score": 4.2,
            "pem_duration_hours": 8.0,
            "hours_of_sleep_per_night": 8.5,
            "pem_present": 0,
            "work_status": "Working",
            "social_activity_level": "High",
            "exercise_frequency": "Daily",
            "meditation_or_mindfulness": "Yes"
        }
    ]
}
```

**Risposta attesa**:
```json
{
    "predictions": ["ME/CFS", "Depression"],
    "raw_predictions": [2, 1],
    "n_predictions": 2,
    "prediction_probabilities": [
        [0.05, 0.1, 0.85],
        [0.02, 0.95, 0.03]
    ],
    "class_labels": ["Both", "Depression", "ME/CFS"],
    "features_used": ["age", "gender", "..."],
    "n_features": 15
}
```

---

### **🛠️ Tips per Postman**

1. **Salva le richieste** in una Collection chiamata "ML API Tests"
2. **Crea Environment Variables**:
   - `base_url`: `http://127.0.0.1:5000`
   - Usa `{{base_url}}` nelle URL
3. **Aggiungi Tests (JavaScript)** per validazione automatica:
   ```javascript
   pm.test("Status code is 200", function () {
       pm.response.to.have.status(200);
   });
   
   pm.test("Response has prediction", function () {
       var jsonData = pm.response.json();
       pm.expect(jsonData).to.have.property('prediction');
   });
   ```
4. **Usa il Runner** per test automatici sequenziali
5. **Esporta la Collection** per condividerla con il team

---

### **⚠️ Note Importanti**

- **Valori mancanti**: Se ometti un campo, verrà sostituito con 0
- **Valori NaN**: Usa `null` in JSON invece di `NaN`
- **Categorie valide**:
  - `gender`: "Male", "Female"
  - `work_status`: "Working", "Not working", "Partially working"
  - `social_activity_level`: "Very low", "Low", "Medium", "High", "Very high"
  - `exercise_frequency`: "Never", "Rarely", "Sometimes", "Often", "Daily"
  - `meditation_or_mindfulness`: "Yes", "No"

**Tutti i test dovrebbero restituire predizioni accurate con probabilità!** 🎯