In [None]:
import math
from pathlib import Path

import numpy as np
import pandas as pd
import requests

# Endpoint the prediction requests are POSTed to (see call_api_on_batch).
API_URL = "http://localhost:8000/predict/"
# Value sent as the "model_name" field of every request payload.
MODEL_NAME = "lgbm_vanilla"
# CSV file loaded into df_test; relative path, so it is resolved against the
# kernel's working directory.
CSV_PATH = Path("../../data/application_test.csv")
# Maximum number of rows sent to the API in a single request.
BATCH_SIZE = 64


In [None]:
# Load the test set once; the cells below only read from df_test.
df_test = pd.read_csv(CSV_PATH)

# Report the dimensions so a wrong or truncated file is noticed immediately.
n_rows, n_cols = df_test.shape
print(f"Données test chargées : {n_rows} lignes, {n_cols} colonnes")

# Last expression of the cell: rich preview of the first rows.
df_test.head()



✅ Données test chargées : 48744 lignes, 121 colonnes


Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,...,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100001,Cash loans,F,N,Y,0,135000.0,568800.0,20560.5,450000.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,100005,Cash loans,M,N,Y,0,99000.0,222768.0,17370.0,180000.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
2,100013,Cash loans,M,Y,Y,0,202500.0,663264.0,69777.0,630000.0,...,0,0,0,0,0.0,0.0,0.0,0.0,1.0,4.0
3,100028,Cash loans,F,N,Y,2,315000.0,1575000.0,49018.5,1575000.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
4,100038,Cash loans,M,Y,N,1,180000.0,625500.0,32067.0,625500.0,...,0,0,0,0,,,,,,


In [134]:
def df_to_json_records(df: pd.DataFrame) -> list:
    """Convert a DataFrame into a list of JSON-serialisable row dictionaries.

    Every missing value (NaN, ``pd.NA``, ``NaT``) is turned into ``None`` so
    that it is encoded as JSON ``null`` instead of a non-compliant ``NaN``.

    Parameters
    ----------
    df : pd.DataFrame
        Rows to convert. The frame is not modified.

    Returns
    -------
    list of dict
        One ``{column: value}`` dictionary per row, in row order. An empty
        frame yields an empty list.
    """
    # Cast to object first: a float column cannot hold None, so without the
    # cast pandas may coerce the replacement straight back to NaN.
    # Masking with notna() covers all missing markers, not only np.nan.
    df_clean = df.astype(object).where(df.notna(), None)
    return df_clean.to_dict(orient="records")

In [135]:

def call_api_on_batch(batch_df: pd.DataFrame, timeout: float = 30):
    """POST one batch of rows to the prediction API and return its JSON reply.

    Parameters
    ----------
    batch_df : pd.DataFrame
        Rows to score; converted with ``df_to_json_records`` and sent as the
        ``"inputs"`` field, alongside ``MODEL_NAME`` as ``"model_name"``.
    timeout : float, default 30
        Timeout in seconds handed to ``requests.post``. Raise it for larger
        batches or a slow model.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    RuntimeError
        If the API answers with any status code other than 200. The message
        carries the status code and the first 500 characters of the body.
    requests.RequestException
        Propagated from ``requests.post`` (connection error, timeout, ...).
    """
    payload = {
        "model_name": MODEL_NAME,
        "inputs": df_to_json_records(batch_df),
    }

    resp = requests.post(API_URL, json=payload, timeout=timeout)

    if resp.status_code != 200:
        # Truncate the body so an HTML error page does not flood the output.
        raise RuntimeError(f"Erreur API {resp.status_code}: {resp.text[:500]}")

    return resp.json()


In [None]:
import math
import time
import numpy as np
import pandas as pd

# Score the whole test set through the API, BATCH_SIZE rows per request.
# Each prediction becomes one dict in all_preds; batches that fail or come
# back incomplete are recorded in failed_batches so they can be retried or
# audited instead of silently disappearing.
all_preds = []
failed_batches = []  # (1-based batch number, reason) tuples
n = len(df_test)
n_batches = math.ceil(n / BATCH_SIZE)

# Row slicing never changes the columns, so check for the id column once.
has_client_id = "SK_ID_CURR" in df_test.columns

for b in range(n_batches):
    start = b * BATCH_SIZE
    end = min((b + 1) * BATCH_SIZE, n)
    batch_df = df_test.iloc[start:end]

    try:
        resp_json = call_api_on_batch(batch_df)
    except Exception as e:
        # Best effort: keep scoring the remaining batches, but remember the
        # failure so the missing rows are traceable afterwards.
        print(f"Erreur sur le batch {b+1}: {e}")
        failed_batches.append((b + 1, str(e)))
        continue

    results = resp_json.get("results", [])

    # Results are matched to rows by position, so a count mismatch means
    # some rows are unscored (or extra results cannot be attributed).
    if len(results) != len(batch_df):
        mismatch = f"{len(results)} résultats reçus pour {len(batch_df)} lignes"
        print(f"Erreur sur le batch {b+1}: {mismatch}")
        failed_batches.append((b + 1, mismatch))

    # Pull the ids once per batch rather than building a full row Series
    # for every single prediction.
    batch_ids = batch_df["SK_ID_CURR"].tolist() if has_client_id else None

    # Ignore results beyond the batch size: they have no matching input row.
    for i, res in enumerate(results[: len(batch_df)]):
        row = {
            "index": start + i,
            "label": res.get("label"),
            "proba": res.get("proba"),
        }
        if batch_ids is not None:
            row["SK_ID_CURR"] = batch_ids[i]
        all_preds.append(row)
