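# In [7]:  (Jupyter cell prompt left over from a notebook export; kept as a comment so the file parses as Python)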
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# ---------------------------
# 1. Load the data
# ---------------------------
# The flight-date column is parsed as datetime at read time; the date filter
# further down uses the `.dt` accessor on it.
df = pd.read_csv("vols.csv", parse_dates=["[FK] Flight date"])

# Show the columns and a sample of rows so the schema can be checked in the app
st.write("Colonnes du dataset :", df.columns)
st.write("Exemple de données :", df.head())

# Convert the Delta column to float: strip "$" and "," then cast.
# The pattern is a raw string because "\$" in a normal string literal is an
# invalid escape sequence (SyntaxWarning on Python 3.12+).
df["Delta Total cost (en $)"] = (
    df["Delta Total cost (en $)"].replace(r"[\$,]", "", regex=True).astype(float)
)

# ---------------------------
# 2. Streamlit filters
# ---------------------------
city_pair = st.selectbox("City Pair", df["[LIDO] Citypair"].unique())

# Date range, pre-filled with the full span of flight dates in the dataset
date_range = st.date_input(
    "Sélectionner la période",
    [df["[FK] Flight date"].min(), df["[FK] Flight date"].max()]
)

type_avion = st.selectbox("Type Avions IATA", df["Type Avions IATA"].unique())

# In range mode the widget returns fewer than two dates while the user has
# picked only the start of the range; indexing date_range[1] would then raise
# IndexError. Halt this script run until both bounds are selected.
if not isinstance(date_range, (list, tuple)) or len(date_range) != 2:
    st.info("Veuillez sélectionner une date de début et une date de fin.")
    st.stop()

# ---------------------------
# 3. Data filtering
# ---------------------------
# Build one boolean mask per criterion, then keep the rows matching all three.
period_start, period_end = date_range[0], date_range[1]
flight_days = df["[FK] Flight date"].dt.date

matches_city_pair = df["[LIDO] Citypair"] == city_pair
matches_period = flight_days.between(period_start, period_end)
matches_aircraft = df["Type Avions IATA"] == type_avion

df_filtered = df[matches_city_pair & matches_period & matches_aircraft]

# Report how many flights passed the filters and preview the first rows
st.write("Nombre de vols filtrés :", len(df_filtered))
st.write(df_filtered.head())

# ---------------------------
# 4. Bootstrap and 95% CI
# ---------------------------
# Rows without a Delta value cannot contribute to a mean; drop them up front
# so every bootstrap resample has the same size.
deltas = df_filtered["Delta Total cost (en $)"].dropna().to_numpy(dtype=float)

if len(df_filtered) == 0:
    st.warning("⚠️ Aucun vol trouvé pour ce filtre")
elif deltas.size == 0:
    # Every Delta is missing: the statistics would be NaN and the histogram
    # call would fail, so report it instead.
    st.warning("⚠️ Aucune valeur de Delta exploitable pour ce filtre")
else:
    NBOOT = 5000
    n_obs = deltas.size
    rng = np.random.default_rng()
    boot_means = np.empty(NBOOT)

    # Resample with replacement via NumPy index matrices rather than NBOOT
    # separate pandas .sample() calls. Work in batches so the index matrix
    # stays around 2 million cells however many flights pass the filters.
    batch_size = max(1, 2_000_000 // n_obs)
    for start in range(0, NBOOT, batch_size):
        stop = min(start + batch_size, NBOOT)
        picks = rng.integers(0, n_obs, size=(stop - start, n_obs))
        boot_means[start:stop] = deltas[picks].mean(axis=1)

    # Percentile bootstrap interval: 2.5th and 97.5th percentiles of the
    # resampled means.
    ci_low, ci_high = np.percentile(boot_means, [2.5, 97.5])
    mean_observed = df_filtered["Delta Total cost (en $)"].mean()

    # ---------------------------
    # 5. Results
    # ---------------------------
    st.write(f"**Nombre de vols :** {len(df_filtered)}")
    st.write(f"**Gain moyen observé :** {mean_observed:.2f}")
    st.write(f"**IC95% gain moyen :** ({ci_low:.2f}, {ci_high:.2f})")

    # ---------------------------
    # 6. Plot
    # ---------------------------
    # Histogram of the bootstrap means with the interval bounds (red, dashed)
    # and the observed mean (green).
    fig, ax = plt.subplots(figsize=(8,4))
    ax.hist(boot_means, bins=30, color="skyblue", edgecolor="black")
    ax.axvline(ci_low, color="red", linestyle="--", label=f"IC 2.5% = {ci_low:.2f}")
    ax.axvline(ci_high, color="red", linestyle="--", label=f"IC 97.5% = {ci_high:.2f}")
    ax.axvline(mean_observed, color="green", linestyle="-", label=f"Moyenne = {mean_observed:.2f}")
    ax.set_title(f"Distribution bootstrap des gains moyens pour {city_pair}")
    ax.set_xlabel("Gain moyen (Delta)")
    ax.set_ylabel("Fréquence")
    ax.legend()

    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is closed; the
    # script reruns on each widget interaction, so close the figure once it
    # has been rendered to avoid accumulating them.
    plt.close(fig)



