In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Apply the seaborn dark-grid style to every figure drawn afterwards.
plt.style.use("seaborn-v0_8-darkgrid")

# Locate the CSV file: it is expected in a "Data" folder that sits one level
# above the current working directory.
current_dir = Path().resolve()
project_root = current_dir.parent
data_path = project_root.joinpath(
    "Data",
    "Dataset of weighing station temperature measurements.csv",
)

print("Fichier CSV :", data_path)

# IMPORTANT: the fields of this file are separated by ';'.
df = pd.read_csv(data_path, sep=";")

# Quick look at the first rows and at the available column names.
display(df.head())
print(df.columns)

In [None]:
# Parse the "Time" column as datetimes, make it the index and order the rows
# chronologically.
# The guard keeps this cell re-runnable: once "Time" has become the index it is
# no longer a column, and converting it a second time would raise a KeyError.
# NOTE(review): pd.to_datetime infers the timestamp format here; if the file
# stores day-first dates (dd/mm/yyyy), pass dayfirst=True or an explicit
# format= — confirm against the raw CSV.
if "Time" in df.columns:
    df["Time"] = pd.to_datetime(df["Time"])
    df = df.set_index("Time")
df = df.sort_index()

df.head()


In [None]:
# Temperature columns measured inside the pit, one list per tag found in the
# column name ("Low", "Mid", "Top").
low_cols, mid_cols, top_cols = (
    [name for name in df.columns if tag in name]
    for tag in ("Low", "Mid", "Top")
)

# Order the columns by sensor number: S1, S2, ... S29.
def sort_by_sensor(col_list):
    """Return the column names sorted by the integer that follows "-S".

    Parameters
    ----------
    col_list : iterable of str
        Column names containing "-S<number>"; the text after the first "-S"
        (up to the next "-S", if any) must be parseable by ``int``.

    Returns
    -------
    list of str
        A new list in ascending sensor-number order; the input is not modified.

    Raises
    ------
    IndexError
        If a name does not contain "-S".
    ValueError
        If the text following "-S" is not an integer.
    """
    def sensor_number(col_name):
        return int(col_name.split("-S")[1])

    return sorted(col_list, key=sensor_number)

# Put each list in sensor-number order.
low_cols, mid_cols, top_cols = map(sort_by_sensor, (low_cols, mid_cols, top_cols))

# Report how many columns were found for each tag ...
for caption, cols in (
    ("Nb capteurs LOW :", low_cols),
    ("Nb capteurs MID :", mid_cols),
    ("Nb capteurs TOP :", top_cols),
):
    print(caption, len(cols))

# ... and show the first five names of each list as a sanity check.
for caption, cols in (
    ("\nExemples LOW :", low_cols),
    ("Exemples MID :", mid_cols),
    ("Exemples TOP :", top_cols),
):
    print(caption, cols[:5])

In [None]:
# Reference zone used for the heater ON/OFF detection.
sensors_ref = [11, 12, 13]

# Row-wise mean of the reference sensors, computed separately for the
# Low / Mid / Top column lists.
# NOTE(review): sensor s is read from list position s-1, which assumes every
# sorted list starts at S1 and has no gap in its numbering — confirm.
T_low_ref, T_mid_ref, T_top_ref = (
    df[[cols[s - 1] for s in sensors_ref]].mean(axis=1)
    for cols in (low_cols, mid_cols, top_cols)
)

# Local ΔT (top minus low), then a centred 5-sample moving average.
dT_local_ref = T_top_ref.sub(T_low_ref)
dT_smooth_ref = dT_local_ref.rolling(window=5, center=True).mean()

# The heater is flagged ON wherever the smoothed ΔT is above the threshold.
on_threshold = 3.5
heater_on = dT_smooth_ref.gt(on_threshold)
print(T_low_ref.head())

In [None]:
# Histogram of the local ΔT (T_top - T_low) for the reference sensors.
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(
    dT_local_ref.dropna(),  # NaN values are removed before binning
    bins=60,                # number of bars (can be adjusted)
    edgecolor="black",
    alpha=0.7,
)

ax.set_xlabel("ΔT = T_top - T_low [°C]")
ax.set_ylabel("Fréquence")
ax.set_title(f"Histogramme de ΔT local (capteurs {sensors_ref})")
ax.grid(True, alpha=0.3)

# (optional) vertical line marking the maximum sensor uncertainty (~2 °C)
incertitude_max = 2.0
ax.axvline(incertitude_max, color="red", linestyle="--", label="Incertitude max")
ax.legend()

plt.show()

In [None]:
# Histograms of the local ΔT (T_top - T_low) in several zones of the pit.

incertitude_max = 2.0   # maximum sensor uncertainty drawn as a vertical line
bins = 60               # number of histogram bars

# Zone label -> sensor numbers averaged for that zone.
# Each label must describe the sensors actually listed, because it is printed
# in the figure title next to the sensor list.
# NOTE(review): the left zone averages sensors 1, 3 and 4; if S4–S6 was the
# intended zone, change the sensor list (and this label) accordingly.
zones = {
    "Zone centre (S11-S13)": [11, 12, 13],
    "Zone gauche (S1, S3, S4)": [1, 3, 4],
    "Zone droite (S20-S22)": [20, 21, 22],
}

for zone_name, zone_sensors in zones.items():
    # Row-wise mean of the zone's sensors for the Low and Top column lists.
    # Sensor s is taken from list position s-1.
    T_low_zone = df[[low_cols[s-1] for s in zone_sensors]].mean(axis=1)
    T_top_zone = df[[top_cols[s-1] for s in zone_sensors]].mean(axis=1)
    dT_zone    = T_top_zone - T_low_zone

    # One figure per zone.
    plt.figure(figsize=(8,5))
    plt.hist(dT_zone.dropna(), bins=bins, edgecolor="black", alpha=0.7)
    plt.axvline(incertitude_max, color="red", linestyle="--", label="Incertitude max")
    plt.xlabel("ΔT = T_top - T_low [°C]")
    plt.ylabel("Fréquence")
    plt.title(f"Histogramme de ΔT local – {zone_name} (capteurs {zone_sensors})")
    plt.grid(True, alpha=0.3)
    plt.legend()
    plt.show()



In [None]:
# Smoothed local ΔT over time, with the ON/OFF detection highlighted.
fig, ax = plt.subplots(figsize=(14, 5))

ax.plot(dT_smooth_ref, label="ΔT (TOP-LOW) lissé", color="tab:red")
ax.axhline(
    on_threshold,
    color="black",
    linestyle="--",
    label=f"Seuil ON = {on_threshold} °C",
)

# Shade the area between the threshold and the curve wherever the heater is
# flagged ON.
ax.fill_between(
    dT_smooth_ref.index,
    on_threshold,
    dT_smooth_ref,
    where=heater_on,
    color="tab:red",
    alpha=0.2,
    label="Aérotherme ON",
)

ax.set_xlabel("Temps")
ax.set_ylabel("ΔT local [°C]")
ax.set_title(f"Poteau S{sensors_ref[0]}–S{sensors_ref[-1]} | ΔT local et détection ON/OFF")
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()


In [None]:
# Share of time the heater is ON as a function of the outdoor temperature,
# plus the mean outdoor / local indoor temperatures in each state.

T_ext = df["Outdoor temperature [deg. C]"]

# heater_on is False wherever the smoothed ΔT is NaN (edges of the centred
# rolling window, gaps in the data) because a comparison with NaN is False.
# Those samples have no known state, so they are excluded from the statistics
# instead of being silently counted as OFF.
state_known = dT_smooth_ref.notna()
heater_off = state_known & ~heater_on

tmp = pd.DataFrame({
    "T_ext_arrondie": T_ext.round().astype("Int64"),
    "ON": heater_on.astype(int)
})
# Mean of the 0/1 flag per rounded outdoor temperature, expressed in percent.
pourcentage_on_ext = (
    tmp[state_known]
    .groupby("T_ext_arrondie")["ON"]
    .mean()
    .sort_index() * 100
)

print(pourcentage_on_ext)

T_ext_ON  = T_ext[heater_on]
T_ext_OFF = T_ext[heater_off]

# Local indoor temperature: plain average of the three reference series.
T_int_zone = (T_low_ref + T_mid_ref + T_top_ref) / 3

T_int_ON  = T_int_zone[heater_on]
T_int_OFF = T_int_zone[heater_off]

print(f"T_ext moyenne quand heater ON  : {T_ext_ON.mean():.2f} °C")
print(f"T_ext moyenne quand heater OFF : {T_ext_OFF.mean():.2f} °C")
print(f"T_int locale moyenne quand ON  : {T_int_ON.mean():.2f} °C")
print(f"T_int locale moyenne quand OFF : {T_int_OFF.mean():.2f} °C")

plt.figure(figsize=(7,5))
plt.plot(pourcentage_on_ext.index, pourcentage_on_ext.values, marker="o")
plt.xlabel("Température extérieure arrondie [°C]")
plt.ylabel("Pourcentage du temps ON [%]")
plt.title(f"Pourcentage du temps ON en fonction de T_ext (sondes S{sensors_ref[0]}–S{sensors_ref[-1]})")
plt.grid(True, alpha=0.3)
plt.show()


In [None]:
# ON/OFF modelling for three zones of the pit.
# Zone name -> sensor numbers averaged for that zone.
zones_dict = {
    "centre": [11, 12, 13],
    "gauche": [1, 3, 4],
    "droite": [20, 21, 22],
}

T_ext_all = df["Outdoor temperature [deg. C]"]

list_zones_df = []

# NOTE: the loop rebinds T_low_zone / T_mid_zone / T_top_zone / T_int_zone on
# every iteration; T_int_zone is also assigned by the reference-zone statistics
# cell, so after this loop it refers to the last zone processed here.
for zone_name, sondes in zones_dict.items():
    # Row-wise mean of the zone's sensors (sensor s is at list position s-1).
    T_low_zone = df[[low_cols[s-1] for s in sondes]].mean(axis=1)
    T_mid_zone = df[[mid_cols[s-1] for s in sondes]].mean(axis=1)
    T_top_zone = df[[top_cols[s-1] for s in sondes]].mean(axis=1)

    dT_local_zone = T_top_zone - T_low_zone

    # Centred 5-sample moving average of the local ΔT.
    dT_smooth_zone = dT_local_zone.rolling(5, center=True).mean()

    heater_on_zone = (dT_smooth_zone > on_threshold)

    T_int_zone = (T_low_zone + T_mid_zone + T_top_zone) / 3

    # Small table for this zone
    df_zone = pd.DataFrame({
        "zone": zone_name,
        "T_ext": T_ext_all,                               # same T_ext for every zone
        "T_ext_arrondie": T_ext_all.round().astype("Int64"),
        "ON": heater_on_zone.astype(int),
        "T_int": T_int_zone,
    })

    # A NaN smoothed ΔT (rolling-window edges, data gaps) compares as False and
    # would be recorded as ON = 0; such rows have no known state, so they are
    # dropped rather than counted as OFF in the pooled statistics.
    df_zone = df_zone[dT_smooth_zone.notna()]

    list_zones_df.append(df_zone)

df_all_zones = pd.concat(list_zones_df, ignore_index=True)


In [None]:
# Share of time ON as a function of the rounded outdoor temperature, pooled
# over every row of df_all_zones (all zones together).
on_share_by_temp = df_all_zones.groupby("T_ext_arrondie")["ON"].mean()
pourcentage_on_ext_global = on_share_by_temp.sort_index().mul(100)

print(pourcentage_on_ext_global)


In [None]:
# Pooled mean temperatures for the rows flagged ON (1) and OFF (0).
rows_on = df_all_zones["ON"].eq(1)
rows_off = df_all_zones["ON"].eq(0)

# Outdoor temperature
T_ext_global_ON = df_all_zones.loc[rows_on, "T_ext"].mean()
T_ext_global_OFF = df_all_zones.loc[rows_off, "T_ext"].mean()

# Local indoor temperature
T_int_global_ON = df_all_zones.loc[rows_on, "T_int"].mean()
T_int_global_OFF = df_all_zones.loc[rows_off, "T_int"].mean()

print(f"T_ext moyenne globale quand heater ON  : {T_ext_global_ON:.2f} °C")
print(f"T_ext moyenne globale quand heater OFF : {T_ext_global_OFF:.2f} °C")
print(f"T_int locale moyenne globale ON        : {T_int_global_ON:.2f} °C")
print(f"T_int locale moyenne globale OFF       : {T_int_global_OFF:.2f} °C")


In [None]:
# ON percentage versus rounded outdoor temperature, all zones pooled.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(
    pourcentage_on_ext_global.index,
    pourcentage_on_ext_global.values,
    marker="o",
)
ax.set_xlabel("Température extérieure arrondie [°C]")
ax.set_ylabel("Pourcentage du temps ON [%]")
ax.set_title("Pourcentage du temps ON en fonction de T_ext (moyenne des 3 zones)")
ax.grid(True, alpha=0.3)
plt.show()
