# DATA GENERATOR

CODE 1️⃣ : FAULT-TYPE **CLASSIFICATION**

In [30]:
import numpy as np
import pandas as pd

# -------- CONFIG --------
FS = 10
DURATION_H = 3
TOTAL_SAMPLES = FS * 3600 * DURATION_H

timestamps = pd.date_range(start="2025-01-01", periods=TOTAL_SAMPLES, freq="s")
df = pd.DataFrame({"timestamp": timestamps})

# -------- CAPTEURS ET PLAGES (datasheets) --------
SENSORS = {
    "Gx": (-250, 250), "Gy": (-250, 250), "Gz": (-250, 250),
    "Ax": (-2, 2), "Ay": (-2, 2), "Az": (-2, 2),
    "MagX": (-800, 800), "MagY": (-800, 800), "MagZ": (-800, 800),
    "Temp_cpu": (-20, 50), "Temp_batt": (-20, 50), "Temp_panels": (-20, 50),
    "V_batt": (3.0, 4.2), "I_batt": (0, 5),
    "V_solar": (0, 20), "I_solar": (0, 3),
    "ADC": (0, 3.3),
    "Temp_BMP": (-20, 50), "Press_BMP": (300, 1100),
    "Light": (0, 100000)
}

# ---- remplir capteurs avec valeurs normales aléatoires ----
for s, (mn, mx) in SENSORS.items():
    df[s] = np.random.uniform(mn, mx, TOTAL_SAMPLES)

# -------- FAUTES MULTI-CAPTEURS AVEC BRUIT REALISTE --------
FAULTS = [
    "GyroFault", "AccelFault", "MagFault", "TempFault",
    "BattFault", "SolarFault", "ADC_Fault", "BMP_Fault",
    "LightFault", "Normal"
]

segment = TOTAL_SAMPLES // len(FAULTS)

for i, cls in enumerate(FAULTS):
    start = i * segment
    end = start + segment

    if cls == "GyroFault":
        # Multiplier aléatoire proche de 2
        df.loc[start:end-1, ["Gx","Gy","Gz"]] *= np.random.uniform(1.8,2.2,(segment,3))

    elif cls == "AccelFault":
        # Ajouter bruit normal réaliste
        df.loc[start:end-1, ["Ax","Ay","Az"]] += np.random.normal(0,0.5,(segment,3))

    elif cls == "MagFault":
        df.loc[start:end-1, ["MagX","MagY","MagZ"]] *= np.random.uniform(2.0,3.0,(segment,3))

    elif cls == "TempFault":
        df.loc[start:end-1, ["Temp_cpu","Temp_batt","Temp_panels","Temp_BMP"]] += np.random.normal(15,2,(segment,4))

    elif cls == "BattFault":
        df.loc[start:end-1, "V_batt"] *= np.random.uniform(0.65,0.75,segment)
        df.loc[start:end-1, "I_batt"] *= np.random.uniform(1.4,1.6,segment)

    elif cls == "SolarFault":
        df.loc[start:end-1, "V_solar"] *= np.random.uniform(0.4,0.6,segment)
        df.loc[start:end-1, "I_solar"] *= np.random.uniform(0.4,0.6,segment)

    elif cls == "ADC_Fault":
        df.loc[start:end-1, "ADC"] *= np.random.uniform(1.7,2.0,segment)

    elif cls == "BMP_Fault":
        df.loc[start:end-1, "Press_BMP"] *= np.random.uniform(1.05,1.15,segment)

    elif cls == "LightFault":
        df.loc[start:end-1, "Light"] *= np.random.uniform(1.4,1.6,segment)

    # ---- labels fault_type ----
    df.loc[start:end-1, "fault_type_current"] = cls


# ---- sauvegarde ----
df.to_csv("fault_type_FDIR_dataset_realistic.csv", index=False)
print("✅ fault_type_FDIR_dataset_realistic.csv généré")
print(df.head())


✅ fault_type_FDIR_dataset_realistic.csv généré
            timestamp          Gx          Gy          Gz        Ax        Ay  \
0 2025-01-01 00:00:00 -386.218554 -288.658589  326.841988 -1.855087  1.536093   
1 2025-01-01 00:00:01  131.220691   51.877525 -377.487725 -1.453402 -0.060823   
2 2025-01-01 00:00:02   92.416385    7.235131   84.049521 -1.629577  1.736599   
3 2025-01-01 00:00:03  101.829551  427.865523 -270.453548  1.701457 -0.482733   
4 2025-01-01 00:00:04  294.706740  259.263186  507.820698  0.858339  1.834343   

         Az        MagX        MagY        MagZ  ...  Temp_panels    V_batt  \
0  0.578140  483.780686  457.194138  103.487425  ...    39.579536  3.355853   
1 -0.147111   26.535953  175.551813  371.614487  ...    12.266648  3.350209   
2 -0.956939 -392.548581  547.422554 -745.061619  ...     7.607936  3.421646   
3 -1.556853  561.690604  556.040584  646.995125  ...    -1.543699  3.580026   
4 -0.678038 -232.865305   87.685255  139.473317  ...     3.733388  3.84

CODE 2️⃣ : ANOMALY DETECTION **ONLY**

In [14]:
import numpy as np
import pandas as pd

# -------- CONFIG --------
FS = 10
DURATION_H = 3
TOTAL_SAMPLES = FS * 3600 * DURATION_H

timestamps = pd.date_range(start="2025-01-01", periods=TOTAL_SAMPLES, freq="s")
df = pd.DataFrame({"timestamp": timestamps})

# -------- CAPTEURS ET PLAGES (datasheets) --------
SENSORS = {
    "Gx": (-250, 250), "Gy": (-250, 250), "Gz": (-250, 250),
    "Ax": (-2, 2), "Ay": (-2, 2), "Az": (-2, 2),
    "MagX": (-800, 800), "MagY": (-800, 800), "MagZ": (-800, 800),
    "Temp_cpu": (-20, 50), "Temp_batt": (-20, 50), "Temp_panels": (-20, 50),
    "V_batt": (3.0, 4.2), "I_batt": (0, 5),
    "V_solar": (0, 20), "I_solar": (0, 3),
    "ADC": (0, 3.3),
    "Temp_BMP": (-20, 50), "Press_BMP": (300, 1100),
    "Light": (0, 100000)
}

# ---- remplir capteurs avec valeurs normales aléatoires ----
for s, (mn, mx) in SENSORS.items():
    df[s] = np.random.uniform(mn, mx, TOTAL_SAMPLES)

# -------- FAUTES MULTI-CAPTEURS AVEC BRUIT REALISTE --------
FAULTS = [
    "GyroFault", "AccelFault", "MagFault", "TempFault",
    "BattFault", "SolarFault", "ADC_Fault", "BMP_Fault",
    "LightFault"
]

# Calculer segments pour équilibrer classes
num_faults = len(FAULTS)
segment = TOTAL_SAMPLES // (num_faults * 2)  # *2 pour normal + anomalie

# On crée moitié normal, moitié anomalie
for i, cls in enumerate(FAULTS):
    # Normal segment
    start_norm = i * segment * 2
    end_norm = start_norm + segment
    df.loc[start_norm:end_norm-1, "anomaly"] = 0  # normal

    # Anomalie segment
    start_fault = end_norm
    end_fault = start_fault + segment

    if cls == "GyroFault":
        df.loc[start_fault:end_fault-1, ["Gx","Gy","Gz"]] *= np.random.uniform(1.8,2.2,(segment,3))
    elif cls == "AccelFault":
        df.loc[start_fault:end_fault-1, ["Ax","Ay","Az"]] += np.random.normal(0,0.5,(segment,3))
    elif cls == "MagFault":
        df.loc[start_fault:end_fault-1, ["MagX","MagY","MagZ"]] *= np.random.uniform(2.0,3.0,(segment,3))
    elif cls == "TempFault":
        df.loc[start_fault:end_fault-1, ["Temp_cpu","Temp_batt","Temp_panels","Temp_BMP"]] += np.random.normal(15,2,(segment,4))
    elif cls == "BattFault":
        df.loc[start_fault:end_fault-1, "V_batt"] *= np.random.uniform(0.65,0.75,segment)
        df.loc[start_fault:end_fault-1, "I_batt"] *= np.random.uniform(1.4,1.6,segment)
    elif cls == "SolarFault":
        df.loc[start_fault:end_fault-1, "V_solar"] *= np.random.uniform(0.4,0.6,segment)
        df.loc[start_fault:end_fault-1, "I_solar"] *= np.random.uniform(0.4,0.6,segment)
    elif cls == "ADC_Fault":
        df.loc[start_fault:end_fault-1, "ADC"] *= np.random.uniform(1.7,2.0,segment)
    elif cls == "BMP_Fault":
        df.loc[start_fault:end_fault-1, "Press_BMP"] *= np.random.uniform(1.05,1.15,segment)
    elif cls == "LightFault":
        df.loc[start_fault:end_fault-1, "Light"] *= np.random.uniform(1.4,1.6,segment)

    df.loc[start_fault:end_fault-1, "anomaly"] = 1  # anomalie

# -------- 4. Sauvegarde --------
df.to_csv("binary_anomaly_FDIR_dataset_balanced.csv", index=False)
print("✅ Dataset binaire équilibré généré : binary_anomaly_FDIR_dataset_balanced.csv")
print(df.head())
print("Distribution des classes :", df["anomaly"].value_counts())


✅ Dataset binaire équilibré généré : binary_anomaly_FDIR_dataset_balanced.csv
            timestamp          Gx          Gy          Gz        Ax        Ay  \
0 2025-01-01 00:00:00  100.576419 -233.429584  -49.667932 -0.003564 -1.047701   
1 2025-01-01 00:00:01  -93.043331 -183.256837 -127.515346  1.442494 -0.638449   
2 2025-01-01 00:00:02  224.178058  -23.067546  -64.752436 -1.733574 -0.903230   
3 2025-01-01 00:00:03 -184.096199   58.470120  125.888545 -0.899810  0.097458   
4 2025-01-01 00:00:04  -36.027690   46.270628  -23.462693  0.434065  1.269565   

         Az        MagX        MagY        MagZ  ...  Temp_panels    V_batt  \
0  0.819604  576.117805   68.919416 -730.729541  ...    -3.294641  3.984588   
1  1.125632 -756.760747  471.886944  197.823270  ...    30.367089  3.220566   
2 -1.398543  480.120340  362.756705 -760.603014  ...     8.720640  3.408291   
3  1.577225  -87.503583 -616.337459  558.362075  ...    26.574865  3.136957   
4 -0.759633  176.677976 -628.376263 -307

# **model classfication current **


In [31]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
import lightgbm as lgb

# -----------------------
# 1️⃣ Charger le dataset
# -----------------------
df = pd.read_csv("fault_type_FDIR_dataset_realistic.csv")

# Features capteurs
SENSOR_COLS = [c for c in df.columns if c not in ["timestamp","fault_type_current"]]

# Label
label_current = "fault_type_current"

# -----------------------
# 2️⃣ Encodage du label
# -----------------------
le_current = LabelEncoder()
df[label_current] = le_current.fit_transform(df[label_current])

# -----------------------
# 3️⃣ Créer des fenêtres glissantes
# -----------------------
WINDOW_SIZE = 10  # 10 samples ~ 1s à FS=10Hz
X_current = []
y_current = []

for i in range(len(df) - WINDOW_SIZE):
    # Chaque fenêtre de 10 échantillons aplatie en vecteur
    X_current.append(df[SENSOR_COLS].iloc[i:i+WINDOW_SIZE].values.flatten())
    # Label associé à la dernière mesure de la fenêtre
    y_current.append(df[label_current].iloc[i+WINDOW_SIZE-1])

X_current = np.array(X_current)
y_current = np.array(y_current)

print(f"X_current shape: {X_current.shape}, y_current shape: {y_current.shape}")

# -----------------------
# 4️⃣ Split train/test
# -----------------------
X_train, X_test, y_train, y_test = train_test_split(
    X_current, y_current, test_size=0.2, random_state=42, shuffle=True
)

# -----------------------
# 5️⃣ LightGBM pour fault_type_current
# -----------------------
lgb_curr = lgb.LGBMClassifier(
    objective="multiclass",
    num_class=len(le_current.classes_),
    n_estimators=200,
    learning_rate=0.1,
    max_depth=7,
    random_state=42
)

lgb_curr.fit(X_train, y_train)
y_pred_curr = lgb_curr.predict(X_test)

# -----------------------
# 6️⃣ Résultats
# -----------------------
print("✅ Classification fault_type_current Accuracy:", accuracy_score(y_test, y_pred_curr))
print(classification_report(y_test, y_pred_curr, target_names=le_current.classes_))


X_current shape: (107990, 200), y_current shape: (107990,)
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.182249 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51000
[LightGBM] [Info] Number of data points in the train set: 86392, number of used features: 200
[LightGBM] [Info] Start training from score -2.306203
[LightGBM] [Info] Start training from score -2.306319
[LightGBM] [Info] Start training from score -2.298565
[LightGBM] [Info] Start training from score -2.300411
[LightGBM] [Info] Start training from score -2.304578
[LightGBM] [Info] Start training from score -2.290413
[LightGBM] [Info] Start training from score -2.303535
[LightGBM] [Info] Start training from score -2.300758
[LightGBM] [Info] Start training from score -2.306552
[LightGBM] [Info] Start training from score -2.308646




✅ Classification fault_type_current Accuracy: 0.9575423650337994
              precision    recall  f1-score   support

   ADC_Fault       1.00      0.99      1.00      2192
  AccelFault       0.99      0.93      0.96      2193
   BMP_Fault       0.99      0.69      0.81      2126
   BattFault       1.00      1.00      1.00      2142
   GyroFault       1.00      1.00      1.00      2169
  LightFault       1.00      0.98      0.99      2055
    MagFault       1.00      1.00      1.00      2169
      Normal       0.71      0.98      0.82      2144
  SolarFault       1.00      1.00      1.00      2195
   TempFault       1.00      1.00      1.00      2213

    accuracy                           0.96     21598
   macro avg       0.97      0.96      0.96     21598
weighted avg       0.97      0.96      0.96     21598



In [33]:
import pickle

with open("lgb_fault_type_current.pkl", "wb") as f:
    pickle.dump({
        "model": lgb_curr,
        "label_encoder": le_current,
        "features": list(SENSORS.keys())
    }, f)

print("✅ Modèle fault_type_current sauvegardé (lgb_fault_type_current.pkl)")

✅ Modèle fault_type_current sauvegardé (lgb_fault_type_current.pkl)


# **detection current **


In [15]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
import lightgbm as lgb

# -----------------------
# 1️⃣ Charger le dataset binaire
# -----------------------
df = pd.read_csv("/content/binary_anomaly_FDIR_dataset_balanced.csv")  # dataset avec 'anomaly' 0/1

# Features capteurs
SENSOR_COLS = [c for c in df.columns if c not in ["timestamp","anomaly"]]

# Label binaire
label = "anomaly"

# -----------------------
# 2️⃣ Encodage du label (optionnel car déjà 0/1)
# -----------------------
# Si jamais ton label était string
# le = LabelEncoder()
# df[label] = le.fit_transform(df[label])

# -----------------------
# 3️⃣ Créer des fenêtres glissantes
# -----------------------
WINDOW_SIZE = 10  # 10 samples (~1s si FS=10Hz)
X = []
y = []

for i in range(len(df) - WINDOW_SIZE):
    # On aplatit chaque fenêtre en un vecteur
    X.append(df[SENSOR_COLS].iloc[i:i+WINDOW_SIZE].values.flatten())
    # On prend le label de la dernière mesure de la fenêtre
    y.append(df[label].iloc[i+WINDOW_SIZE-1])

X = np.array(X)
y = np.array(y)

print(f"X shape: {X.shape}, y shape: {y.shape}")

# -----------------------
# 4️⃣ Split train/test
# -----------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True
)

# -----------------------
# 5️⃣ LightGBM pour détection d'anomalie
# -----------------------
lgb_model = lgb.LGBMClassifier(
    objective="binary",
    n_estimators=200,
    learning_rate=0.1,
    max_depth=7,
    random_state=42
)

lgb_model.fit(X_train, y_train)
y_pred = lgb_model.predict(X_test)

print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=["Normal", "Anomaly"]))


X shape: (107990, 200), y shape: (107990,)
[LightGBM] [Info] Number of positive: 43225, number of negative: 43167
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.167795 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51000
[LightGBM] [Info] Number of data points in the train set: 86392, number of used features: 200
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500336 -> initscore=0.001343
[LightGBM] [Info] Start training from score 0.001343




✅ Accuracy: 0.9535605148624873
              precision    recall  f1-score   support

      Normal       0.92      0.99      0.96     10824
     Anomaly       0.99      0.91      0.95     10774

    accuracy                           0.95     21598
   macro avg       0.96      0.95      0.95     21598
weighted avg       0.96      0.95      0.95     21598



In [35]:
# 6️⃣ Sauvegarder modèle + encodeur + colonnes
# -----------------------
with open("lgb_anomaly_model.pkl", "wb") as f:
    pickle.dump({
        "model": lgb_model,
        "features": list(SENSORS.keys())
    }, f)

print("✅ Modèle d’anomalie sauvegardé : lgb_anomaly_model.pkl")

✅ Modèle d’anomalie sauvegardé : lgb_anomaly_model.pkl


# **Testing**

In [41]:
import numpy as np
import pandas as pd
import time

# --- Définition des capteurs et plages ---
SENSORS = {
    "Gx": (-250, 250), "Gy": (-250, 250), "Gz": (-250, 250),
    "Ax": (-2, 2), "Ay": (-2, 2), "Az": (-2, 2),
    "MagX": (-800, 800), "MagY": (-800, 800), "MagZ": (-800, 800),
    "Temp_cpu": (-20, 50), "Temp_batt": (-20, 50), "Temp_panels": (-20, 50),
    "V_batt": (3.0, 4.2), "I_batt": (0, 5),
    "V_solar": (0, 20), "I_solar": (0, 3),
    "ADC": (0, 3.3),
    "Temp_BMP": (-20, 50), "Press_BMP": (300, 1100),
    "Light": (0, 100000)
}

FAULTS = [
    "GyroFault", "AccelFault", "MagFault", "TempFault",
    "BattFault", "SolarFault", "ADC_Fault", "BMP_Fault",
    "LightFault"
]

# --- Simuler les modèles (ici avec des "dummy" pour exemple) ---
class DummyDetectionModel:
    def predict(self, X):
        # Retourne 1 (anomalie) si une valeur dépasse 90% du max, sinon 0
        X_arr = X.values
        threshold = 0.9
        return np.any(X_arr >= threshold * np.array([v[1] for v in SENSORS.values()]), axis=1).astype(int)

class DummyClassificationModel:
    def predict(self, X):
        # Retourne un type de panne aléatoire parmi FAULTS
        return np.random.choice(FAULTS, size=len(X))

# Charger ou instancier les modèles
det_model = DummyDetectionModel()
class_model = DummyClassificationModel()

# --- Simulation flux capteurs en temps réel ---
def generate_sensor_values():
    return {k: np.random.uniform(v[0], v[1]) for k, v in SENSORS.items()}

# Nombre de lectures simulées
NUM_READINGS = 20

for i in range(NUM_READINGS):
    # Simuler une lecture capteur
    reading = generate_sensor_values()
    df_reading = pd.DataFrame([reading])

    # Détection d'anomalie
    anomaly_pred = det_model.predict(df_reading)[0]

    if anomaly_pred == 1:
        fault_pred = class_model.predict(df_reading)[0]
        print(f"[{i}] ANOMALIE DETECTEE ! Type: {fault_pred} | Valeurs: {reading}")
    else:
        print(f"[{i}] Normal | Valeurs: {reading}")

    time.sleep(0.5)  # Simuler un intervalle réel (0.5 sec)


[0] Normal | Valeurs: {'Gx': -149.9728680123456, 'Gy': -137.22333956265913, 'Gz': -104.18543335374835, 'Ax': 1.4405905649353068, 'Ay': -1.9598502613136812, 'Az': -1.397262372686082, 'MagX': 507.11589741208877, 'MagY': -72.71362298397878, 'MagZ': -156.11149198962164, 'Temp_cpu': 35.675692154760654, 'Temp_batt': -4.746382306956196, 'Temp_panels': 18.049830752759455, 'V_batt': 3.463381869680483, 'I_batt': 0.24715060414298995, 'V_solar': 11.637339444876964, 'I_solar': 2.4475971224594457, 'ADC': 1.80052622453247, 'Temp_BMP': 31.746094401940773, 'Press_BMP': 773.6346920371943, 'Light': 8416.860448001218}
[1] ANOMALIE DETECTEE ! Type: AccelFault | Valeurs: {'Gx': 31.101197324746465, 'Gy': 168.72524981879621, 'Gz': -90.09063736635869, 'Ax': 1.9868649499428002, 'Ay': -1.4891330497306963, 'Az': -1.0126934287167546, 'MagX': 77.07969034053497, 'MagY': 32.0278340962833, 'MagZ': -580.8245456826153, 'Temp_cpu': 25.635756913456106, 'Temp_batt': 34.573000583508296, 'Temp_panels': 25.740591131681647, 'V