In [1]:

import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler

# Chargement des données
cols = ['unit', 'cycle', 'op_setting_1', 'op_setting_2', 'op_setting_3'] + \
       [f'sensor_{i}' for i in range(1, 22)]

df = pd.read_csv('../data/raw/train_FD001.txt', sep='\s+', header=None)
df.columns = cols

# Calcul du RUL
rul_df = df.groupby('unit')['cycle'].max().reset_index()
rul_df.columns = ['unit', 'max_cycle']
df = df.merge(rul_df, on='unit')
df['RUL'] = df['max_cycle'] - df['cycle']

# Supprimer les capteurs constants (std = 0)
sensors = [col for col in df.columns if col.startswith('sensor_')]
constant_sensors = [col for col in sensors if df[col].std() == 0.0]
df.drop(columns=constant_sensors, inplace=True)
print(f"Capteurs supprimés car constants : {constant_sensors}")

# Normalisation des capteurs restants
remaining_sensors = [col for col in df.columns if col.startswith('sensor_')]
scaler = MinMaxScaler()
df[remaining_sensors] = scaler.fit_transform(df[remaining_sensors])

# Enregistrement dans data/cleaned
os.makedirs('../data/cleaned', exist_ok=True)
df.to_csv('../data/cleaned/train_FD001_cleaned.csv', index=False)
print("✅ Données nettoyées enregistrées dans data/cleaned/train_FD001_cleaned.csv")


Capteurs supprimés car constants : ['sensor_1', 'sensor_10', 'sensor_18', 'sensor_19']
✅ Données nettoyées enregistrées dans data/cleaned/train_FD001_cleaned.csv


In [3]:
pip install scikit-learn



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
