In [1]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import matplotlib.pyplot as plt

# Apply seaborn's default theme first, then switch the style to "white".
sns.set_theme()
sns.set_style("white")

In [2]:
def compute_col_stats(df: pd.DataFrame, col_to_calc_stats_on: str) -> list:
    """Print summary statistics of the per-unit maximum of one column.

    For every distinct value of ``df["unit"]`` the maximum of
    ``col_to_calc_stats_on`` is taken. The minimum, maximum, mean, median and
    standard deviation (``np.std`` with its default ``ddof=0``) of those
    per-unit maxima are printed, along with the unit that attains the minimum
    and the unit that attains the maximum.

    Args:
        df: Frame containing a ``"unit"`` column and the column named by
            ``col_to_calc_stats_on``.
        col_to_calc_stats_on: Name of the column to summarise.

    Returns:
        The per-unit maxima as a list, one entry per unit, ordered by each
        unit's first appearance in ``df``.

    Raises:
        ValueError: If ``df`` contains no units, so there is nothing to
            summarise.
        KeyError: If ``"unit"`` or ``col_to_calc_stats_on`` is not a column.
    """
    # A single grouped pass replaces one ``df.query`` scan per unit. It also
    # accepts non-numeric unit ids, which cannot be spliced into a query
    # string. ``sort=False`` keeps the units in order of first appearance.
    per_unit_max = df.groupby("unit", sort=False)[col_to_calc_stats_on].max()
    if per_unit_max.empty:
        raise ValueError(
            f"Cannot compute '{col_to_calc_stats_on}' statistics: "
            "the DataFrame contains no units."
        )

    units = per_unit_max.index.tolist()
    ruls = per_unit_max.tolist()

    unit_min_life_time = None
    unit_max_life_time = None
    smallest = None
    largest = None
    for unit, unit_max_col_value in zip(units, ruls):
        # "<=" / ">=" rather than "<" / ">": when several units tie for the
        # extreme value, the unit encountered last is the one reported.
        if smallest is None or unit_max_col_value <= smallest:
            smallest = unit_max_col_value
            unit_min_life_time = unit
        if largest is None or unit_max_col_value >= largest:
            largest = unit_max_col_value
            unit_max_life_time = unit

    ruls_min = np.min(ruls)
    ruls_max = np.max(ruls)
    ruls_mean = np.mean(ruls)
    ruls_median = np.median(ruls)
    ruls_std = np.std(ruls)

    print(f"'{col_to_calc_stats_on}' Minimum: {ruls_min: .2f} (unit={unit_min_life_time})")
    print(f"'{col_to_calc_stats_on}' Maximum: {ruls_max: .2f} (unit={unit_max_life_time})")
    print(f"'{col_to_calc_stats_on}' Mean: {ruls_mean: .2f}")
    print(f"'{col_to_calc_stats_on}' Median: {ruls_median: .2f}")
    print(f"'{col_to_calc_stats_on}' Standard Dev: {ruls_std: .2f}\n")

    return ruls

In [3]:
def plot_operatoinal_life_time_dist(df: pd.DataFrame, col: str):
    """Plot the distribution of each unit's last recorded value of ``col``.

    One value per distinct ``df["unit"]`` is collected, taken from that
    unit's final row in ``df``, and drawn with ``sns.displot`` as a histogram
    with a KDE overlay.

    Note: the final row only holds the unit's maximum of ``col`` when each
    unit's rows are stored in ascending ``col`` order -- presumably true for
    the ``"cycle"`` column; verify against the data before relying on it.

    Args:
        df: Frame containing a ``"unit"`` column and the column ``col``.
        col: Name of the column whose per-unit final value is plotted.
    """
    # ``keep="last"`` retains exactly one row per unit -- its final row --
    # in a single pass. The previous per-unit ``df.query`` rescanned the
    # frame for every unit and only worked for numeric unit ids.
    last_row_per_unit = df.drop_duplicates(subset="unit", keep="last")
    cycles_list = last_row_per_unit[col].tolist()

    grid = sns.displot(x=cycles_list, kde=True)
    # The data is passed as a bare list, so seaborn has no name for the
    # x-axis; label it with the column so the figure stands on its own.
    grid.set_xlabels(col)

In [4]:
# Suffix interpolated into each label file name below.
AE_labels = "AutoEncoder"

# One label CSV per data subset (FD001 .. FD004).
FD001_AE_LABELS = f"FD001_train_unsupervised_labels_{AE_labels}.csv"
FD002_AE_LABELS = f"FD002_train_unsupervised_labels_{AE_labels}.csv"
FD003_AE_LABELS = f"FD003_train_unsupervised_labels_{AE_labels}.csv"
FD004_AE_LABELS = f"FD004_train_unsupervised_labels_{AE_labels}.csv"

# Directory that holds the label CSVs. Set the NASA_TURBOFAN_TRAIN_DIR
# environment variable to run the notebook on another machine; when it is
# unset, the original hard-coded location is used.
path_to_data_dir = os.environ.get(
    "NASA_TURBOFAN_TRAIN_DIR",
    "/Users/rafaeltoche/Documents/School/Research/"
    "Rainwaters_Lab/DART-LP2/Condition_Monitoring/NASA_turbofan_data/train",
)

In [5]:
# Read the four label CSVs from the data directory, in FD001..FD004 order,
# and bind each resulting frame to its own name.
df_FD001, df_FD002, df_FD003, df_FD004 = (
    pd.read_csv(os.path.join(path_to_data_dir, labels_file_name))
    for labels_file_name in (
        FD001_AE_LABELS,
        FD002_AE_LABELS,
        FD003_AE_LABELS,
        FD004_AE_LABELS,
    )
)

In [6]:
# Per-subset statistics of each unit's maximum "cycle" value.
# Each header and its rule are emitted by a single print call.
print("FD001 Stats", "------------------------------------------", sep="\n")
cycles_FD001 = compute_col_stats(df_FD001, col_to_calc_stats_on="cycle")

print("FD002 Stats", "------------------------------------------", sep="\n")
cycles_FD002 = compute_col_stats(df_FD002, col_to_calc_stats_on="cycle")

print("FD003 Stats", "------------------------------------------", sep="\n")
cycles_FD003 = compute_col_stats(df_FD003, col_to_calc_stats_on="cycle")

print("FD004 Stats", "------------------------------------------", sep="\n")
cycles_FD004 = compute_col_stats(df_FD004, col_to_calc_stats_on="cycle")

FD001 Stats
------------------------------------------
'cycle' Minimum:  128.00 (unit=39)
'cycle' Maximum:  362.00 (unit=69)
'cycle' Mean:  206.31
'cycle' Median:  199.00
'cycle' Standard Dev:  46.11

FD002 Stats
------------------------------------------
'cycle' Minimum:  128.00 (unit=244)
'cycle' Maximum:  378.00 (unit=112)
'cycle' Mean:  206.77
'cycle' Median:  199.00
'cycle' Standard Dev:  46.69

FD003 Stats
------------------------------------------
'cycle' Minimum:  145.00 (unit=99)
'cycle' Maximum:  525.00 (unit=55)
'cycle' Mean:  247.20
'cycle' Median:  220.50
'cycle' Standard Dev:  86.05

FD004 Stats
------------------------------------------
'cycle' Minimum:  128.00 (unit=214)
'cycle' Maximum:  543.00 (unit=118)
'cycle' Mean:  245.98
'cycle' Median:  234.00
'cycle' Standard Dev:  72.96


In [8]:
# Per-subset statistics of each unit's maximum "RUL" value.
# Each header and its rule are emitted by a single print call.
print("FD001 RUL Stats", "------------------------------------------", sep="\n")
rul_FD001 = compute_col_stats(df_FD001, col_to_calc_stats_on="RUL")

print("FD002 RUL Stats", "------------------------------------------", sep="\n")
rul_FD002 = compute_col_stats(df_FD002, col_to_calc_stats_on="RUL")

print("FD003 RUL Stats", "------------------------------------------", sep="\n")
rul_FD003 = compute_col_stats(df_FD003, col_to_calc_stats_on="RUL")

print("FD004 RUL  Stats", "------------------------------------------", sep="\n")
rul_FD004 = compute_col_stats(df_FD004, col_to_calc_stats_on="RUL")

FD001 RUL Stats
------------------------------------------
'RUL' Minimum:  127.00 (unit=39)
'RUL' Maximum:  361.00 (unit=69)
'RUL' Mean:  205.31
'RUL' Median:  198.00
'RUL' Standard Dev:  46.11

FD002 RUL Stats
------------------------------------------
'RUL' Minimum:  127.00 (unit=244)
'RUL' Maximum:  377.00 (unit=112)
'RUL' Mean:  205.77
'RUL' Median:  198.00
'RUL' Standard Dev:  46.69

FD003 RUL Stats
------------------------------------------
'RUL' Minimum:  144.00 (unit=99)
'RUL' Maximum:  524.00 (unit=55)
'RUL' Mean:  246.20
'RUL' Median:  219.50
'RUL' Standard Dev:  86.05

FD004 RUL  Stats
------------------------------------------
'RUL' Minimum:  127.00 (unit=214)
'RUL' Maximum:  542.00 (unit=118)
'RUL' Mean:  244.98
'RUL' Median:  233.00
'RUL' Standard Dev:  72.96


In [None]:
# Distribution of the last recorded "cycle" value of every unit in df_FD001.
plot_operatoinal_life_time_dist(df_FD001, col="cycle")

In [None]:
# Distribution of the last recorded "cycle" value of every unit in df_FD002.
plot_operatoinal_life_time_dist(df_FD002, col="cycle")

In [None]:
# Distribution of the last recorded "cycle" value of every unit in df_FD003.
plot_operatoinal_life_time_dist(df_FD003, col="cycle")

In [None]:
# Distribution of the last recorded "cycle" value of every unit in df_FD004.
plot_operatoinal_life_time_dist(df_FD004, col="cycle")

In [None]:

# for i, unit in enumerate(df_FD001["unit"].unique().tolist()):
    