In [7]:
import pandas as pd
import numpy as np
import networkx as nx
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
import matplotlib.pyplot as plt
import graphviz
from sklearn.linear_model import LassoCV

In [None]:
def load_data(telemetry_path: str, errors_path: str, failures_path: str, maint_path: str, machines_path: str, machine_id: int = 0) -> pd.DataFrame:
    """Build a per-machine feature table from the telemetry and event CSV files.

    Telemetry rows are left-joined with one-hot encoded error, failure and
    maintenance events on ``(datetime, machineID)``. Missing values are then
    replaced with 0 and the four sensor columns are standardised separately
    for every machine.

    Args:
        telemetry_path: CSV with ``datetime``, ``machineID`` and the sensor
            columns ``volt``, ``rotate``, ``pressure`` and ``vibration``.
        errors_path: CSV of error events keyed by ``datetime``/``machineID``.
        failures_path: CSV of failure events keyed by ``datetime``/``machineID``.
        maint_path: CSV of maintenance events keyed by ``datetime``/``machineID``.
        machines_path: Accepted for interface compatibility; this function
            does not read it.
        machine_id: Machine to keep. ``0`` (the default) keeps every machine.

    Returns:
        A DataFrame indexed by ``datetime`` and ordered by
        ``(datetime, machineID)``, with exactly one row per telemetry reading.
        Event indicator columns are floats (1.0 where an event occurred, else
        0.0) and are renamed to ``error_*``, ``failure_c*`` and ``maint_c*``
        where the expected dummy names are present.
    """
    keys = ["datetime", "machineID"]
    sensor_cols = ["volt", "rotate", "pressure", "vibration"]

    def process_data(path: str, one_hot: bool = False) -> pd.DataFrame:
        """Read one CSV, parse ``datetime`` and optionally one-hot encode it."""
        df = pd.read_csv(path)
        df["datetime"] = pd.to_datetime(df["datetime"])

        if one_hot:
            # Float dummies stay numeric after the left join and fillna(0);
            # the default bool dtype would degrade to a mixed object column.
            df = pd.get_dummies(df, dtype=float)
            # Several events can share the same machine and timestamp. Collapse
            # them into a single indicator row so the left join below cannot
            # duplicate telemetry rows.
            df = df.groupby(keys, as_index=False).max()

        return df

    df_merged = process_data(telemetry_path)

    # Filter telemetry first: scaling is per machine, so the result is the same
    # and the other machines are never merged or scaled. Event frames are
    # encoded unfiltered so every dummy column exists for any machine.
    if machine_id != 0:
        df_merged = df_merged[df_merged["machineID"] == machine_id]

    # Sorting on both keys makes the order of same-timestamp rows deterministic.
    df_merged = df_merged.sort_values(keys)

    for events_path in (errors_path, failures_path, maint_path):
        df_events = process_data(events_path, one_hot=True)
        df_merged = df_merged.merge(right=df_events, how="left", on=keys)

    df_merged = df_merged.set_index("datetime").fillna(0)

    # Standardise the sensor readings independently for each machine.
    scaler = StandardScaler()
    for each in df_merged["machineID"].unique():
        mask = df_merged["machineID"] == each
        df_merged.loc[mask, sensor_cols] = scaler.fit_transform(df_merged.loc[mask, sensor_cols])

    rename_map = {"errorID_error1": "error_1", "errorID_error2": "error_2", "errorID_error3": "error_3", "errorID_error4": "error_4", "errorID_error5": "error_5",
                  "failure_comp1": "failure_c1", "failure_comp2": "failure_c2", "failure_comp3": "failure_c3", "failure_comp4": "failure_c4",
                  "comp_comp1": "maint_c1", "comp_comp2": "maint_c2", "comp_comp3": "maint_c3", "comp_comp4": "maint_c4"}
    df_merged = df_merged.rename(columns=rename_map)

    return df_merged

In [24]:
# Build the feature table for machine 1 only; machine_id=0 would keep every machine.
data = load_data(
    telemetry_path="./data/PdM_telemetry.csv",
    errors_path="./data/PdM_errors.csv",
    failures_path="./data/PdM_failures.csv",
    maint_path="./data/PdM_maint.csv",
    # NOTE: load_data accepts machines_path but does not read the file.
    machines_path="./data/PdM_machines.csv",
    machine_id=1
)
# Preview the first rows as the cell's rich output.
data.head()

                     machineID        volt      rotate    pressure  vibration
datetime                                                                     
2015-01-01 06:00:00          1  176.217853  418.504078  113.077935  45.087686
2015-01-01 06:00:00         53  183.084582  420.980061  109.235805  45.737760
2015-01-01 06:00:00         99  168.596133  384.747105  110.921131  41.944692
2015-01-01 06:00:00         12  171.404215  576.923563   97.145400  47.725909
2015-01-01 06:00:00          6  136.878588  492.088420  149.003582  22.973289


Unnamed: 0_level_0,machineID,volt,rotate,pressure,vibration,error_1,error_2,error_3,error_4,error_5,failure_c1,failure_c2,failure_c3,failure_c4,maint_c1,maint_c2,maint_c3,maint_c4
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2015-01-01 06:00:00,1,0.350729,-0.532484,1.142616,0.811015,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-01 07:00:00,1,-0.519916,-0.834129,-0.479703,0.50919,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-01 08:00:00,1,0.009488,1.551269,-2.341926,-1.156206,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-01 09:00:00,1,-0.547094,-1.91765,0.789984,0.095898,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015-01-01 10:00:00,1,-0.863849,-0.209469,1.032915,-2.63283,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
