<font size = 6 color = green><b> Predictive Maintenance / 智能性维护实例 </b></font>
# Menu A-a: Load Data / 读取数据 

# Libraries / 工具库

In [1]:
import os
import re
import numpy as np 
from pathlib import Path 
import zipfile
import pandas as pd
import warnings

warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

## Pre-requisite / 准备工作
* Download / 下载压缩数据 from https://onedrive.live.com/?cid=7CAD6DA55D313592&id=7CAD6DA55D313592%21159&parId=7CAD6DA55D313592%21158&o=OneUp 
* Save CMAPSS zipfile to  C:/pdm/zipraw / 把下载的压缩文件存放在 C:/pdm/zipraw    
  （ or / 或 d:/pdm/zipraw）
* Unzipped data will be stored in the `raw_data` folder next to `zipraw` (e.g. C:/pdm/raw_data) / 解压后的数据存放在 raw_data 文件夹


## Prepare folders / 准备文件夹

In [2]:
def get_file_paths(data_parent_folder = None):
    """Build the dictionary of folder locations used by this notebook.

    Parameters
    ----------
    data_parent_folder : str, optional
        Root folder holding the data sub-folders. Any falsy value (None, "")
        falls back to the parent directory of the current working directory.

    Returns
    -------
    dict
        Keys "parent_folder", "raw_data_path", "zip_data_path" and
        "unzip_to_path". The sub-folder entries are the root with
        "/raw_data" or "/zipraw" appended; extraction target and raw-data
        folder are the same location.
    """
    root = data_parent_folder if data_parent_folder else os.path.dirname(os.getcwd())
    return {
        "parent_folder": root,
        "raw_data_path": root + '/raw_data',
        "zip_data_path": root + '/zipraw',
        "unzip_to_path": root + '/raw_data',
    }

### execute for this notebook/执行

In [4]:

# Resolve the folder layout once; the helper functions below read this module-level dict.
FILE_PATHS = get_file_paths()
print(FILE_PATHS)

{'parent_folder': 'c:\\pdm', 'raw_data_path': 'c:\\pdm/raw_data', 'zip_data_path': 'c:\\pdm/zipraw', 'unzip_to_path': 'c:\\pdm/raw_data'}


## Unzip Utility / 解压
* Use zipfile library to unzip / 用 zipfile 工具包解压

In [6]:
# Preview the default archive path that unzip_files() falls back to.
f'{FILE_PATHS["zip_data_path"]}/CMAPSS.zip' 

'c:\\pdm/zipraw/CMAPSS.zip'

In [8]:
def unzip_files(zip_file_name = None,  remove_zipped = False):
    """Extract a zip archive into the raw-data folder and list that folder.

    Parameters
    ----------
    zip_file_name : str, optional
        Archive to extract. A falsy value selects "CMAPSS.zip" inside
        FILE_PATHS["zip_data_path"].
    remove_zipped : bool, optional
        When truthy, the archive is deleted after extraction.

    Returns
    -------
    list of str
        One "<unzip_to_path>/<entry>" string per entry found in
        FILE_PATHS["unzip_to_path"] after extraction.
    """
    zip_dir = FILE_PATHS["zip_data_path"]
    target_dir = FILE_PATHS["unzip_to_path"]

    archive = zip_file_name if zip_file_name else f'{zip_dir}/CMAPSS.zip'

    # Only the zip folder is created here; a missing archive still raises when opened below.
    if not os.path.exists(zip_dir):
        os.makedirs(zip_dir)

    with zipfile.ZipFile(archive, 'r') as bundle:
        bundle.extractall(target_dir)

    if remove_zipped:
        os.remove(archive)

    extracted = []
    for entry in os.listdir(target_dir):
        extracted.append(target_dir + "/" + entry)
    return extracted


## File System Manipulations / 文件处理
* Use regex / 用Regex

In [16]:
def list_data_files(): 
    """Return "<raw_data_path>/<entry>" for every entry in the raw-data folder."""
    raw_dir = FILE_PATHS["raw_data_path"]
    paths = []
    for entry in os.listdir(raw_dir):
        paths.append(raw_dir + "/" + entry)
    return paths

In [18]:
def get_files_regex(file_name_str = "test"): 
    """Select the gzip files in the raw-data folder whose file name contains a token.

    Parameters
    ----------
    file_name_str : str, optional
        Token to look for in the file name. It is inserted into the regular
        expression unescaped, so regex syntax in it is honoured.

    Returns
    -------
    list of str
        The paths from list_data_files() whose base name contains
        ``file_name_str`` and ends with ".gz".
    """
    # Match the base name only: the directory part of the path (e.g. a parent
    # folder called "pdm_test") must not decide which files are selected.
    # Anchoring on "\.gz$" keeps out names that merely contain "gz" somewhere.
    pattern = re.compile(rf"{file_name_str}.*\.gz$")
    return [
        path for path in list_data_files()
        if pattern.search(os.path.basename(path))
    ]

## Load to dataframe / 读取到 pandas DataFrames

In [29]:
def read_data_files(file_name_str = "train", use_pd = True, sep = " ", columns = None):  
    """Load every matching gzip data file into one DataFrame.

    Parameters
    ----------
    file_name_str : str, optional
        Token forwarded to get_files_regex() to select the files.
    use_pd : bool, optional
        True reads with pandas.read_csv; False reads with numpy.loadtxt.
    sep : str, optional
        Field separator passed to pandas.read_csv. Not used on the
        numpy.loadtxt path, which is called with its default delimiter.
    columns : list of str, optional
        Column names. A falsy value selects the default 26 names:
        id, cycle, op1..op3, sensor1..sensor21.

    Returns
    -------
    pandas.DataFrame
        Rows of all files stacked in file order, with "id" and "cycle" cast to
        int and a "Flag" column holding the "FDnnn" token of the source file
        name. Each file keeps its own row index (the index is not reset).
        An empty DataFrame is returned when no file matches.

    Raises
    ------
    ValueError
        If a selected file name contains no "FD" + three digits token.
    """
    if not columns:
        columns = ["id", "cycle", "op1", "op2", "op3"] + [f"sensor{i}" for i in range(1, 22)]

    frames = []
    for f in get_files_regex(file_name_str = file_name_str):
        if use_pd:
            # Honour the caller's separator instead of a hard-coded " ".
            df_ = pd.read_csv(f, compression='gzip', index_col = False, names = columns, sep = sep)
        else:
            df_ = pd.DataFrame(np.loadtxt(f), columns = columns)
        df_[["id", "cycle"]] = df_[["id", "cycle"]].astype(int)

        # Take the flag from the file name only, so a folder name cannot shadow it.
        flags = re.findall(r"FD\d{3}", os.path.basename(str(f)))
        if not flags:
            raise ValueError(f"No 'FDnnn' data-set flag found in file name: {f}")
        df_["Flag"] = flags[0]
        frames.append(df_)

    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of re-copying the accumulated frame on every iteration.
    return pd.concat(frames, axis = 0)



In [38]:
def read_result(file_name_str = "RUL_FD", use_pd = True, sep = " ", columns = None):
    """Load every matching gzip result file into one DataFrame.

    Parameters
    ----------
    file_name_str : str, optional
        Token forwarded to get_files_regex() to select the files.
    use_pd : bool, optional
        True reads with pandas.read_csv; False reads with numpy.loadtxt.
    sep : str, optional
        Field separator passed to pandas.read_csv. Not used on the
        numpy.loadtxt path.
    columns : list of str, optional
        Column names. A falsy value selects ["rul"].

    Returns
    -------
    pandas.DataFrame
        Rows of all files stacked in file order plus a "Flag" column holding
        the "FDnnn" token of the source file name. Each file keeps its own
        row index (the index is not reset). An empty DataFrame is returned
        when no file matches.

    Raises
    ------
    ValueError
        If a selected file name contains no "FD" + three digits token.
    """
    raw_data_files = get_files_regex(file_name_str = file_name_str)
    if not columns:
        columns = ["rul"]

    frames = []
    for f in raw_data_files:
        if use_pd:
            df_ = pd.read_csv(f, compression='gzip', index_col = False, names = columns, sep = sep)
        else:
            df_ = pd.DataFrame(np.loadtxt(f), columns = columns)

        # Take the flag from the file name only, so a folder name cannot shadow it.
        flags = re.findall(r"FD\d{3}", os.path.basename(str(f)))
        if not flags:
            raise ValueError(f"No 'FDnnn' data-set flag found in file name: {f}")
        df_["Flag"] = flags[0]
        frames.append(df_)

    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of re-copying the accumulated frame on every iteration.
    return pd.concat(frames, axis = 0)
    

## Sum Up: prepare train, test and result file / 主程序

In [58]:
def prepare_dfs(use_pd = True, sep = " "): 
    """Load the train, test and RUL files and attach RUL labels to the test rows.

    Parameters
    ----------
    use_pd : bool, optional
        Forwarded to the readers (pandas.read_csv vs numpy.loadtxt).
    sep : str, optional
        Field separator forwarded to all three readers.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_train, df_test, df_result)``:

        * df_train: training rows as returned by read_data_files("train").
        * df_test: test rows plus "rul_failed" (last observed cycle + rul of
          that engine) and "remaining_rul" (rul_failed - cycle).
        * df_result: one row per (Flag, id) with "rul", "Flag", "id",
          "cycle" (last observed test cycle) and "rul_failed".

    Raises
    ------
    FileNotFoundError
        If the train, test or RUL selection yields no rows, e.g. because the
        archive has not been extracted yet.
    """
    columns = ["id", "cycle", "op1", "op2", "op3"] + [f"sensor{i}" for i in range(1, 22)]

    # Forward the caller's separator to every reader, not only the RUL one.
    df_train = read_data_files(file_name_str = "train", use_pd = use_pd, sep = sep, columns = columns)
    df_test = read_data_files(file_name_str = "test", use_pd = use_pd, sep = sep, columns = columns)
    df_result = read_result(file_name_str = "RUL_FD", use_pd = use_pd, sep = sep, columns = ["rul"])

    for label, frame in (("train", df_train), ("test", df_test), ("RUL_FD", df_result)):
        if frame.empty:
            raise FileNotFoundError(
                f"No rows loaded for '{label}' files; check that the archive was unzipped into the raw data folder."
            )

    # Cast the key columns by name rather than by position.
    key_dtypes = {"id": int, "cycle": int}
    df_train = df_train.astype(key_dtypes)
    df_test = df_test.astype(key_dtypes)

    # Last observed cycle of every test engine.
    df_max = df_test.groupby(["Flag", "id"])["cycle"].max().reset_index()

    # Number the RUL rows 1..n inside each data set, in file order.
    # Assumes row k of each RUL file belongs to engine id k - TODO confirm against the data set docs.
    df_result = df_result.reset_index(drop = True)
    df_result["id"] = (df_result.groupby("Flag").cumcount() + 1).astype(int)

    df_result = df_result.merge(df_max, on = ["Flag", "id"], how = "inner")
    df_result["rul_failed"] = df_result["rul"] + df_result["cycle"]

    df_test = df_test.merge(df_result[["rul_failed", "Flag", "id"]], on = ["Flag", "id"], how = "inner")
    df_test["remaining_rul"] = df_test["rul_failed"] - df_test["cycle"]

    return df_train, df_test, df_result


## Call prepare_dfs() / 调用主程序

In [59]:
# Build the train/test frames and the per-engine RUL table using the pandas reader.
df_train, df_test, df_result = prepare_dfs(use_pd=True)

Unnamed: 0,id,cycle,op1,op2,op3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,Flag
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.70,1400.60,14.62,...,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.4190,FD001
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.00,23.4236,FD001
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.20,14.62,...,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,FD001
3,1,4,0.0007,0.0000,100.0,518.67,642.35,1582.79,1401.87,14.62,...,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,FD001
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,2388.04,8133.80,8.4294,0.03,393,2388,100.0,38.90,23.4044,FD001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61244,249,251,9.9998,0.2500,100.0,489.05,605.33,1516.36,1315.28,10.52,...,2388.73,8185.69,8.4541,0.03,372,2319,100.0,29.11,17.5234,FD004
61245,249,252,0.0028,0.0015,100.0,518.67,643.42,1598.92,1426.77,14.62,...,2388.46,8185.47,8.2221,0.03,396,2388,100.0,39.38,23.7151,FD004
61246,249,253,0.0029,0.0000,100.0,518.67,643.68,1607.72,1430.56,14.62,...,2388.48,8193.94,8.2525,0.03,395,2388,100.0,39.78,23.8270,FD004
61247,249,254,35.0046,0.8400,100.0,449.44,555.77,1381.29,1148.18,5.48,...,2388.83,8125.64,9.0515,0.02,337,2223,100.0,15.26,9.0774,FD004
