#### The original data had "train" and "test" split into 4 files each, which is unnecessary, so merge each set into a single file  
In each of the original files the Unit numbers start at 1, so when we merge we need to renumber the Units so that they stay unique

In [1]:
import pandas as pd
from getConfig import getConfig

In [2]:
# Load the project settings via the local getConfig helper. The cells below
# read config["dataLoc"] and prepend it to every input/output file name.
config = getConfig()

In [3]:
# The train and test sets each come as four numbered parts.
trainFiles = ["train%d.csv" % part for part in range(1, 5)]
testFiles  = ["test%d.csv" % part for part in range(1, 5)]

# Column names applied to every input file: the unit and cycle identifiers,
# then setting1..setting3, then sensor1..sensor21.
cols = (["unit", "cycle"]
        + ["setting%d" % num for num in range(1, 4)]
        + ["sensor%d" % num for num in range(1, 22)])

In [4]:
def process(df, total):
    '''
    Prepare the DataFrame read from one input file for merging.

    Replaces the column labels with the module-level "cols" list, then shifts
    every value in the "unit" column up by "total" so the unit numbers do not
    clash with those of files handled earlier.

    df:    the DataFrame to prepare; it is modified in place
    total: the number of unique Units already processed
    Returns the same DataFrame object that was passed in.
    '''
    df.columns = cols
    shifted = df["unit"] + total
    df["unit"] = shifted
    return df

In [5]:
def mergeFiles(files):
    '''
    Read every file named in "files" and merge them into one DataFrame.

    Each file is read from config["dataLoc"] + <file name> and passed through
    process(), which renames the columns and shifts the unit numbers by the
    count of unique units seen in the earlier files.

    files: list of file names, in the order they should be merged
    Returns the concatenated DataFrame with a fresh 0..n-1 row index.
    Raises ValueError (from pd.concat) if "files" is empty.
    '''
    dfList = []
    total = 0          # This is the number of unique Units, used for renumbering

    for f in files:
        df = pd.read_csv(config["dataLoc"] + f)
        df = process(df, total)
        dfList.append(df)
        total += len(df["unit"].unique())   # Get the number of unique units processed in this file

    # Each file carries its own 0..n-1 index; without ignore_index the merged
    # frame would contain the same row labels once per file.
    return pd.concat(dfList, ignore_index=True)

In [6]:
def calcRUL(df):
    '''
    Add a "RUL" (remaining useful life) column to the training data.

    For Training, each unit has a number of cycles (rows). The last cycle is the one where Failure
    occurred, so the last row of a unit gets RUL 0 and each earlier row gets one more.
    So if there are 20 cycles, then cycle 3 would have RUL of 17.

    RUL is assigned by row position within each unit, so the rows of a unit are
    assumed to already be in cycle order.

    df: DataFrame with a "unit" column; it is left unmodified
    Returns a new DataFrame ordered by unit (row order within a unit is kept),
    with "unit" as the first column, "RUL" as the last and a fresh 0..n-1 index.
    An empty input gives an empty result with the extra "RUL" column.
    '''
    # Non-inplace set_index works on a copy, so the caller's frame keeps its "unit" column.
    byUnit = df.set_index("unit")
    # Rows without a unit belong to no group and are left out of the result.
    byUnit = byUnit[byUnit.index.notna()]
    # mergesort is stable: rows keep their relative order inside each unit.
    byUnit = byUnit.sort_index(kind="mergesort")

    # Counting down inside each unit gives n-1, n-2, ..., 0.
    rul = byUnit.groupby(level=0).cumcount(ascending=False)

    # The index has duplicate labels, so attach the values by position rather than by label.
    return byUnit.assign(RUL=rul.to_numpy()).reset_index()

In [7]:
# Merge the training files, add the RUL column, and write the result to
# config["dataLoc"] + "trainMerged.csv".
df = mergeFiles(trainFiles)
df = calcRUL(df)
# index=False keeps the DataFrame's row index out of the CSV.
df.to_csv(config["dataLoc"]+"trainMerged.csv", index=False)

In [8]:
# Merge the test files and write them to config["dataLoc"] + "testMerged.csv".
# calcRUL is not applied to the test set here.
df = mergeFiles(testFiles)
# index=False keeps the DataFrame's row index out of the CSV.
df.to_csv(config["dataLoc"]+"testMerged.csv", index=False)