In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [4]:
def getTFData(path = "dataset", sets = [1,2,3,4], testing = True, maxrul = None):
    """Load turbofan train (and optionally test) files and attach RUL targets.

    For every number ``i`` in ``sets`` the file ``{path}/train_FD00i.txt`` is
    read; when ``testing`` is true ``test_FD00i.txt`` and ``RUL_FD00i.txt``
    are read as well. Each frame gets the 26 column names
    ``id, dt, set1..set3, s1..s21`` plus two new columns:

    * ``rul``     - per unit, the ``dt`` column in reversed row order minus 1
                    (a countdown that ends at 0 when ``dt`` starts at 1). For
                    test data the value from the RUL file (row ``id - 1``) is
                    added on top.
    * ``rulfrac`` - ``rul`` divided by the largest ``rul`` of the same unit.

    Unit ids are shifted so they stay unique when several sub-datasets are
    stacked, and the returned frames carry a fresh 0..n-1 index.

    Parameters
    ----------
    path : str
        Directory that contains the text files.
    sets : iterable of int
        Sub-dataset numbers to load, processed in the given order.
    testing : bool
        If true, also load and return the test data.
    maxrul : number or None
        Optional upper cap for ``rul``. It is applied to the training data
        only, before ``rulfrac`` is computed.

    Returns
    -------
    pandas.DataFrame or tuple of two pandas.DataFrame
        ``train`` if ``testing`` is false, otherwise ``(train, test)``.

    Raises
    ------
    ValueError
        From ``pd.concat`` when ``sets`` is empty.
    """
    # id, delta time, settings, sensors
    colnames = ['id', 'dt', 'set1', 'set2', 'set3'] + ['s%d' % k for k in range(1, 22)]

    trainframes = []
    testframes = []

    # highest unit id handed out so far; the next sub-dataset is shifted past it
    maxidtr = 0
    maxidte = 0

    for i in sets:
        train = _tfReadFile(path, 'train', i)
        train.columns = colnames

        # create rul for training
        train['rul'] = _tfCountdown(train)
        if maxrul is not None:
            train['rul'] = train['rul'].clip(upper=maxrul)
        train['rulfrac'] = _tfRulFraction(train)

        # make id unique across sub-datasets
        train['id'] = train['id'] + maxidtr
        maxidtr = train['id'].max()

        trainframes.append(train)

        if testing:
            test = _tfReadFile(path, 'test', i)
            rul = _tfReadFile(path, 'RUL', i)

            test.columns = colnames

            # create rul for testing: countdown within the recorded rows
            # plus the true rul of the unit (row id - 1 of the RUL file)
            offset = (test['id'] - 1).map(rul[0])
            # units without an entry in the RUL file keep the plain countdown
            offset = offset.fillna(0).astype(rul[0].dtype)
            test['rul'] = _tfCountdown(test) + offset
            test['rulfrac'] = _tfRulFraction(test)

            # make id unique across sub-datasets
            test['id'] = test['id'] + maxidte
            maxidte = test['id'].max()

            testframes.append(test)

    # ignore_index gives the stacked frame one continuous 0..n-1 index
    train = pd.concat(trainframes, ignore_index=True)

    if testing:
        test = pd.concat(testframes, ignore_index=True)
        return train, test
    return train


def _tfReadFile(path, kind, i):
    # Read one whitespace-separated, header-less file such as train_FD001.txt.
    # sep=r"\s+" replaces the deprecated delim_whitespace=True option.
    return pd.read_table(f"{path}/{kind}_FD{i:03d}.txt", header=None, sep=r"\s+")


def _tfCountdown(frame):
    # Per unit: 'dt' in reversed row order, minus 1.
    # The reversal is done on a plain NumPy array on purpose: newer pandas
    # versions align a Series/DataFrame returned from a transform callable
    # back to the input index, which would silently undo the reversal.
    return frame.groupby('id')['dt'].transform(lambda col: col.to_numpy()[::-1] - 1)


def _tfRulFraction(frame):
    # Per unit: 'rul' divided by the unit's largest 'rul'.
    return frame['rul'] / frame.groupby('id')['rul'].transform('max')

In [1]:
def pltS(df, s, mod = 10):
    """Plot column ``s`` against ``rul`` for every ``mod``-th unit of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``id``, ``rul`` and ``s``.
    s : str
        Name of the column drawn on the y axis.
    mod : int
        Only every ``mod``-th unit id (in order of first appearance) is drawn.
    """
    plt.figure(figsize=(15,5))
    # counter starts at 1, so the first unit drawn is the mod-th one
    for counter, i in enumerate(df['id'].unique(), start=1):
        if counter % mod == 0:
            plt.plot('rul', s, data = df[df['id'] == i])
    # x axis runs backwards: rul shrinks towards 0 on the right
    plt.xlim(375, 0)
    plt.xticks(range(0, 400, 25))
    plt.ylabel(s)
    plt.xlabel('Remaining Useful Life')
    plt.show()

In [2]:
def pltSC(data, s, c, mod = 10):
    """Plot column ``s`` against ``rul`` for every ``mod``-th unit, using only
    the rows of ``data`` where column ``c`` equals 1.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``id``, ``rul``, ``s`` and ``c``. It is not modified.
    s : str
        Name of the column drawn on the y axis.
    c : str
        Name of the indicator column used as row filter (``== 1``).
    mod : int
        Only every ``mod``-th unit id among the filtered rows is drawn.
    """
    plt.figure(figsize=(15,5))
    # boolean indexing already yields a new frame, so no prior copy is needed
    df = data[data[c] == 1]
    # counter starts at 1, so the first unit drawn is the mod-th one
    for counter, i in enumerate(df['id'].unique(), start=1):
        if counter % mod == 0:
            plt.plot('rul', s, data = df[df['id'] == i])
    # x axis runs backwards: rul shrinks towards 0 on the right
    plt.xlim(500, 0)
    plt.xticks(range(0, 525, 25))
    plt.ylabel(s)
    plt.xlabel('Remaining Useful Life')
    plt.show()

In [6]:
def lagData(data, lagsize = 5, dropna = False):
    """Return a copy of ``data`` widened by lagged versions of its columns.

    Every column except ``id``, ``dt``, ``rul`` and ``rulfrac`` is followed by
    ``lagsize`` extra columns ``<name>_lg_1`` .. ``<name>_lg_<lagsize>`` that
    hold the column shifted down by that many rows. Shifting is done per
    ``id`` group, so values never leak from one unit into the next. The
    result is ordered group by group as ``groupby('id')`` yields them.

    With ``dropna`` every row containing a missing value is removed and the
    index is renumbered from 0.
    """
    untouched = ('id', 'dt', 'rul', 'rulfrac')

    def widen(unit):
        # one unit's rows: each original column, directly followed by its lags
        columns = {}
        for name in data.columns:
            series = unit[name]
            columns[name] = series
            if name in untouched:
                continue
            for lag in range(1, lagsize + 1):
                columns['%s_lg_%d' % (name, lag)] = series.shift(lag)
        return pd.DataFrame(columns, index = unit.index)

    lagged = pd.concat([widen(unit) for _, unit in data.groupby('id')])
    if not dropna:
        return lagged

    lagged = lagged.dropna()
    lagged.index = range(len(lagged))
    return lagged

In [None]:
#def aggregateCategories

In [2]:
def addSettings(data):
    """Add indicator columns ``c1`` .. ``c6`` for the setting combinations.

    ``set1`` is rounded to whole numbers and ``set2`` to two decimals (on a
    scratch copy; the setting columns of ``data`` stay as they are). The rows
    are then grouped by the resulting ``(set1, set2, set3)`` triple and the
    k-th group, in the order ``groupby`` yields them, gets a 1 in column
    ``c<k>``; all other indicator entries are 0.

    ``data`` is modified in place and also returned.
    """
    # rounding precision comes from prior exploration of the data
    setting_names = ['set1', 'set2', 'set3']
    rounded = data[setting_names].copy()
    rounded['set1'] = rounded['set1'].round()
    rounded['set2'] = rounded['set2'].round(decimals=2)

    # start with all indicators switched off
    for k in range(1, 7):
        data['c' + str(k)] = 0

    groups = rounded.groupby(by = ['set1','set2','set3'])
    for k, (_, members) in enumerate(groups, start=1):
        data.loc[members.index, ['c' + str(k)]] = 1
    return data

In [None]:
# TODO: handle the settings in a general way? (is that useful at all?)

In [8]:
def cScale(dftrain, dftest, sensors):
    """Standardise the sensor columns separately for each category c1 .. c6.

    For every indicator column a ``StandardScaler`` is fitted on the training
    rows where the indicator equals 1 and then applied to those training rows
    and to the matching test rows.

    Categories without any training row are skipped, because a scaler cannot
    be fitted on zero samples; test rows of such a category are left
    unscaled. Categories without test rows only scale the training part.

    Parameters
    ----------
    dftrain, dftest : pandas.DataFrame
        Both need the columns ``c1`` .. ``c6`` and every column in
        ``sensors``. Both frames are modified in place.
    sensors : list of str
        Names of the columns to scale.

    Returns
    -------
    tuple
        ``(dftrain, dftest)`` - the same objects that were passed in.
    """
    cols = list(sensors)

    # Scaled values are floats; convert non-float sensor columns up front so
    # the row-wise assignments below never write floats into integer columns.
    for frame in (dftrain, dftest):
        tocast = [name for name in cols if not pd.api.types.is_float_dtype(frame[name])]
        if tocast:
            frame[tocast] = frame[tocast].astype(float)

    for c in ['c1', 'c2', 'c3', 'c4', 'c5', 'c6']:
        trainrows = dftrain[c] == 1
        if not trainrows.any():
            # nothing to fit on for this category
            continue

        scaler = StandardScaler()
        scaler.fit(dftrain.loc[trainrows, cols])

        dftrain.loc[trainrows, cols] = scaler.transform(dftrain.loc[trainrows, cols])

        testrows = dftest[c] == 1
        if testrows.any():
            dftest.loc[testrows, cols] = scaler.transform(dftest.loc[testrows, cols])

    return dftrain, dftest