In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA

In [24]:
# Preprocessing fitted by the most recent training call (PCA, scaler and the
# feature names they were fitted on). Non-training data is transformed with
# these instead of being re-fitted.
_FITTED_PREPROCESSING = {}


def Transform_Scale_DS(raw_data, IsTrainingData):
    """Build scaled sensor features plus a PC1 column and write them to CSV.

    Parameters
    ----------
    raw_data : pandas.DataFrame
        Must contain 'Engine', 'Cycles', 'Remaining Cycles' and the sensor
        columns listed in ``unused_sensors``. Every remaining column is
        treated as a sensor feature.
    IsTrainingData : bool or int
        Truthy: fit the PCA and the scaler on ``raw_data``, remember them, and
        write 'XTrain.csv', 'YTrain.csv' and 'Engine.csv'.
        Falsy: transform ``raw_data`` with the PCA/scaler remembered from the
        last training call and write 'XTest.csv' and 'YTest.csv'. If no
        matching fitted preprocessing is available, a RuntimeWarning is issued
        and the transforms are fitted on ``raw_data`` itself.

    Returns
    -------
    None
        Results are written to CSV files in the current working directory.
    """
    unused_sensors = ['Sensor1', 'Sensor2', 'Sensor3', 'Sensor4', 'Sensor8', 'Sensor9',
                      'Sensor13', 'Sensor19', 'Sensor21', 'Sensor22']
    non_feature_cols = ['Engine', 'Cycles', 'Remaining Cycles']

    # A fresh 0..n-1 index keeps the positional arrays built below aligned with df.
    df = raw_data.drop(columns=unused_sensors).reset_index(drop=True)

    # Select features by name so the result does not depend on column order.
    sensor_cols = [col for col in df.columns if col not in non_feature_cols]
    sensors = df[sensor_cols]

    fitted = _FITTED_PREPROCESSING
    reuse_fitted = (not IsTrainingData) and fitted.get('sensor_cols') == sensor_cols

    if reuse_fitted:
        pca, ss = fitted['pca'], fitted['scaler']
        # The scaler was fitted on [sensors..., PC1]; its first k statistics are
        # the per-sensor mean/scale, i.e. the standardisation the PCA was fitted on.
        k = len(sensor_cols)
        scaled_sensors = (sensors.to_numpy(dtype=float) - ss.mean_[:k]) / ss.scale_[:k]
        pc1 = pca.transform(scaled_sensors)[:, 0]
    else:
        if not IsTrainingData:
            warnings.warn(
                "Transform_Scale_DS: no matching preprocessing fitted on training data; "
                "fitting PCA/scaler on the supplied data instead.",
                RuntimeWarning,
            )
        pca, components = PCA_Custom(sensors)
        pc1 = np.asarray(components)[:, 0]

    sens_df = sensors.assign(PC1=pc1)

    if reuse_fitted:
        scaled_features = ss.transform(sens_df)
    else:
        ss = StandardScaler()
        scaled_features = ss.fit_transform(sens_df)
        if IsTrainingData:
            fitted.update(pca=pca, scaler=ss, sensor_cols=sensor_cols)

    scaled_features_df = pd.DataFrame(scaled_features, columns=sens_df.columns)
    # Cycles is carried through unscaled.
    scaled_features_df['Cycles'] = df['Cycles']

    if IsTrainingData:
        scaled_features_df.to_csv('XTrain.csv', index=False)
        df['Remaining Cycles'].to_csv('YTrain.csv', index=False)
        df['Engine'].to_csv('Engine.csv', index=False)
    else:
        scaled_features_df.to_csv('XTest.csv', index=False)
        df['Remaining Cycles'].to_csv('YTest.csv', index=False)

In [25]:
# Standardise the features, then project them onto the principal components
# needed to reach the requested explained-variance share.
def PCA_Custom(local_df, n_components=0.95):
    """Fit a PCA on standardised data and return the model with the projection.

    Parameters
    ----------
    local_df : array-like of shape (n_samples, n_features)
        Numeric feature matrix; it is standardised with a freshly fitted
        StandardScaler before the PCA is fitted.
    n_components : float or int, default 0.95
        Passed straight to ``PCA(n_components=...)``.

    Returns
    -------
    tuple
        ``(pca, components)`` where ``pca`` is the fitted PCA object and
        ``components`` is the projection of the standardised input. The caller
        picks the columns it needs (column 0 is the first component).
    """
    scaled = StandardScaler().fit_transform(local_df)

    # fit() returns the estimator itself; transform() is kept as a separate call.
    pca = PCA(n_components=n_components).fit(scaled)
    components = pca.transform(scaled)
    return pca, components

In [26]:
# Load the FD001 training set and run the preprocessing in training mode
# (writes XTrain.csv, YTrain.csv and Engine.csv).
# NOTE(review): absolute, machine-specific path; the notebook only runs where this file exists.
train_csv_path = 'E:\\NextStep\\PdM\\Predictive-Maintenance\\Datasets\\train_FD001.csv'
raw = pd.read_csv(train_csv_path)
Transform_Scale_DS(raw_data=raw, IsTrainingData=1)

In [27]:
# Load the FD001 test set and run the preprocessing in non-training mode
# (writes XTest.csv and YTest.csv).
# NOTE(review): absolute, machine-specific path; the notebook only runs where this file exists.
test_csv_path = 'E:\\NextStep\\PdM\\Predictive-Maintenance\\Datasets\\test_FD001.csv'
raw = pd.read_csv(test_csv_path)
Transform_Scale_DS(raw_data=raw, IsTrainingData=0)

In [9]:
from sklearn.model_selection import GroupShuffleSplit

In [10]:
# Group-aware shuffle splitter: rows sharing a group value stay on the same side
# of every split. The seed is fixed so the folds are reproducible.
N_SPLITS, TRAIN_FRACTION, SPLIT_SEED = 2, 0.7, 42
gss = GroupShuffleSplit(
    n_splits=N_SPLITS,
    train_size=TRAIN_FRACTION,
    random_state=SPLIT_SEED,
)

In [17]:
# Rebuild the training artefacts that Transform_Scale_DS(raw, 1) wrote to disk.
# `df` and `scaled_features_df` inside that function are locals and do not exist
# at notebook level, yet the split cells that follow read both names.
# Assumes the CSVs carry a header row (pandas' to_csv default) — TODO confirm.
scaled_features_df = pd.read_csv('XTrain.csv')
df = pd.concat([pd.read_csv('Engine.csv'), pd.read_csv('YTrain.csv')], axis=1)
groups = df['Engine']

In [19]:
# Report, for each fold, the row positions and the engines on either side of the split.
# `train_index` / `test_index` keep the values of the last fold after the loop ends.
fold_splits = gss.split(
    scaled_features_df,
    df['Remaining Cycles'],
    df['Engine'],
)
for i, (train_index, test_index) in enumerate(fold_splits):
    print(f"Fold {i}:")
    print(f"  Train: index={train_index}, group={groups[train_index].unique()}")
    print(f"  Test:  index={test_index}, group={groups[test_index].unique()}")

Fold 0:
  Train: index=[  192   193   194 ... 20628 20629 20630], group=[  2   3   4   6   7   8   9  12  14  15  17  18  20  21  22  24  25  26
  28  29  30  33  35  36  37  38  39  42  44  47  48  49  50  51  52  53
  55  57  58  59  60  61  62  63  64  65  66  67  68  69  72  75  76  79
  80  82  83  85  86  87  88  90  92  93  94  95  96  98  99 100]
  Test:  index=[    0     1     2 ... 20087 20088 20089], group=[ 1  5 10 11 13 16 19 23 27 31 32 34 40 41 43 45 46 54 56 70 71 73 74 77
 78 81 84 89 91 97]
Fold 1:
  Train: index=[    0     1     2 ... 20428 20429 20430], group=[ 1  2  3  4  5  6  7  8  9 11 12 13 14 15 20 22 23 25 27 28 29 30 32 33
 34 35 36 37 39 41 42 43 44 45 46 47 48 49 51 52 53 58 59 62 63 64 65 68
 69 70 71 72 73 76 77 78 79 81 82 85 86 87 91 92 93 95 96 97 98 99]
  Test:  index=[ 1914  1915  1916 ... 20628 20629 20630], group=[ 10  16  17  18  19  21  24  26  31  38  40  50  54  55  56  57  60  61
  66  67  74  75  80  83  84  88  89  90  94 100]


In [21]:
# Feature rows on the training side of the most recent fold.
scaled_features_df.iloc[train_index]

Unnamed: 0,Sensor5,Sensor6,Sensor7,Sensor10,Sensor11,Sensor12,Sensor14,Sensor15,Sensor16,Sensor17,Sensor18,Sensor20,Sensor23,Sensor24,PC1,Cycles
0,-1.721725,-0.134255,-0.925936,1.121141,-0.516338,-0.862813,-0.266467,0.334262,-1.058890,-0.269071,-0.603816,-0.781710,1.348493,1.194427,-0.986836,1
1,-1.061780,0.211528,-0.643726,0.431930,-0.798093,-0.958818,-0.191583,1.174899,-0.363646,-0.642845,-0.275852,-0.781710,1.016528,1.236922,-0.791632,2
2,-0.661813,-0.413166,-0.525953,1.008155,-0.234584,-0.557139,-1.015303,1.364721,-0.919841,-0.551629,-0.649144,-2.073094,0.739891,0.503423,-0.998349,3
3,-0.661813,-1.261314,-0.784831,1.222827,0.188048,-0.713826,-1.539489,1.961302,-0.224597,-0.520176,-1.971665,-0.781710,0.352598,0.777792,-1.133894,4
4,-0.621816,-1.251528,-0.301518,0.714393,-0.516338,-0.457059,-0.977861,1.052871,-0.780793,-0.521748,-0.339845,-0.136018,0.463253,1.059552,-0.812394,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20426,2.077962,1.609343,2.121711,-2.573479,2.442085,-0.469286,2.579113,-2.458824,2.695429,-1.059081,1.235982,1.801059,-1.749846,-2.283689,2.416882,181
20427,1.677995,2.415084,2.376144,-1.985955,2.301208,-0.087533,2.317020,-2.838467,2.417331,-0.669056,1.873245,-0.136018,-2.358448,-2.977465,2.421821,182
20428,2.557922,2.679315,2.107267,-2.833345,3.146471,-0.427624,3.515159,-2.336796,2.973526,-0.883465,2.539838,1.155367,-1.362553,-1.346030,2.701216,183
20429,2.837899,1.578353,3.067226,-2.098941,2.723839,-0.201652,2.167253,-2.133416,3.251624,-0.948469,3.121107,1.155367,-2.026483,-2.234728,2.690129,184


In [23]:
# Feature rows on the held-out side of the most recent fold.
scaled_features_df.iloc[test_index]

Unnamed: 0,Sensor5,Sensor6,Sensor7,Sensor10,Sensor11,Sensor12,Sensor14,Sensor15,Sensor16,Sensor17,Sensor18,Sensor20,Sensor23,Sensor24,PC1,Cycles
1914,-1.521742,-0.422952,-1.859229,1.132439,-1.361602,-0.624161,-1.839024,1.527425,-1.197939,-0.322018,-1.019770,-1.427402,1.237838,2.228162,-1.642114,1
1915,-0.921791,-0.605630,-1.878117,1.573082,-0.516338,-0.371923,-1.277396,2.205358,-0.780793,-0.062001,-1.475720,-2.073094,1.293165,0.193025,-1.441040,2
1916,-1.041781,-1.543487,-1.491467,1.188932,-1.220725,-0.805754,-1.314838,1.093547,-1.754134,-0.361859,0.001451,-0.781710,1.071855,1.991669,-1.418686,3
1917,-1.081778,-0.057596,-2.309210,0.929066,-1.502479,-0.353809,-1.614373,1.486749,-0.919841,-0.373916,-1.315737,-1.427402,0.075961,0.385176,-1.291169,4
1918,-1.101776,-1.538594,-1.395916,1.008155,-1.925111,-0.864172,-1.277396,1.188458,-1.197939,-0.251772,-1.446390,-0.781710,1.348493,1.620300,-1.545065,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,1.618000,1.216258,2.188375,-2.189329,1.315066,0.012547,1.980044,-2.607969,2.278282,-0.322542,1.425294,2.446751,-1.805173,-2.921113,2.292633,196
20627,1.717992,2.279706,2.738351,-2.833345,1.878576,-0.006020,1.867718,-2.350355,1.722087,-0.380207,1.913240,1.155367,-2.856395,-1.203764,2.341811,197
20628,1.478011,1.946971,2.138377,-2.742957,2.019453,0.029755,2.054927,-1.902919,2.000184,-0.141684,3.265092,3.092444,-2.081810,-3.292481,2.669470,198
20629,1.098043,2.403666,1.955051,-3.036719,2.160330,0.383884,3.178182,-2.363913,1.861136,-0.233948,2.579834,1.155367,-2.911722,-2.085072,2.583058,199
