## Sensor Variables in Data
| Index	| Symbol | Description | Unit |
| --- | --- | --- | --- |
| 1	| T2 | Total temperature at fan inlet | °R |
| 2	| T24 | Total temperature at LPC outlet | °R |
| 3	| T30 | Total temperature at HPC outlet | °R |
| 4	| T50 | Total temperature at LPT outlet | °R |
| 5	| P2 | Pressure at fan inlet | psia |
| 6	| P15 | Total pressure in bypass-duct | psia |
| 7	| P30 | Total pressure at HPC outlet | psia |
| 8	| Nf | Physical fan speed | rpm |
| 9	| Nc | Physical core speed | rpm |
| 10 | epr | Engine pressure ratio (P50/P2) | — |
| 11 | Ps30 | Static pressure at HPC outlet | psia |
| 12 | phi | Ratio of fuel flow to Ps30 | pps/psi |
| 13 | NRf | Corrected fan speed | rpm |
| 14 | NRc | Corrected core speed | rpm |
| 15 | BPR | Bypass ratio | — |
| 16 | farB | Burner fuel-air ratio | — |
| 17 | htBleed | Bleed enthalpy | — |
| 18 | Nf_dmd | Demanded fan speed | rpm |
| 19 | PCNFR_dmd | Demanded corrected fan speed | rpm |
| 20 | W31 | HPT coolant bleed | lbm/s |
| 21 | W32 | LPT coolant bleed | lbm/s |


In [21]:
from pandas import read_csv, merge
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [22]:
# Column labels for the data files, in file order: engine id, cycle counter,
# three operating settings, then the 21 sensor channels s1..s21.
DATA_LABELS = (
    ['unit', 'cycles']
    + [f'op_setting{i}' for i in range(1, 4)]
    + [f's{i}' for i in range(1, 22)]
)

In [23]:
# Load the training set. Fields are separated by single spaces; only the first
# len(DATA_LABELS) fields of each row are read and labelled with DATA_LABELS.
df = read_csv(
    "Data/train_FD001.txt",
    sep=" ",
    names=DATA_LABELS,
    usecols=range(len(DATA_LABELS)),
)


In [24]:
# Per-column summary statistics of the training frame; a standard deviation of
# (almost) zero marks a column whose value never changes.
df.describe()


Unnamed: 0,unit,cycles,op_setting1,op_setting2,op_setting3,s1,s2,s3,s4,s5,...,s12,s13,s14,s15,s16,s17,s18,s19,s20,s21
count,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,...,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0
mean,51.506568,108.807862,-9e-06,2e-06,100.0,518.67,642.680934,1590.523119,1408.933782,14.62,...,521.41347,2388.096152,8143.752722,8.442146,0.03,393.210654,2388.0,100.0,38.816271,23.289705
std,29.227633,68.88099,0.002187,0.000293,0.0,0.0,0.500053,6.13115,9.000605,1.7764e-15,...,0.737553,0.071919,19.076176,0.037505,1.3878120000000003e-17,1.548763,0.0,0.0,0.180746,0.108251
min,1.0,1.0,-0.0087,-0.0006,100.0,518.67,641.21,1571.04,1382.25,14.62,...,518.69,2387.88,8099.94,8.3249,0.03,388.0,2388.0,100.0,38.14,22.8942
25%,26.0,52.0,-0.0015,-0.0002,100.0,518.67,642.325,1586.26,1402.36,14.62,...,520.96,2388.04,8133.245,8.4149,0.03,392.0,2388.0,100.0,38.7,23.2218
50%,52.0,104.0,0.0,0.0,100.0,518.67,642.64,1590.1,1408.04,14.62,...,521.48,2388.09,8140.54,8.4389,0.03,393.0,2388.0,100.0,38.83,23.2979
75%,77.0,156.0,0.0015,0.0003,100.0,518.67,643.0,1594.38,1414.555,14.62,...,521.95,2388.14,8148.31,8.4656,0.03,394.0,2388.0,100.0,38.95,23.3668
max,100.0,362.0,0.0087,0.0006,100.0,518.67,644.53,1616.91,1441.49,14.62,...,523.38,2388.56,8293.72,8.5848,0.03,400.0,2388.0,100.0,39.43,23.6184


In [25]:
# Columns whose standard deviation falls below this threshold are treated as
# constant and removed from the training frame.
stddev_threshold = 0.00001

column_std = df.std()
constant_columns = column_std[column_std < stddev_threshold].index.values
df_stddev_filtered = df.drop(columns=constant_columns)
df_stddev_filtered


Unnamed: 0,unit,cycles,op_setting1,op_setting2,s2,s3,s4,s6,s7,s8,s9,s11,s12,s13,s14,s15,s17,s20,s21
0,1,1,-0.0007,-0.0004,641.82,1589.70,1400.60,21.61,554.36,2388.06,9046.19,47.47,521.66,2388.02,8138.62,8.4195,392,39.06,23.4190
1,1,2,0.0019,-0.0003,642.15,1591.82,1403.14,21.61,553.75,2388.04,9044.07,47.49,522.28,2388.07,8131.49,8.4318,392,39.00,23.4236
2,1,3,-0.0043,0.0003,642.35,1587.99,1404.20,21.61,554.26,2388.08,9052.94,47.27,522.42,2388.03,8133.23,8.4178,390,38.95,23.3442
3,1,4,0.0007,0.0000,642.35,1582.79,1401.87,21.61,554.45,2388.11,9049.48,47.13,522.86,2388.08,8133.83,8.3682,392,38.88,23.3739
4,1,5,-0.0019,-0.0002,642.37,1582.85,1406.22,21.61,554.00,2388.06,9055.15,47.28,522.19,2388.04,8133.80,8.4294,393,38.90,23.4044
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,196,-0.0004,-0.0003,643.49,1597.98,1428.63,21.61,551.43,2388.19,9065.52,48.07,519.49,2388.26,8137.60,8.4956,397,38.49,22.9735
20627,100,197,-0.0016,-0.0005,643.54,1604.50,1433.58,21.61,550.86,2388.23,9065.11,48.04,519.68,2388.22,8136.50,8.5139,395,38.30,23.1594
20628,100,198,0.0004,0.0000,643.42,1602.46,1428.18,21.61,550.94,2388.24,9065.90,48.09,520.01,2388.24,8141.05,8.5646,398,38.44,22.9333
20629,100,199,-0.0011,0.0003,643.23,1605.26,1426.53,21.61,550.68,2388.25,9073.72,48.39,519.67,2388.23,8139.29,8.5389,395,38.29,23.0640


In [26]:
# Attach to every row the last cycle recorded for its engine ('maxcycles'),
# then derive RUL as the number of cycles left until that last cycle.
last_cycle_of_unit = df_stddev_filtered.groupby('unit')['cycles'].transform('max')
df1 = df_stddev_filtered.assign(maxcycles=last_cycle_of_unit)
df1['RUL'] = df1['maxcycles'] - df1['cycles']
df1


Unnamed: 0,unit,cycles,op_setting1,op_setting2,s2,s3,s4,s6,s7,s8,...,s11,s12,s13,s14,s15,s17,s20,s21,maxcycles,RUL
0,1,1,-0.0007,-0.0004,641.82,1589.70,1400.60,21.61,554.36,2388.06,...,47.47,521.66,2388.02,8138.62,8.4195,392,39.06,23.4190,192,191
1,1,2,0.0019,-0.0003,642.15,1591.82,1403.14,21.61,553.75,2388.04,...,47.49,522.28,2388.07,8131.49,8.4318,392,39.00,23.4236,192,190
2,1,3,-0.0043,0.0003,642.35,1587.99,1404.20,21.61,554.26,2388.08,...,47.27,522.42,2388.03,8133.23,8.4178,390,38.95,23.3442,192,189
3,1,4,0.0007,0.0000,642.35,1582.79,1401.87,21.61,554.45,2388.11,...,47.13,522.86,2388.08,8133.83,8.3682,392,38.88,23.3739,192,188
4,1,5,-0.0019,-0.0002,642.37,1582.85,1406.22,21.61,554.00,2388.06,...,47.28,522.19,2388.04,8133.80,8.4294,393,38.90,23.4044,192,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,196,-0.0004,-0.0003,643.49,1597.98,1428.63,21.61,551.43,2388.19,...,48.07,519.49,2388.26,8137.60,8.4956,397,38.49,22.9735,200,4
20627,100,197,-0.0016,-0.0005,643.54,1604.50,1433.58,21.61,550.86,2388.23,...,48.04,519.68,2388.22,8136.50,8.5139,395,38.30,23.1594,200,3
20628,100,198,0.0004,0.0000,643.42,1602.46,1428.18,21.61,550.94,2388.24,...,48.09,520.01,2388.24,8141.05,8.5646,398,38.44,22.9333,200,2
20629,100,199,-0.0011,0.0003,643.23,1605.26,1426.53,21.61,550.68,2388.25,...,48.39,519.67,2388.23,8139.29,8.5389,395,38.29,23.0640,200,1


In [56]:
# Training matrix: every df1 column except the engine id and the cycle counter.
# NOTE(review): df1 also holds 'maxcycles' and 'RUL' at this point and they are
# not dropped here, so the regression target is part of this matrix.
train_x = df1.drop(columns=["unit", "cycles"]).to_numpy()

train_x

array([[-7.00000e-04, -4.00000e-04,  6.41820e+02, ...,  2.34190e+01,
         1.92000e+02,  1.91000e+02],
       [ 1.90000e-03, -3.00000e-04,  6.42150e+02, ...,  2.34236e+01,
         1.92000e+02,  1.90000e+02],
       [-4.30000e-03,  3.00000e-04,  6.42350e+02, ...,  2.33442e+01,
         1.92000e+02,  1.89000e+02],
       ...,
       [ 4.00000e-04,  0.00000e+00,  6.43420e+02, ...,  2.29333e+01,
         2.00000e+02,  2.00000e+00],
       [-1.10000e-03,  3.00000e-04,  6.43230e+02, ...,  2.30640e+01,
         2.00000e+02,  1.00000e+00],
       [-3.20000e-03, -5.00000e-04,  6.43850e+02, ...,  2.30522e+01,
         2.00000e+02,  0.00000e+00]])

In [28]:
# Load the test set with the same layout as the training file.
df_test = read_csv("Data/test_FD001.txt", delimiter=" ", names=DATA_LABELS, usecols=range(len(DATA_LABELS)))

# Keep exactly the columns that were kept for training. Re-running the
# standard-deviation filter on the test data could drop a different set of
# columns and silently misalign the train and test feature layouts.
df_test_stddev_filtered = df_test[df_stddev_filtered.columns]

# Last recorded cycle per engine, joined back onto every row of that engine.
df2 = merge(
    df_test_stddev_filtered,
    df_test_stddev_filtered.groupby('unit', as_index=False)['cycles'].max(),
    how='left',
    on='unit',
).rename(columns={"cycles_x": "cycles", "cycles_y": "maxcycles"})

# NOTE(review): this is the number of cycles left until the last *recorded*
# test cycle. If the test trajectories stop before failure (as in the C-MAPSS
# benchmark, which ships the true values in a separate RUL file), this is not
# the true remaining useful life — confirm before using it for evaluation.
df2['RUL'] = df2['maxcycles'] - df2['cycles']
df2

Unnamed: 0,unit,cycles,op_setting1,op_setting2,s2,s3,s4,s6,s7,s8,...,s11,s12,s13,s14,s15,s17,s20,s21,maxcycles,RUL
0,1,1,0.0023,0.0003,643.02,1585.29,1398.21,21.61,553.90,2388.04,...,47.20,521.72,2388.03,8125.55,8.4052,392,38.86,23.3735,31,30
1,1,2,-0.0027,-0.0003,641.71,1588.45,1395.42,21.61,554.85,2388.01,...,47.50,522.16,2388.06,8139.62,8.3803,393,39.02,23.3916,31,29
2,1,3,0.0003,0.0001,642.46,1586.94,1401.34,21.61,554.11,2388.05,...,47.50,521.97,2388.03,8130.10,8.4441,393,39.08,23.4166,31,28
3,1,4,0.0042,0.0000,642.44,1584.12,1406.42,21.61,554.07,2388.03,...,47.28,521.38,2388.05,8132.90,8.3917,391,39.00,23.3737,31,27
4,1,5,0.0014,0.0000,642.51,1587.19,1401.92,21.61,554.16,2388.01,...,47.31,522.15,2388.03,8129.54,8.4031,390,38.99,23.4130,31,26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13091,100,194,0.0049,0.0000,643.24,1599.45,1415.79,21.61,553.41,2388.02,...,47.69,520.69,2388.00,8213.28,8.4715,394,38.65,23.1974,198,4
13092,100,195,-0.0011,-0.0001,643.22,1595.69,1422.05,21.61,553.22,2388.05,...,47.60,521.05,2388.09,8210.85,8.4512,395,38.57,23.2771,198,3
13093,100,196,-0.0006,-0.0003,643.44,1593.15,1406.82,21.61,553.04,2388.11,...,47.57,521.18,2388.04,8217.24,8.4569,395,38.62,23.2051,198,2
13094,100,197,-0.0038,0.0001,643.26,1594.99,1419.36,21.61,553.37,2388.07,...,47.61,521.33,2388.08,8220.48,8.4711,395,38.66,23.2699,198,1


In [29]:
# Test matrix: every df2 column except the engine id and the cycle counter.
# NOTE(review): df2 also holds 'maxcycles' and 'RUL' at this point and they are
# not dropped here, so the quantity being predicted is part of this matrix.
test_x = df2.drop(columns=["unit", "cycles"]).to_numpy()
test_x

array([[ 2.30000e-03,  3.00000e-04,  6.43020e+02, ...,  2.33735e+01,
         3.10000e+01,  3.00000e+01],
       [-2.70000e-03, -3.00000e-04,  6.41710e+02, ...,  2.33916e+01,
         3.10000e+01,  2.90000e+01],
       [ 3.00000e-04,  1.00000e-04,  6.42460e+02, ...,  2.34166e+01,
         3.10000e+01,  2.80000e+01],
       ...,
       [-6.00000e-04, -3.00000e-04,  6.43440e+02, ...,  2.32051e+01,
         1.98000e+02,  2.00000e+00],
       [-3.80000e-03,  1.00000e-04,  6.43260e+02, ...,  2.32699e+01,
         1.98000e+02,  1.00000e+00],
       [ 1.30000e-03,  3.00000e-04,  6.42950e+02, ...,  2.31855e+01,
         1.98000e+02,  0.00000e+00]])

In [30]:
# Regression target: the RUL column of the training frame as a NumPy array.
train_y = df1['RUL'].to_numpy()
train_y

array([191, 190, 189, ...,   2,   1,   0], dtype=int64)

In [33]:
reg = LinearRegression().fit(train_x, train_y)
print(reg.coef_)
print(reg.intercept_)

[-6.05709538e-13 -1.61756719e-11  1.97107955e-14  2.41820453e-15
  2.45636844e-15  1.10812838e-12 -2.63331024e-14 -4.16297788e-14
  1.97064587e-15  1.50995536e-13 -4.74016442e-14 -7.92551788e-15
  1.79023463e-15  4.58499693e-13  8.10462808e-15 -8.85962310e-14
 -1.76731785e-13 -2.33146835e-15  1.00000000e+00]
7.470646323781693e-11


In [35]:
y_pred = reg.predict(test_x)
print(y_pred)

[3.00000000e+01 2.90000000e+01 2.80000000e+01 ... 2.00000000e+00
 1.00000000e+00 1.84203649e-13]


In [58]:
max_cycles = df_test_stddev_filtered[df_test_stddev_filtered.groupby(['unit'])['cycles'].transform(max) == df_test_stddev_filtered['cycles']]
max_cycles_for_prediction = max_cycles.drop(["unit", "cycles"], axis = 1)

In [59]:
prediction = reg.predict(max_cycles.values)
print(prediction)

[23.3552 23.2618 23.274  23.2581 23.4117 23.3269 23.2763 23.2465 23.3515
 23.2956 23.2908 23.2861 23.282  23.2603 23.4085 23.304  23.2948 23.2637
 23.3297 23.0242 23.4982 23.3058 23.2815 23.2093 23.3876 23.366  23.185
 23.2826 23.1975 23.3175 23.1105 23.3456 23.2843 23.0641 23.1161 23.1534
 23.2145 23.1739 23.3721 23.0906 23.1718 23.094  23.2379 23.3058 23.312
 23.1876 23.2595 23.3238 23.1167 23.291  23.3108 23.211  23.1671 23.4385
 23.3438 23.1772 23.291  23.1747 23.4337 23.2971 23.1314 23.2719 23.2344
 23.167  23.327  23.0669 23.2914 23.1989 23.1958 23.2231 23.3188 23.2905
 23.3074 23.201  23.4919 23.0737 23.1712 23.2748 23.287  23.2813 23.1537
 23.1211 23.3021 23.2039 23.3107 23.2484 23.2605 23.4181 23.235  23.1409
 23.2522 23.1211 23.1944 23.2933 23.3458 23.4606 23.2953 23.3608 23.3595
 23.1855]
