In [47]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

In [48]:
from google.colab import drive
drive.mount('/content/drive')
train_data="/content/drive/MyDrive/Colab_Notebooks/Predictive_Maintenance/train_FD001.txt"
test_data="/content/drive/MyDrive/Colab_Notebooks/Predictive_Maintenance/test_FD001.txt"
rul_data="/content/drive/MyDrive/Colab_Notebooks/Predictive_Maintenance/RUL_FD001.txt"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [49]:
df = pd.read_csv(train_data, sep=' ', header=None)
test_df=pd.read_csv(test_data, sep=' ', header=None)
column_names = ['unit_number', 'time_in_cycles', 'setting1', 'setting2', 'setting3'] + [f'sensor{i}' for i in range(1, 23)] + ['RUL']
df.columns = column_names
test_column_names = ['unit_number', 'time_in_cycles', 'setting1', 'setting2', 'setting3'] + [f'sensor{i}' for i in range(1, 24)]
test_df.columns=test_column_names


In [50]:
len(df)

20631

In [51]:
df.head()

Unnamed: 0,unit_number,time_in_cycles,setting1,setting2,setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,sensor22,RUL
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419,,
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236,,
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,,
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,,
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044,,


In [52]:
test_df.head()

Unnamed: 0,unit_number,time_in_cycles,setting1,setting2,setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,sensor22,sensor23
0,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,...,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735,,
1,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,...,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916,,
2,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,...,8130.1,8.4441,0.03,393,2388,100.0,39.08,23.4166,,
3,1,4,0.0042,0.0,100.0,518.67,642.44,1584.12,1406.42,14.62,...,8132.9,8.3917,0.03,391,2388,100.0,39.0,23.3737,,
4,1,5,0.0014,0.0,100.0,518.67,642.51,1587.19,1401.92,14.62,...,8129.54,8.4031,0.03,390,2388,100.0,38.99,23.413,,


In [53]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20631 entries, 0 to 20630
Data columns (total 28 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   unit_number     20631 non-null  int64  
 1   time_in_cycles  20631 non-null  int64  
 2   setting1        20631 non-null  float64
 3   setting2        20631 non-null  float64
 4   setting3        20631 non-null  float64
 5   sensor1         20631 non-null  float64
 6   sensor2         20631 non-null  float64
 7   sensor3         20631 non-null  float64
 8   sensor4         20631 non-null  float64
 9   sensor5         20631 non-null  float64
 10  sensor6         20631 non-null  float64
 11  sensor7         20631 non-null  float64
 12  sensor8         20631 non-null  float64
 13  sensor9         20631 non-null  float64
 14  sensor10        20631 non-null  float64
 15  sensor11        20631 non-null  float64
 16  sensor12        20631 non-null  float64
 17  sensor13        20631 non-null 

In [54]:
df=df.drop(['sensor22'],axis=1)
test_df=test_df.drop(['sensor22','sensor23'],axis=1)

In [55]:
rul=pd.read_csv(rul_data,header=None,sep=' ')
rul.head()

Unnamed: 0,0,1
0,112,
1,98,
2,69,
3,82,
4,91,


In [56]:
rul=pd.read_csv(rul_data,header=None,sep=' ')

rul_dict = {i+1: rul.iloc[i, 0] for i in range(len(rul))}

df['RUL'] = df['unit_number'].map(rul_dict)



In [57]:
df.head()

Unnamed: 0,unit_number,time_in_cycles,setting1,setting2,setting3,sensor1,sensor2,sensor3,sensor4,sensor5,...,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,RUL
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419,112
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236,112
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,112
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,112
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044,112


In [58]:
x=df.drop(['RUL'],axis=1)
y=pd.DataFrame(df['RUL'])

x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=42,test_size=0.1)


In [59]:
model=DecisionTreeRegressor()
model.fit(x_train,y_train)
pred=model.predict(x_test)
print("Accuracy : ",accuracy_score(y_test,pred))

Accuracy :  0.9946692512721105


In [60]:
predict=model.predict(test_df)
predict[:100]

array([112., 112., 112., 112., 112., 112., 112., 112., 112., 112., 112.,
       112., 112., 112., 112., 112., 112., 112., 112., 112., 112., 112.,
       112., 112., 112., 112., 112., 112., 112., 112., 112.,  98.,  98.,
        98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,
        98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,
        98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,
        98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,  98.,
        98.,  98.,  98.,  69.,  69.,  69.,  69.,  69.,  69.,  69.,  69.,
        69.,  69.,  69.,  69.,  69.,  69.,  69.,  69.,  69.,  69.,  69.,
        69.])