In [None]:
import os
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import joblib
from sklearn.model_selection import RandomizedSearchCV

# Load data and models

In [None]:
# Folder holding the pickled data sets used by this notebook.
# Note: '__file__' is passed as a string literal (not the __file__ variable),
# so abspath() resolves it against the current working directory; the two
# dirname() calls therefore yield the parent of the working directory.
# os.path.join picks the right separator for the platform instead of a
# hard-coded Windows backslash.
data_location = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath('__file__'))),
    "Data",
)

In [None]:
# NOTE(review): pickle.load / joblib.load can execute arbitrary code while
# deserialising; only point these at files from a trusted source.
# Context managers make sure the file handles are closed right after loading.
with open(data_location + '/train_df', 'rb') as train_file:
    train_df = pickle.load(train_file)
with open(data_location + '/demo_df', 'rb') as demo_file:
    demo_df = pickle.load(demo_file)

# The model file is looked up relative to the current working directory.
model = joblib.load("gb_model.joblib")

# Find most similar engines

In [None]:
# Map every unit found in train_df to the list of model predictions for that
# unit's rows (the "unit" and "RUL" columns are dropped before predicting).
historic_predicted_RUL = {
    engine: list(
        model.predict(
            train_df[train_df["unit"] == engine].drop(columns=["unit", "RUL"])
        )
    )
    for engine in train_df["unit"].unique()
}
    

In [None]:
# Length of the longest prediction sequence across all engines.
max_sequence = max(len(predictions) for predictions in historic_predicted_RUL.values())

# Right-pad every shorter sequence with zeros so all sequences share the same
# length. Only existing keys are reassigned, so iterating items() is safe.
for engine, predictions in historic_predicted_RUL.items():
    missing = int(max_sequence - len(predictions))
    historic_predicted_RUL[engine] = predictions + [0] * missing

In [None]:
def find_most_similar(sequence, history):
    """Return the engine in ``history`` whose sequence is closest to ``sequence``.

    Closeness is the sum of absolute element-wise differences (L1 distance).

    Parameters
    ----------
    sequence : array-like of numbers
        The sequence to match.
    history : dict
        Maps an engine identifier to an array-like sequence. Each sequence is
        subtracted element-wise from ``sequence``, so it is expected to have
        the same length (the notebook zero-pads all sequences beforehand).

    Returns
    -------
    tuple
        ``(engine, distance)`` for the closest candidate. Candidates at
        distance exactly 0 are skipped, so a sequence never matches itself
        when it is also present in ``history``. If no candidate qualifies
        (empty ``history`` or only exact matches), ``(0, 0)`` is returned.
    """
    # Convert the query once instead of on every iteration.
    target = np.asarray(sequence)

    most_similar = 0
    most_similar_distance = 0
    found = False
    for engine, candidate in history.items():
        distance = np.abs(np.asarray(candidate) - target).sum()
        # `not distance > 0` skips exact matches (distance == 0) and also NaN
        # distances; a NaN would otherwise be stored as the best distance and
        # block every later candidate, because `x < nan` is always False.
        if not distance > 0:
            continue
        # Strict `<` keeps the first candidate when distances tie.
        if not found or distance < most_similar_distance:
            most_similar_distance = distance
            most_similar = engine
            found = True
    return most_similar, most_similar_distance

In [None]:
# For every engine, look up its closest neighbour among the training
# predictions and draw both prediction curves in one figure.
for engine_id, engine_curve in historic_predicted_RUL.items():
    most_similar, _ = find_most_similar(engine_curve, historic_predicted_RUL)
    print(f"engine{engine_id!s}")
    print(f"most similar: {most_similar!s}")
    plt.plot(engine_curve)
    plt.plot(historic_predicted_RUL[most_similar])
    plt.show()

In [None]:
# Persist the padded per-engine predictions for later reuse.
# os.path.join replaces the hard-coded "\h..." suffix (an invalid escape
# sequence and a Windows-only separator); the context manager guarantees the
# file is flushed and closed once the dump finishes.
with open(os.path.join(data_location, "historic_RUL"), "wb") as historic_file:
    pickle.dump(historic_predicted_RUL, historic_file)

# Visualizations for similarities on the demo set

In [None]:
# Predict per demo unit, then zero-pad each sequence up to max_sequence so it
# can be compared element-wise with the padded training sequences.
demo_predicitons = {}
for unit_id in demo_df["unit"].unique():
    unit_features = demo_df[demo_df["unit"] == unit_id].drop(columns=["unit", "RUL"])
    unit_predictions = list(model.predict(unit_features))
    zero_padding = [0] * int(max_sequence - len(unit_predictions))
    demo_predicitons[unit_id] = unit_predictions + zero_padding

In [None]:
# One subplot row per demo engine, each comparing the engine's predicted
# sequence with its closest match among the training predictions.
# The row count and figure height follow the number of demo engines instead of
# a hard-coded 5 (5 engines still give the original 15x25 figure); at least one
# row is requested so an empty dict still yields a valid figure.
n_rows = max(len(demo_predicitons), 1)
fig, ax_grid = plt.subplots(
    n_rows, 1, sharex='col', sharey='row', figsize=(15, 5 * n_rows), squeeze=False
)
# squeeze=False always returns a 2-D array; take the single column so `ax`
# stays a 1-D sequence of Axes even when there is only one row.
ax = ax_grid[:, 0]
for c, engine in enumerate(demo_predicitons.keys()):
    most_similar, _ = find_most_similar(demo_predicitons[engine], historic_predicted_RUL)
    ax[c].plot(demo_predicitons[engine], label = "Engine " + str(int(engine)))
    ax[c].plot(historic_predicted_RUL[most_similar], label = "Most similar : Engine " + str(int(most_similar)))
    ax[c].set_title("Predicted RUL for engine " + str(int(engine)))
    ax[c].legend()
    ax[c].set_xlabel("Cycle")
    ax[c].set_ylabel("Predicted RUL")
    