In [3]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler

from sklearn.linear_model import LinearRegression, LogisticRegressionCV, LogisticRegression
from sklearn import svm, tree

In [4]:
#Import data and give column names

train = pd.read_csv('../../data/feature_selected_train_FD001.csv', sep=',', header=0, engine='python')
test = pd.read_csv('../../data/feature_selected_test_FD001.csv', sep=',', header=0, engine='python')
y_test = pd.read_csv('../../data/RUL_FD001.csv', sep=',', header=0,  engine='python')

index_names = train.columns[[0, 1]]
setting_names = train.columns[[2]]
sensor_names = train.drop(index_names.union(setting_names), axis = 1).columns # Find something better than union!!
scale_columns = sensor_names
keep_columns = scale_columns.union(index_names[[1]])

In [5]:
def scale_data(train, test, columns):
    sc = MinMaxScaler()
    scaled_train = train.copy()
    scaled_test = test.copy()
    scaled_train[columns] = pd.DataFrame(sc.fit_transform(scaled_train[columns]))
    scaled_test[columns] = pd.DataFrame(sc.transform(scaled_test[columns]))
    return scaled_train, scaled_test

def add_remaining_useful_life(df):
    grouped_by_unit = df.groupby(by="unit_no")
    max_cycle = grouped_by_unit["time_cycles"].max()
    
    result_frame = df.merge(max_cycle.to_frame(name='max_cycle'), left_on='unit_no', right_index=True)
    remaining_useful_life = result_frame["max_cycle"] - result_frame["time_cycles"]
    result_frame["RUL"] = remaining_useful_life
    
    result_frame = result_frame.drop("max_cycle", axis=1)
    return result_frame
def plot_loss(history):
    plt.figure(figsize=(13,5))
    plt.plot(range(1, len(history.history['loss'])+1), history.history['loss'], label='train')
    plt.plot(range(1, len(history.history['val_loss'])+1), history.history['val_loss'], label='validate')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

def evaluate(y_true, y_hat, label='test'):
    mse = mean_squared_error(y_true, y_hat)
    rmse = np.sqrt(mse)
    variance = r2_score(y_true, y_hat)
    print('{} set RMSE:{}, R2:{}'.format(label, rmse, variance))
    
def plot_predictions(y_true, y_predicted):
    plt.figure(figsize=(13,5))
    plt.plot(y_true, label='true')
    plt.plot(y_predicted, label='predicted')
    plt.xlabel('Predictions')
    plt.ylabel('Predicted Values')
    plt.legend()
    plt.show()

In [6]:
x_train = add_remaining_useful_life(train)
y_train = x_train.pop('RUL')
x_test = test.groupby(by="unit_no").last().reset_index()
x_train_scaled, x_test_scaled = scale_data(x_train, x_test, scale_columns)

In [31]:
models = [
          ('Linear Regression', LinearRegression()), 
          ('Logistic Regression', LogisticRegression()), 
          ('SVM', svm.SVC()),
          ('Decision Tree', tree.DecisionTreeRegressor())
         ]
results = []
names = []
for name, model in models:
    clf = model.fit(x_train_scaled, y_train)
    y_hat_train = clf.predict(x_train_scaled)
    print(name)
    evaluate(y_train, y_hat_train, 'train')
    
    y_hat_test = clf.predict(x_test_scaled)
    evaluate(y_test, y_hat_test)
    
    #results.append(cv_results)
    #names.append(name)
    
    #this_df = pd.DataFrame(cv_results)
    #this_df['model'] = name
    #dfs.append(this_df)
#final = pd.concat(dfs, ignore_index=True)

LinearRegression
train set RMSE:39.59310521746384, R2:0.669583736222076
test set RMSE:36.384392626609255, R2:0.23339664627412604
SVM
train set RMSE:50.787367009553414, R2:0.4563321092027983
test set RMSE:39.36089429878341, R2:0.10283903312676279
Decision Tree
train set RMSE:0.0, R2:1.0
test set RMSE:45.49175749517708, R2:-0.19841127552422044
