# Imports

In [161]:
import numpy as np
import pandas as pd
from pandas.errors import ParserError
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
import pickle

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import plot_confusion_matrix, accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, plot_roc_curve

In [162]:
# Load the pre-computed feature table (presumably per-cycle means of each
# channel, judging by the file name — confirm against the step that writes it).
# NOTE: read_pickle unpickles the file, so only load artefacts from a trusted source.
feature_avg = pd.read_pickle('./features/cycle_mean.pkl')

In [163]:
# Preview the first rows to confirm the load worked and to see the feature columns (CE, CP, EPS1, ...).
feature_avg.head()

Unnamed: 0,CE,CP,EPS1,FS1,FS2,PS1,PS2,PS3,PS4,PS5,PS6,SE,TS1,TS2,TS3,TS4,VS1
0,39.60135,1.86275,2538.929167,6.709815,10.304592,160.673492,109.466914,1.991475,0.0,9.842169,9.728098,59.157183,35.621983,40.978767,38.471017,31.74525,0.57695
1,25.786433,1.25555,2531.4989,6.715315,10.403098,160.60332,109.35489,1.976234,0.0,9.635142,9.529488,59.335617,36.676967,41.532767,38.978967,34.493867,0.56585
2,22.218233,1.113217,2519.928,6.718522,10.36625,160.34772,109.158845,1.972224,0.0,9.530548,9.427949,59.54315,37.8808,42.44245,39.63195,35.64615,0.576533
3,20.459817,1.06215,2511.541633,6.720565,10.302678,160.188088,109.064807,1.946575,0.0,9.438827,9.33743,59.7949,38.87905,43.403983,40.403383,36.579467,0.569267
4,19.787017,1.070467,2503.4495,6.690308,10.23775,160.000472,108.931434,1.922707,0.0,9.358762,9.260636,59.455267,39.803917,44.33275,41.31055,37.4279,0.577367


In [164]:
# Load the pickled condition-encoding object; its keys are used below as the
# column names of the label table.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load files from a trusted source.
with open('./features/cond_encoding.pkl', 'rb') as file:
    encoding = pickle.load(file)

In [165]:
# The label file has no header row, so column names are supplied from the keys
# of `encoding`. Names are assigned positionally: the key order must match the
# column order in profile.txt.
target_cols = [*encoding.keys()]
df = pd.read_csv(
    './data/profile.txt',
    sep='\t',
    header=None,
    names=target_cols,
)

In [166]:
# Preview the label table to confirm the column names were applied as expected.
df.head()

Unnamed: 0,Cooler Condition / %,Valve Condition / %,Internal pump leakage,Hydraulic accumulator / bar,stable flag
0,3,100,0,130,1
1,3,100,0,130,1
2,3,100,0,130,1
3,3,100,0,130,1
4,3,100,0,130,1


In [167]:
# Features: every column of the averaged-feature table. Taking a copy is
# equivalent to re-selecting the full column list, and makes it explicit that
# model1_X is independent of feature_avg.
model1_X = feature_avg.copy()

# Target: the valve-condition label column of the profile table.
model1_y = df['Valve Condition / %']

# Features and labels are read from two separate files and are presumably
# paired row-by-row downstream, so fail early if the row counts disagree.
assert len(model1_X) == len(model1_y), (
    f"feature rows ({len(model1_X)}) != label rows ({len(model1_y)})"
)

In [168]:
# Load (or reload, if already loaded) IPython's autoreload extension.
%reload_ext autoreload
# Mode 1: before each cell runs, reload only the modules registered via %aimport.
%autoreload 1 
# Register the project-local `modeling` module so edits to it are picked up
# without restarting the kernel.
%aimport modeling

In [169]:
from modeling import RegularModel

In [170]:
# Wrap the features and target in the project's RegularModel helper. The two
# string labels are what print_m() later reports as "Feature type" and
# "Target Variable".
model1 = RegularModel(
    model1_X,
    model1_y,
    'Simple Averages',
    'Valve Condition / %',
)

In [171]:
# Run the wrapper's fitting routine (implemented in the project-local `modeling`
# module; presumably this is where the per-classifier searches reported by
# print_m() are run — confirm there).
# The call returns a RegularModel object, which is why the cell echoes its repr.
model1.fit_model()



<modeling.RegularModel at 0x7f96e831ddf0>

In [172]:
# Print the summary: feature type, target variable, then the test accuracy and
# best parameters for each classifier.
model1.print_m()

Feature type: Simple Averages
Target Variable: Valve Condition / %
Logistic Regression Test Accuracy: 0.8700906344410876
Logistic Regression Best Params: {'lr__C': 1.0, 'lr__penalty': 'l1', 'lr__solver': 'liblinear'}

Decision Trees Test Accuracy: 0.797583081570997
Decision Trees Best Params: {'dtree__criterion': 'gini', 'dtree__max_depth': 6, 'dtree__min_samples_leaf': 1, 'dtree__min_samples_split': 6}

Random Forest Test Accuracy: 0.8610271903323263
Random Forest Best Params: {'rf__max_depth': 6, 'rf__min_samples_leaf': 1, 'rf__min_samples_split': 2}

K-Nearest Neighbors Test Accuracy: 0.918429003021148
K-Nearest Neighbors Best Params: {'knn__metric': 'manhattan', 'knn__n_neighbors': 1, 'knn__weights': 'uniform'}

Support Vector Machines Test Accuracy: 0.9788519637462235
Support Vector Machines Best Params: {'svm__C': 6, 'svm__kernel': 'linear'}

XGBoost Test Accuracy: 0.972809667673716
XGBoost Best Params: {'xgb__learning_rate': 0.2, 'xgb__max_depth': 5, 'xgb__min_child_weight'

In [173]:
from joblib import dump, load

In [174]:
# Persist the fitted RegularModel object to 'model1.joblib' in the current
# working directory. dump() returns the list of file names written, which is
# echoed as the cell output.
dump(model1, 'model1.joblib')

['model1.joblib']

In [175]:
# Reload the saved object under a new name so it can be compared with the
# in-memory model.
# NOTE: joblib.load is pickle-based, so only load files you trust; the
# `modeling` module must also be importable for the object to be reconstructed.
model1_new = load('model1.joblib')

In [176]:
# Round-trip check: the summary printed by the reloaded object should match the
# one printed before saving.
model1_new.print_m()

Feature type: Simple Averages
Target Variable: Valve Condition / %
Logistic Regression Test Accuracy: 0.8700906344410876
Logistic Regression Best Params: {'lr__C': 1.0, 'lr__penalty': 'l1', 'lr__solver': 'liblinear'}

Decision Trees Test Accuracy: 0.797583081570997
Decision Trees Best Params: {'dtree__criterion': 'gini', 'dtree__max_depth': 6, 'dtree__min_samples_leaf': 1, 'dtree__min_samples_split': 6}

Random Forest Test Accuracy: 0.8610271903323263
Random Forest Best Params: {'rf__max_depth': 6, 'rf__min_samples_leaf': 1, 'rf__min_samples_split': 2}

K-Nearest Neighbors Test Accuracy: 0.918429003021148
K-Nearest Neighbors Best Params: {'knn__metric': 'manhattan', 'knn__n_neighbors': 1, 'knn__weights': 'uniform'}

Support Vector Machines Test Accuracy: 0.9788519637462235
Support Vector Machines Best Params: {'svm__C': 6, 'svm__kernel': 'linear'}

XGBoost Test Accuracy: 0.972809667673716
XGBoost Best Params: {'xgb__learning_rate': 0.2, 'xgb__max_depth': 5, 'xgb__min_child_weight'