In [67]:
import matplotlib.pyplot as plt
import seaborn as sns

# Data Processing
import numpy as np
import pandas as pd
import random
from sklearn import preprocessing
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# Modeling 
from sklearn.ensemble import VotingClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import cluster, accuracy_score, roc_auc_score
from sklearn.model_selection import cross_validate, GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import plot_model

# Other
import warnings
warnings.filterwarnings('ignore')

In [68]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
sample_submission = pd.read_csv('sample_submission.csv')

train.head()

Unnamed: 0,id,product_code,loading,attribute_0,attribute_1,attribute_2,attribute_3,measurement_0,measurement_1,measurement_2,...,measurement_9,measurement_10,measurement_11,measurement_12,measurement_13,measurement_14,measurement_15,measurement_16,measurement_17,failure
0,0,A,80.1,material_7,material_8,9,5,7,8,4,...,10.672,15.859,17.594,15.193,15.029,,13.034,14.684,764.1,0
1,1,A,84.89,material_7,material_8,9,5,14,3,3,...,12.448,17.947,17.915,11.755,14.732,15.425,14.395,15.631,682.057,0
2,2,A,82.43,material_7,material_8,9,5,12,1,5,...,12.715,15.607,,13.798,16.711,18.631,14.094,17.946,663.376,0
3,3,A,101.07,material_7,material_8,9,5,13,2,6,...,12.471,16.346,18.377,10.02,15.25,15.562,16.154,17.172,826.282,0
4,4,A,188.06,material_7,material_8,9,5,9,2,8,...,10.337,17.082,19.932,12.428,16.182,12.76,13.153,16.412,579.885,0


In [69]:
id_var = ['id']
target= ['failure']
cat_vars = ['product_code','attribute_0','attribute_1']
num_vars = [v for v in test.columns if v not in id_var and v not in cat_vars]
predictors = cat_vars + num_vars

In [70]:
multi_imp = IterativeImputer(max_iter = 9, random_state = 42, verbose = 0, skip_complete = True, n_nearest_features = 10, tol = 0.001)
multi_imp.fit(train[num_vars])
train[num_vars] = multi_imp.transform(train[num_vars])
test[num_vars] = multi_imp.transform(test[num_vars])

In [71]:
attributes = ['attribute_2', 'attribute_3', 'measurement_4', 'measurement_5', 'measurement_6']
train[attributes] = preprocessing.normalize(train[attributes])
test[attributes] = preprocessing.normalize(test[attributes])

In [72]:
test = test.drop(['product_code'], axis = 1)
train = train.drop(['product_code'], axis = 1)
cat_vars.remove('product_code')

In [73]:
for v in cat_vars:
    tempdf = pd.get_dummies(train[v], prefix = v)
    tempdf_test = pd.get_dummies(test[v], prefix = v)
    train = pd.merge(left = train, right = tempdf, left_index = True, right_index = True)
    test = pd.merge(left = test, right = tempdf_test, left_index = True, right_index = True)
train = train.drop(cat_vars, axis = 1)
test = test.drop(cat_vars, axis = 1)

In [74]:
predictors = [v for v in train.columns if v not in id_var and v not in target]

y_class = LabelEncoder().fit_transform(train[target])

# Train test split
X_train, X_test, y_train, y_test, y_train_class, y_test_class = train_test_split(train[predictors], train[target], y_class, test_size=0.2, random_state=42)

In [75]:
print(X_train.shape[1])

26


In [76]:
visible = Input(shape=(X_train.shape[1],))
layer = Dense(128, activation='relu', kernel_initializer='normal')(visible)
layer = Dense(64, activation='relu', kernel_initializer='normal')(layer)
layer = Dense(32, activation='relu', kernel_initializer='normal')(layer)
layer = Dense(16, activation='relu', kernel_initializer='normal')(layer)
layer = Dense(8, activation='relu', kernel_initializer='normal')(layer)

# regression + classification
out_reg = Dense(1, activation='linear')(layer)
out_clas = Dense(2, activation='softmax')(layer)

model = Model(inputs=visible, outputs=[out_reg, out_clas])
model.compile(loss='mse', optimizer='adam')
#plot_model(model, to_file='model.png', show_shapes=True)

In [77]:
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=7, restore_best_weights=True)

In [78]:
model.fit(X_train, [y_train,y_train_class], epochs=150, batch_size=32, callbacks=[callback], verbose=2)

Epoch 1/150
665/665 - 2s - loss: 0.4188 - dense_40_loss: 0.1687 - dense_41_loss: 0.2501 - 2s/epoch - 3ms/step
Epoch 2/150
665/665 - 1s - loss: 0.4179 - dense_40_loss: 0.1679 - dense_41_loss: 0.2500 - 892ms/epoch - 1ms/step
Epoch 3/150
665/665 - 1s - loss: 0.4174 - dense_40_loss: 0.1674 - dense_41_loss: 0.2500 - 991ms/epoch - 1ms/step
Epoch 4/150
665/665 - 1s - loss: 0.4175 - dense_40_loss: 0.1675 - dense_41_loss: 0.2500 - 933ms/epoch - 1ms/step
Epoch 5/150
665/665 - 1s - loss: 0.4173 - dense_40_loss: 0.1673 - dense_41_loss: 0.2500 - 1s/epoch - 2ms/step
Epoch 6/150
665/665 - 1s - loss: 0.4172 - dense_40_loss: 0.1672 - dense_41_loss: 0.2500 - 940ms/epoch - 1ms/step
Epoch 7/150
665/665 - 1s - loss: 0.4172 - dense_40_loss: 0.1672 - dense_41_loss: 0.2500 - 892ms/epoch - 1ms/step
Epoch 8/150
665/665 - 1s - loss: 0.4170 - dense_40_loss: 0.1670 - dense_41_loss: 0.2500 - 887ms/epoch - 1ms/step
Epoch 9/150
665/665 - 1s - loss: 0.4169 - dense_40_loss: 0.1669 - dense_41_loss: 0.2500 - 890ms/epoch 

<keras.callbacks.History at 0x1ec1e88b550>

In [79]:
model.save('model.h5')