In [None]:
# Mount Google Drive at /content/gdrive (Colab only); the data, helper-module
# and output paths used throughout this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)


def _major_minor(version):
    """Return the leading ``(major, minor)`` integers of a dotted version string.

    Version strings must not be compared directly: string comparison is
    lexicographic, so ``"10.0" >= "2.0"`` is False. Integer tuples compare
    numerically. Non-numeric suffixes such as ``"0rc1"`` are cut off at the
    first non-digit, and missing components count as 0.
    """
    numbers = []
    for piece in version.split(".")[:2]:
        digits = ""
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        numbers.append(int(digits) if digits else 0)
    while len(numbers) < 2:
        numbers.append(0)
    return tuple(numbers)


# Scikit-Learn ≥0.20 is required
import sklearn
assert _major_minor(sklearn.__version__) >= (0, 20)

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
print(tf.__version__)
assert _major_minor(tf.__version__) >= (2, 0)

# Training still works on CPU; this is only a heads-up about speed.
if not tf.config.list_physical_devices('GPU'):
    print("No GPU was detected. LSTMs and CNNs can be very slow without a GPU.")

# Common imports
import numpy as np
import os

# Extend the import path with a Drive folder (presumably where the project-local
# modules imported further down, e.g. dataset2 and figsave, are stored).
sys.path.append('/content/gdrive/MyDrive/Colab Notebooks/myPy/')

import pandas as pd

# Seed NumPy and TensorFlow so repeated runs start from the same random state.
np.random.seed(42)
tf.random.set_seed(42)

# Default label sizes for all figures in this notebook.
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

import natsort
import re
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler, StandardScaler,RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.semi_supervised import LabelSpreading
from sklearn.metrics import classification_report, confusion_matrix
from scipy import stats
from sklearn.model_selection import StratifiedKFold,KFold
from dataset2 import feature_V_P, Labelspreading,data_set_dw
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
import time
from figsave import save_fig
from matplotlib import cm
import matplotlib
from sklearn.metrics import accuracy_score
from mlxtend.plotting import plot_decision_regions
from sklearn.metrics import mean_squared_error
import seaborn as sns

# Serif font specs: `font` (size 16) is passed as `fontdict` to axis and
# colorbar labels; `prop` is the same family at size 12.
font = dict(family='serif', size=16)
prop = dict(family='serif', size=12)

# `n` evenly spaced colours sampled from the viridis colormap.
n = 3
colors = cm.viridis(np.linspace(0, 1, n))


# Function definitions


In [None]:
def rounded_accuracy(y_true, y_pred):
    """Binary accuracy evaluated on rounded targets and rounded predictions.

    Both arguments are passed through ``tf.round`` and the results are handed
    to ``keras.metrics.binary_accuracy``, whose return value is returned as is.
    """
    rounded_targets = tf.round(y_true)
    rounded_outputs = tf.round(y_pred)
    return keras.metrics.binary_accuracy(rounded_targets, rounded_outputs)

def layer_weight_plot(model, layer_num, figsize, cmap, norm, title, save,path):
    """Draw the first weight array of one layer as an annotated heat map.

    Parameters
    ----------
    model : object exposing ``layers``; ``model.layers[layer_num].get_weights()``
        must return at least two arrays, the first of which is 2-D.
    layer_num : index into ``model.layers``.
    figsize : figure size forwarded to ``plt.subplots``.
    cmap, norm : colormap and normalisation forwarded to ``imshow``.
    title : figure title; also used as the file name when the figure is saved.
    save : when truthy the figure is written with ``save_fig``.
    path : directory passed to ``save_fig`` as ``image_path``.

    Returns
    -------
    tuple
        ``(layer_w, layer_b)`` – the first and second array from ``get_weights()``.
    """
    # Fetch the weight list once and index it, instead of calling get_weights() twice.
    weights = model.layers[layer_num].get_weights()
    layer_w = weights[0]
    layer_b = weights[1]

    fig, ax = plt.subplots(figsize=figsize)
    interp = 'None'
    layer_w_im = ax.imshow(layer_w, origin='upper',interpolation=interp, aspect='auto',cmap=cmap, norm=norm)

    # Write every weight value (2 decimals) into its cell; ndenumerate yields
    # (row, col) indices while ax.text expects (x, y) = (col, row).
    for (j, i),label in np.ndenumerate(layer_w):
        ax.text(i,j,round(label,2),ha='center',va='center',color = 'w', fontsize = 7)

    ax.tick_params(axis='both', which='major', labelsize=12, direction='in')
    plt.title(title)
    cbar1 = fig.colorbar(layer_w_im)
    cbar1.set_label('Weights',fontdict=font)
    cbar1.set_ticks(np.linspace(-1,1,11))
    if save:
        # Honour the `path` argument; the global image_save_path was used here
        # before, so the parameter had no effect.
        save_fig(image_path =path, fig_name = title,reselution=150)

    return layer_w, layer_b

def train_autoencoder(n_neurons, X_train, X_valid, loss, optimizer,
                      n_epochs=10, output_activation=None, metrics=None,tensorboard_cb = False, name = None, s_name = []):
    """Train a single-hidden-layer autoencoder and return its two halves.

    The encoder is one ``Dense(n_neurons)`` layer (SELU activation,
    ``lecun_normal`` initialiser); the decoder is one ``Dense`` layer mapping
    back to the input width with ``output_activation``. Training reconstructs
    ``X_train`` from itself, validates on ``X_valid`` and stops early when
    ``val_loss`` has not improved for 3 epochs (best weights are restored).

    Parameters
    ----------
    n_neurons : width of the encoding layer.
    X_train, X_valid : array-likes exposing ``.shape``; the last dimension is
        taken as the number of input features.
    loss, optimizer, metrics : forwarded to ``compile``.
    n_epochs : maximum number of epochs.
    output_activation : activation of the decoder layer.
    tensorboard_cb : when truthy, a TensorBoard callback logging to
        ``.../greedy/logs/<name>`` is added.
    name : run name used only for the TensorBoard log directory.
    s_name : up to three model names ``[encoder, decoder, autoencoder]``.
        Missing entries fall back to Keras' automatic naming. The default
        list is never mutated.

    Returns
    -------
    tuple
        ``(encoder, decoder, encoder(X_train), encoder(X_valid), summary)``
        where ``summary`` is the return value of ``autoencoder.summary()``
        (Keras prints the table; per the Keras docs the call returns None).
    """
    # Pad missing names with None; indexing s_name directly raised IndexError
    # whenever fewer than three names (including the default) were given.
    model_names = list(s_name or [])[:3]
    model_names += [None] * (3 - len(model_names))
    encoder_name, decoder_name, autoencoder_name = model_names

    n_inputs = X_train.shape[-1]

    # Names go through the public constructor argument rather than the
    # private `_name` attribute.
    encoder = keras.models.Sequential([
        keras.layers.Dense(n_neurons, activation="selu", input_shape=[n_inputs],kernel_initializer="lecun_normal")
    ], name=encoder_name)

    decoder = keras.models.Sequential([
        keras.layers.Dense(n_inputs, activation=output_activation)
    ], name=decoder_name)

    autoencoder = keras.models.Sequential([encoder, decoder], name=autoencoder_name)
    autoencoder.compile(optimizer, loss, metrics=metrics)

    early_stopping_cb = keras.callbacks.EarlyStopping(monitor = 'val_loss',patience=3,restore_best_weights=True)
    callbacks = [early_stopping_cb]

    if tensorboard_cb:
        # Keep the boolean flag intact; the callback gets its own local name.
        board_cb = TensorBoard(log_dir= '/content/gdrive/MyDrive/Colab Notebooks/greedy/logs/{}'.format(name),histogram_freq=1,embeddings_freq = 1)
        callbacks.append(board_cb)

    autoencoder.fit(X_train, X_train, epochs=n_epochs,
                    validation_data=(X_valid, X_valid),
                    callbacks = callbacks)
    summary = autoencoder.summary()
    return encoder, decoder, encoder(X_train), encoder(X_valid), summary

# Data set

### read data


In [None]:
# Drive locations: input CSV/Excel files and the folder figures are saved to.
data_path       = '/content/gdrive/MyDrive/Colab Notebooks/MDK1/data_set/'
image_save_path = '/content/gdrive/MyDrive/Colab Notebooks/greedy/'

# Threshold values and their display labels, kept in the same order
# (note that 5.2 comes before 5.1 in both lists).
dsoll_list      = [4.8*np.sqrt(2),4.9*np.sqrt(2),5*np.sqrt(2),5.2*np.sqrt(2),5.1*np.sqrt(2),]
# Raw strings: '\s' is not a valid escape sequence, so the non-raw form
# triggers an invalid-escape warning; the resulting text is unchanged.
dsoll_list_str  = [r'$4,8 \sqrt{t}$',r'$4,9 \sqrt{t}$',r'$5,0 \sqrt{t}$',r'$5,2 \sqrt{t}$',r'$5,1 \sqrt{t}$']

# Data files whose name contains 'nor'. os.listdir returns names in arbitrary
# order, so positions in this list are only stable for an unchanged folder.
files           = [f for f in os.listdir(path = data_path) if 'nor' in f]
print(files)

scaler          = MaxAbsScaler()
# Colormap and fixed [-1, 1] colour scale shared by the weight heat maps.
cmap            = cm.viridis
norm            = matplotlib.colors.Normalize(vmin=-1, vmax=1)

In [None]:
# Re-seed here so the two random splits below come out the same whenever
# this cell is re-run.
np.random.seed(42)
tf.random.set_seed(42)

# Feature table indexed by 'pktnum'; split 75/25 into the train/validation
# sets used for autoencoder pre-training (the targets are the inputs themselves).
feature_vb_sel_nor = (
    pd.read_csv(os.path.join(data_path, 'feature_vb_pb_sel_nor.csv'))
    .set_index('pktnum')
)
X_train, X_valid, _, _ = train_test_split(
    feature_vb_sel_nor,
    feature_vb_sel_nor,
    test_size=0.25,
)
print(X_train.head())

# Timestamped run names, used for the TensorBoard log directories.
greedy1 = 'schweiss-EA-greedy1-%d' % int(time.time())
greedy2 = 'schweiss-EA-greedy2-%d' % int(time.time())

# Labelled data sets: the subset of `files` whose name contains 'MDK'.
lbsp_nor = [file_name for file_name in files if 'MDK' in file_name]
print(lbsp_nor)

# Index of the labelled file to use; the same index selects the threshold
# from dsoll_list later on.
i = 4
print(lbsp_nor[i])

MDK1_lbsp_nor = pd.read_csv(os.path.join(data_path, lbsp_nor[i]))

# Column 'dw' is the target; every other column is a feature.
X_full = MDK1_lbsp_nor.drop(columns='dw')
y_full = MDK1_lbsp_nor['dw']
X_train_lb, X_test_lb, y_train_lb, y_test_lb = train_test_split(
    X_full, y_full, test_size=0.25,
)

# Unsupervised pre-training

In [None]:
# Stage 1: autoencoder on the raw features (10-unit encoding, sigmoid output,
# binary cross-entropy reconstruction loss).
enc1, dec1, X_train_enc1, X_valid_enc1, summary_1 = train_autoencoder(
    n_neurons         = 10,
    X_train           = X_train.values,
    X_valid           = X_valid.values,
    loss              = 'binary_crossentropy',
    optimizer         = keras.optimizers.Adam(),
    output_activation = 'sigmoid',
    metrics           = [rounded_accuracy],
    n_epochs          = 20,
    tensorboard_cb    = False,
    name              = greedy1,
    s_name            = ['enc1', 'dec1', 'autoenc1'],
)

# Stage 2: autoencoder on the stage-1 encodings (6-unit encoding, SELU output,
# MSE reconstruction loss).
enc2, dec2, X_train_enc2, X_valid_enc2, summary_2 = train_autoencoder(
    n_neurons         = 6,
    X_train           = X_train_enc1,
    X_valid           = X_valid_enc1,
    loss              = 'mse',
    optimizer         = keras.optimizers.Adam(),
    output_activation = 'selu',
    n_epochs          = 20,
    tensorboard_cb    = False,
    name              = greedy2,
    s_name            = ['enc2', 'dec2', 'autoenc2'],
)

# Chain the trained halves into one stacked autoencoder. The name is passed
# through the constructor instead of writing the private `_name` attribute.
stacked_ae_1_by_1 = keras.models.Sequential(
    [enc1, enc2, dec2, dec1],
    name='final_autoenc',
)

# summary() prints the table itself; wrapping it in print() only added a
# stray "None" line to the output.
stacked_ae_1_by_1.summary()

In [None]:
# Reconstruct the validation set with the stacked autoencoder.
pred_df = pd.DataFrame(stacked_ae_1_by_1.predict(X_valid), index = X_valid.index, columns = X_valid.columns)

# Per-sample reconstruction error: mean squared difference across the feature
# columns of each row, computed in one vectorised step.
mse = np.mean(np.square(X_valid.values - pred_df.values), axis=1)

fig=plt.figure(figsize=(5,3.5))

# Two axes stacked on the same spot: `ax` shows the error per sample
# (bottom/left axes), `ax2` the error histogram (top/right axes, frame hidden).
ax=fig.add_subplot(111, label="1")
ax2=fig.add_subplot(111, label="2", frame_on=False)

ax.plot(mse, 'o', color = colors[0],mec = 'k', alpha = 0.7, markersize = 7)
ax.set_xlabel('Validierte Daten',fontdict = font,color=colors[0])
ax.set_ylabel('MSE', fontdict = font,color=colors[0])
ax.tick_params(axis='x', colors=colors[0])
ax.tick_params(axis='y', colors=colors[0])
ax.tick_params(axis='both', which='major', labelsize=12,direction='in')
ax.ticklabel_format(axis="y", style="sci", scilimits=(2,1))
ax.set_xlim(0)

# Draw the histogram on ax2 explicitly instead of relying on pyplot's
# "current axes" and rebinding ax2 to whatever distplot returns.
hist_color = '#287C8EFF'
sns.distplot(np.array(mse), rug=True,color = hist_color,hist=True, kde=False, ax=ax2)
ax2.xaxis.tick_top()
ax2.yaxis.tick_right()
ax2.set_ylim(0,80)
ax2.set_xlabel('MSE',fontdict = font,color=hist_color)
ax2.set_ylabel('Anzahl',fontdict = font,color=hist_color)
ax2.xaxis.set_label_position('top')
ax2.yaxis.set_label_position('right')
ax2.tick_params(axis='x', colors=hist_color)
ax2.tick_params(axis='y', colors=hist_color)
ax2.tick_params(axis='both', which='major', labelsize=12,direction='in',pad=15)
ax2.ticklabel_format(axis="y", style="sci", scilimits=(2,1))
ax2.ticklabel_format(axis="x", style="sci", scilimits=(2,1))
save_fig(image_path = image_save_path, fig_name = 'stackedautoencoder_valid_mse', reselution=150)

# Autoencoder weights

In [None]:
# Heat maps of layer 0 of each pre-trained encoder; the figures are only
# displayed here (save=False).
encoder_plot_kwargs = dict(
    layer_num=0,
    figsize=(5, 4),
    cmap=cmap,
    norm=norm,
    save=False,
    path=image_save_path,
)

enc1_ly_w, enc1_ly_b = layer_weight_plot(
    model=enc1, title='1-ten Encoder Layer 1', **encoder_plot_kwargs
)

enc2_ly_w, enc2_ly_b = layer_weight_plot(
    model=enc2, title='2-ten Encoder Layer 1', **encoder_plot_kwargs
)

# Fine-tuning
First with enc1 and enc2 frozen (`trainable = False`), then with both unfrozen.

In [None]:
tf.keras.backend.clear_session()

# Classifier = pre-trained encoders + a small dense head with one sigmoid output.
# The model name is passed through the constructor instead of assigning the
# private `_name` attribute afterwards.
# NOTE(review): the input shape comes from X_train while training below uses
# X_train_lb — assumes both have the same feature columns; confirm.
pretrained_clf = keras.models.Sequential([
    keras.Input(shape=X_train.shape[1:]),
    enc1,
    enc2,
    keras.layers.Dense(3,activation='selu', kernel_initializer = 'lecun_normal'),
    keras.layers.Dense(1, activation='sigmoid')
], name='fein_turning')

# Phase 1: freeze both encoders before compiling so that only the new head
# is trained.
enc1.trainable = False
enc2.trainable = False

pretrained_clf.compile(loss="binary_crossentropy",
                       optimizer=keras.optimizers.Adam(),
                       metrics=["accuracy"])


history1 = pretrained_clf.fit(X_train_lb, y_train_lb, epochs=80,
                             validation_data=(X_test_lb, y_test_lb))



In [None]:
# Phase 2: unfreeze both encoders and compile again after changing `trainable`.
for pretrained_encoder in (enc1, enc2):
    pretrained_encoder.trainable = True

pretrained_clf.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)

# Run and checkpoint names are derived from the labelled file name without
# its extension.
run_tag = lbsp_nor[i].split('.')[0]
NAME = 'schweiss-EA-Turinig_' + run_tag
h5_name = "EA-Turinig_" + run_tag + ".h5"

checkpoint_filepath = '/content/gdrive/MyDrive/Colab Notebooks/greedy/'

# Stop after 5 epochs without val_loss improvement (best weights restored),
# keep only the best model on disk, and log to TensorBoard.
early_stopping_cb = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
checkpoint_cb = ModelCheckpoint(filepath=checkpoint_filepath + h5_name, save_best_only=True)
tensorboard_cb = TensorBoard(
    log_dir='/content/gdrive/MyDrive/Colab Notebooks/greedy/logs/{}'.format(NAME),
    histogram_freq=1,
    embeddings_freq=1,
)
callbacks = [early_stopping_cb, checkpoint_cb, tensorboard_cb]

history2 = pretrained_clf.fit(
    X_train_lb, y_train_lb,
    epochs=500,
    validation_data=(X_test_lb, y_test_lb),
    callbacks=callbacks,
)

In [None]:
# Reload the checkpoint written by ModelCheckpoint (save_best_only=True)
# during the second fine-tuning phase.
h5_name = "EA-Turinig_" + lbsp_nor[i].split('.')[0] + ".h5"
model = keras.models.load_model(checkpoint_filepath + h5_name)

In [None]:
# Separate test features, indexed by 'pktnum' (used for prediction further down).
feature_test_nor = pd.read_csv(os.path.join(data_path, 'feature_test_nor.csv')).set_index('pktnum')

# Score the reloaded model on the held-out part of the labelled data.
loss, acc = model.evaluate(X_test_lb,  y_test_lb, verbose=2)
print('Restored model, accuracy: {:5.2f}%'.format(100*acc))

# summary() prints the table itself; print(model.summary()) additionally
# printed "None".
model.summary()

In [None]:
print(dsoll_list[i])

# Measured values from the Excel sheet.
# NOTE(review): the last 8 rows are dropped — presumably non-data rows; confirm.
torsion_df = (
    pd.read_excel(os.path.join(data_path,'Torsionspruefung_MDK1.xlsx'))
    .iloc[:-8]
    .set_index('Punktnummer simuliert')
)

# Binary ground truth: 1 where dm_Korr reaches the selected threshold, else 0.
test_dw = [1 if dm >= dsoll_list[i] else 0 for dm in torsion_df.dm_Korr.values]
y_true = np.array(test_dw)
print(y_true)
print('---------------------------------')

# Threshold the sigmoid outputs at 0.5 and flatten to one label per row.
y_pred = (model.predict(feature_test_nor) > 0.5).astype("int32").reshape(feature_test_nor.shape[0],)
print(y_pred)

# accuracy_score is already imported in the setup cell; the duplicate
# imports that used to sit here were removed.
print('Genauigkeit: {}'.format(accuracy_score(y_true,y_pred)))

In [None]:
# Display the threshold values for reference next to the results noted below.
dsoll_list

1.   dsoll = 4.8 acc = 0.8522727272727273 var = 94.59% ep = 5 var = 43.23%
2.   dsoll = 4.9 acc = 0.8636363636363636 var = 100% ep = 5 var = 56.44%
3.   dsoll = 5.0 acc =  0.7727272727272727 var = 94.74% ep = 5 var = 68.42%
4.   dsoll = 5.2 acc = 0.625 var = 94.44% ep = 5 var =   69.44%
5.   dsoll = 5.1 acc =  0.7045454545454546 var = 100% ep = 5 var =  81.08%
 





In [None]:
# Heat maps of layers 0 and 1 of the reloaded fine-tuned model; both figures
# are saved, using the title as file name.
tuned_run = lbsp_nor[i].split('.')[0]

pretrained_clf_ly1_w, pretrained_clf_ly1_b = layer_weight_plot(model = model,layer_num = 0,
                                                            figsize = (5,4), cmap = cmap,
                                                            norm=norm,title = 'Fein-Turning Layer 1 {}'.format(tuned_run),
                                                            save = True,
                                                            path = image_save_path)

# The title previously read 'Fein-Turning Layer 2{}' (no space before the run
# name), unlike the layer-1 title; the space makes both file names consistent.
pretrained_clf_ly2_w, pretrained_clf_ly2_b = layer_weight_plot(model = model,layer_num = 1,
                                                            figsize = (5,4), cmap = cmap,
                                                            norm=norm,title = 'Fein-Turning Layer 2 {}'.format(tuned_run),
                                                            save = True,
                                                            path = image_save_path)


# Tensorboard


In [None]:
# Load the TensorBoard notebook extension and open it on the log directory
# that the TensorBoard callbacks in this notebook write to.
%load_ext tensorboard
%tensorboard --logdir='/content/gdrive/MyDrive/Colab Notebooks/greedy/logs/'