In [1]:
print("start")
# !pip install keras2onnx
# !pip install onnxruntime
# !pip install onnxmltools
# !pip install tf2onnx
# !pip install tf2onnx onnx onnxruntime
print("end")


start
end


In [2]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

from tensorflow.keras import models
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import RMSprop,Adam
from tensorflow.keras.utils import to_categorical

#second model imports
from tensorflow.keras import datasets, layers
from tensorflow.keras.layers import Input, Add, Dense, Activation, ZeroPadding2D,BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D

from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix

import onnxruntime
import onnxmltools
import keras2onnx

import mlflow

np.set_printoptions(suppress=True)


<div style="line-height:1.75;background:#1e7b1e;padding-left:20px;padding-top:5px;padding-bottom:5px;border-radius:5px 5px 0px 0px">
<i style="font-size:40px;color:#c1f0c1;">Global Functions and Variables</i>
   
</div>
<div>
<p style="line-height:1.75;font-size:16px;background:#c1f0c1;padding:20px;border-radius:0px 0px 5px 5px">
Global Functions and Variables, below you can find functions for building the models and the data<br>
</p>
</div>

### make sure to clean data_v1, data_v2 and dvc_repository directories before continue


In [3]:
# check the current directory
base_directory = %pwd
base_directory

'C:\\working\\gitos\\keras-onnx'

In [4]:
models_path = 'models/'
data_v1_path = 'data_v1/'
data_v2_path = 'data_v2/'
dvc_repository_path = 'dvc_repository'
raw_data_path = 'raw_data/'
mlflow_path = 'mlflow/'

# os.listdir(raw_data_path)

In [5]:
emotions = {0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Sad', 5: 'Surprise', 6: 'Neutral'}

In [6]:

# create_model
#------------------------------

def create_model_ver_1():
    model = models.Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(48, 48, 1)))
    model.add(MaxPool2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPool2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(7, activation='softmax'))
    model.compile(optimizer=Adam(lr=1e-3), loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model


# create_model version 2
#------------------------------
def create_model_ver_2():
    model = models.Sequential()
    model.add(layers.Conv2D(64, (1, 1), padding='same', activation='relu', input_shape=(48, 48, 1)))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))

    model.add(layers.Conv2D(128, (3, 3),padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))
    model.add(layers.Conv2D(256, (5, 5),padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2),padding="same"))
    model.add(Dropout(0.25))
    model.add(layers.Flatten())
    model.add(layers.Dense(128))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(layers.Dense(256))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(layers.Dense(7, activation='softmax'))
    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model


# prepare_data
#------------------------------

def prepare_data(data):
    """ Prepare data for modeling 
        input: data frame with labels und pixel data
        output: image and label array """
    
    image_array = np.zeros(shape=(len(data), 48, 48))
    image_label = np.array(list(map(int, data['Emotion'])))
    
    for i, row in enumerate(data.index):
        image = np.fromstring(data.loc[row, 'Pixels'], dtype=int, sep=' ')
        image = np.reshape(image, (48, 48))
        image_array[i] = image
        
    return image_array, image_label


# data_to_tf_data
#-----------------------------

def data_to_tf_data(df):
    image_array, image_label = prepare_data(df)
    images = image_array.reshape((image_array.shape[0], 48, 48, 1))
    images = images.astype('float32')/255
    labels = to_categorical(image_label)
    return images, labels

# plot_examples
#-----------------------------

def plot_examples(data,train_images,train_labels,  label=0):
    fig, axs = plt.subplots(1, 5, figsize=(25, 12))
    fig.subplots_adjust(hspace = .2, wspace=.2)
    axs = axs.ravel()
    for i in range(5):
        idx = data[data['Emotion']==label].index[i]
        axs[i].imshow(train_images[idx][:,:,0], cmap='gray')
        axs[i].set_title(emotions[train_labels[idx].argmax()])
        axs[i].set_xticklabels([])
        axs[i].set_yticklabels([])

# plot_all_emotions
#-----------------------------        

def plot_all_emotions(data,train_images, train_labels):
    fig, axs = plt.subplots(1, 7, figsize=(30, 12))
    fig.subplots_adjust(hspace = .2, wspace=.2)
    axs = axs.ravel()
    for i in range(7):
        idx = data[data['Emotion']==i].index[i]
        axs[i].imshow(train_images[idx][:,:,0], cmap='gray')
        axs[i].set_title(emotions[train_labels[idx].argmax()])
        axs[i].set_xticklabels([])
        axs[i].set_yticklabels([])
        

# plot_image_and_emotion
#------------------------------

def plot_image_and_emotion(test_image_array, test_image_label, pred_test_labels, image_number):
    """ Function to plot the image and compare the prediction results with the label """

    fig, axs = plt.subplots(1, 2, figsize=(12, 6), sharey=False)
    
    bar_label = emotions.values()
    
    axs[0].imshow(test_image_array[image_number], 'gray')
    axs[0].set_title(emotions[test_image_label[image_number]])
    
    axs[1].bar(bar_label, pred_test_labels[image_number], color='orange', alpha=0.7)
    axs[1].grid()
    
    plt.show()
    
    
# plot_compare_distributions
#------------------------------

def plot_compare_distributions(array1, array2, title1='', title2=''):
    df_array1 = pd.DataFrame()
    df_array2 = pd.DataFrame()
    df_array1['emotion'] = array1.argmax(axis=1)
    df_array2['emotion'] = array2.argmax(axis=1)
    
    fig, axs = plt.subplots(1, 2, figsize=(12, 6), sharey=False)
    x = emotions.values()
    
    y = df_array1['emotion'].value_counts()
    keys_missed = list(set(emotions.keys()).difference(set(y.keys())))
    for key_missed in keys_missed:
        y[key_missed] = 0
    axs[0].bar(x, y.sort_index(), color='orange')
    axs[0].set_title(title1)
    axs[0].grid()
    
    y = df_array2['emotion'].value_counts()
    keys_missed = list(set(emotions.keys()).difference(set(y.keys())))
    for key_missed in keys_missed:
        y[key_missed] = 0
    axs[1].bar(x, y.sort_index())
    axs[1].set_title(title2)
    axs[1].grid()
    
    plt.show()

    
# save_to_onnx
#------------------------------

def save_to_onnx(model, output_onnx_model):
    onnx_model_to_save = keras2onnx.convert_keras(model, model.name)
    onnxmltools.utils.save_model(onnx_model_to_save, output_onnx_model)
    print(f'Save model to {output_onnx_model} - Done')

<div style="line-height:1.75;background:#1e7b1e;padding-left:20px;padding-top:5px;padding-bottom:5px;border-radius:5px 5px 0px 0px"><i style="font-size:40px;color:#c1f0c1;">Load and Prepare the Data</i>
   
</div>
<div>
<p style="line-height:1.75;font-size:16px;background:#c1f0c1;padding:20px;border-radius:0px 0px 5px 5px">
Load and Prepare the data, this part is intended for the first training of the model, the first "fit" command<br>
Later in the main loop we load the data for each iteration
</p>
</div>



In [7]:
# since i had some issues with the initial training data i create my own initial training base on icml_face_data.csv

## NO NEED TO RUN I DID IT ONCE 

# df = pd.read_csv(raw_data_path + 'initial_training_data.csv')
# df.drop(' Usage',axis='columns', inplace=True)
# df.rename(columns={'emotion': 'Emotion', ' pixels': 'Pixels'}, inplace=True)
# df = df.sample(1000)
# df.to_csv(raw_data_path + 'initial_training_data.csv')

In [8]:
initial_train_data = pd.read_csv(raw_data_path + 'initial_training_data.csv')
initial_train_images, initial_train_labels = data_to_tf_data(initial_train_data)

validation_test_data = pd.read_csv(raw_data_path + 'validation_test_data.csv')
validation_test_images, validation_test_labels = data_to_tf_data(validation_test_data)

test_data = pd.read_csv(raw_data_path + 'test_data.csv')
test_images, test_labels = data_to_tf_data(test_data)

In [9]:
class_weight = dict(zip(range(0, 7), (((initial_train_data['Emotion'].value_counts()).sort_index())/len(initial_train_data['Emotion'])).tolist()))
class_weight

{0: 0.14641744548286603,
 1: 0.006230529595015576,
 2: 0.14953271028037382,
 3: 0.2834890965732087,
 4: 0.16822429906542055,
 5: 0.09345794392523364,
 6: 0.1526479750778816}

<div style="line-height:1.75;background:#1e7b1e;padding-left:20px;padding-top:5px;padding-bottom:5px;border-radius:5px 5px 0px 0px">
<i style="font-size:40px;color:#c1f0c1;">Build and run the model</i>
   
</div>
<div>
<p style="line-height:1.75;font-size:16px;background:#c1f0c1;padding:20px;border-radius:0px 0px 5px 5px">
Build and run the model<br>
</p>
</div>


<div>
<p style="line-height:1.75;font-size:20px;background:#c1f0c1;padding:20px;border-radius:0px 0px 5px 5px">
<b>Model V1</b><br>
</p>
</div>



In [10]:
model_v1 = create_model_ver_1()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 46, 46, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 23, 23, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 21, 21, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 10, 10, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 8, 8, 64)          36928     
_________________________________________________________________
flatten (Flatten)            (None, 4096)              0         
_________________________________________________________________
dense (Dense)                (None, 64)                2

In [11]:
history = model_v1.fit(initial_train_images, 
                    initial_train_labels,
                    validation_data=(validation_test_images, validation_test_labels),
                    class_weight = class_weight,
                    epochs=12,
                    batch_size=64)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [12]:
test_loss, test_acc = model_v1.evaluate(test_images, test_labels)



In [13]:
model_v1.save('model/model_v1')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: model/model_v1\assets


<div>
<p style="line-height:1.75;font-size:20px;background:#c1f0c1;padding:20px;border-radius:0px 0px 5px 5px">
<b>Model V2</b><br>
</p>
</div>

In [14]:
model_v2 = create_model_ver_2()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 48, 48, 64)        128       
_________________________________________________________________
batch_normalization (BatchNo (None, 48, 48, 64)        256       
_________________________________________________________________
dropout (Dropout)            (None, 48, 48, 64)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 48, 48, 128)       73856     
_________________________________________________________________
batch_normalization_1 (Batch (None, 48, 48, 128)       512       
_________________________________________________________________
dropout_1 (Dropout)          (None, 48, 48, 128)       0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 48, 48, 256)      

In [15]:
history = model_v2.fit(initial_train_images, 
                    initial_train_labels,
                    validation_data=(validation_test_images, validation_test_labels),
                    class_weight = class_weight,
                    epochs=2,  #should be 12
                    batch_size=64)

Epoch 1/2
Epoch 2/2


In [16]:
test_loss, test_acc = model_v2.evaluate(test_images, test_labels)



In [17]:
model_v2.save('model/model_v2')

INFO:tensorflow:Assets written to: model/model_v2\assets


<div style="line-height:1.75;background:#1e7b1e;padding-left:20px;padding-top:5px;padding-bottom:5px;border-radius:5px 5px 0px 0px">
<i style="font-size:40px;color:#c1f0c1;">Create CI/CD</i>
   
</div>
<div>
<p style="line-height:1.75;font-size:16px;background:#c1f0c1;padding:20px;border-radius:0px 0px 5px 5px">
Create CI/CD<br>
</p>
</div>

<div>
<p style="line-height:1.75;font-size:20px;background:#c1f0c1;padding:20px;border-radius:0px 0px 5px 5px">
<b>Replace between Model V1 and Model V2</b><br>
</p>
</div>


In [26]:
%cd {base_directory}

MODEL = model_v2
model_version = 'model_v2'
#%cd {data_v1_path}

#MODEL = model_v2
#%cd {data_v2_path}
#model_version = 'model_v2'

C:\working\gitos\keras-onnx


<div>
<p style="line-height:1.75;font-size:20px;background:#c1f0c1;padding:20px;border-radius:0px 0px 5px 5px">
<b>Build the mlflow</b><br>
</p>
</div>

In [27]:
#init the mlflow - make sure to end the last run before starting new one
mlflow.create_experiment("uatt")
mlflow.set_experiment(model_version)
mlflow.log_param("Model name", MODEL.name)

<div>
<p style="line-height:1.75;font-size:20px;background:#c1f0c1;padding:20px;border-radius:0px 0px 5px 5px">
<b>Init git and dvc</b><br>
</p>
</div>

In [20]:
!git config --global user.email "saharbnm@gmail.com"
!git config --global user.name "Sahar BnM"
!git init
!dvc init
!git commit -m "Initialize DVC"
!dvc remote add myRepo ..\dvc_repository -d
!git add .
!git commit -m "init"

Initialized empty Git repository in C:/working/gitos/keras-onnx/data_v1/.git/
Initialized DVC repository.

You can now commit the changes to git.

+---------------------------------------------------------------------+
|                                                                     |
|        DVC has enabled anonymous aggregate usage analytics.         |
|     Read the analytics documentation (and how to opt-out) here:     |
|             <https://dvc.org/doc/user-guide/analytics>              |
|                                                                     |
+---------------------------------------------------------------------+

What's next?
------------
- Check out the documentation: <https://dvc.org/doc>
- Get help and share ideas: <https://dvc.org/chat>
- Star us on GitHub: <https://github.com/iterative/dvc>
[master (root-commit) 32a1f29] Initialize DVC
 9 files changed, 515 insertions(+)
 create mode 100644 .dvc/.gitignore
 create mode 100644 .dvc/config
 create mode

The file will have its original line endings in your working directory
The file will have its original line endings in your working directory
The file will have its original line endings in your working directory


[master 5866b86] init
 8 files changed, 31 insertions(+)
 create mode 100644 mlruns/0/meta.yaml
 create mode 100644 mlruns/1/a9c3428ea19a40c0a0dae62e0857bdb2/meta.yaml
 create mode 100644 mlruns/1/a9c3428ea19a40c0a0dae62e0857bdb2/params/Model name
 create mode 100644 mlruns/1/a9c3428ea19a40c0a0dae62e0857bdb2/tags/mlflow.source.name
 create mode 100644 mlruns/1/a9c3428ea19a40c0a0dae62e0857bdb2/tags/mlflow.source.type
 create mode 100644 mlruns/1/a9c3428ea19a40c0a0dae62e0857bdb2/tags/mlflow.user
 create mode 100644 mlruns/1/meta.yaml


<div>
<p style="line-height:1.75;font-size:20px;background:#c1f0c1;padding:20px;border-radius:0px 0px 5px 5px">
<b>Main loop run the flow on the selected Model</b><br>
</p>
</div>

In [21]:
mlflow.start_run()

Exception: Run with UUID a9c3428ea19a40c0a0dae62e0857bdb2 is already active. To start a new run, first end the current run with mlflow.end_run(). To start a nested run, call start_run with nested=True

In [None]:
# 1. load and merge the data
# 2. create branch and push to the branch and dvc
# 3. decide according to the score how to proceed drop or merge to master

initial_train_data = pd.read_csv('../' + raw_data_path + 'initial_training_data.csv')
initial_train_data.to_csv("merge_data.csv")

Last_score = 0
Grace_error = 0.005

for seq in range(7):
    
    #parameters
    branch_name = 'branch_' + str(seq)
    csv_file_name = 'data_gathered_at_' + str(seq) + '.csv'
    commit_str = 'commit merge data of initial_training_data and ' + csv_file_name
    
    print("-------------------------------")
    print(f'Loop number {seq}: branch name {branch_name}, file to load {csv_file_name}')
    print("-------------------------------")
    
    #make sure u start from the master branch
    !git checkout master
    !dvc pull
    
    #load data and merge to the initial training
    print("-------------------------------")
    initial_train_data = pd.read_csv("merge_data.csv")
    print(f'initial_train_data contain {len(initial_train_data)} samples')
    train_data = pd.read_csv('../' + raw_data_path + csv_file_name)
    print(f'train_data contain {len(train_data)} samples')
    train_data = pd.concat([initial_train_data, train_data])
    print("-------------------------------")
    
    #create new branch
    print("-------------------------------")
    print(f'Create new branch name {branch_name}')
    print("-------------------------------")
    !git checkout -b {branch_name}
    
    #save the new file to /data directory
    train_data.to_csv("merge_data.csv")
    train_data = pd.read_csv('merge_data.csv')
    print(f'mergedt data contain {len(train_data)} samples')
    
    
    !dvc add merge_data.csv
    !dvc push merge_data.csv
    !git add merge_data.csv.dvc
    !git commit -m "{commit_str}"
    
    #prepare the data
    print("-------------------------------")
    print(f'Prepare the data')
    print("-------------------------------")
    initial_train_images, initial_train_labels = data_to_tf_data(train_data)
    
    #run the model
    print("-------------------------------")
    print(f'run the model')
    print("-------------------------------")
    history = MODEL.fit(initial_train_images, 
                    initial_train_labels,
                    validation_data=(validation_test_images, validation_test_labels),
                    class_weight = class_weight,
                    epochs=2, # should be 12
                    batch_size=64)
    
    test_loss, test_acc = MODEL.evaluate(test_images, test_labels)
    print("-------------------------------")
    print(f'The score is {test_acc}')
    print("-------------------------------")
    
    !git checkout master
    
    if test_acc >= (Last_score - Grace_error):
        print("-------------------------------")
        print(f'!!! Good score {test_acc} (last={Last_score}) merge {branch_name} to the master')
        print("-------------------------------")
        !git merge {branch_name}
        
    else:
        print("-------------------------------")
        print(f'!!! Bad score {test_acc} (last={Last_score}) delete {branch_name}')
        print("-------------------------------")
    
    if test_acc >= (Last_score - Grace_error):
        Last_score = test_acc

    print("-------------------------------")
    print(f'Deleting branch {branch_name}')
    print("-------------------------------")
    !git branch -D {branch_name} 
    
    #update the mlflow
    mlflow.log_metric(key="Accuracy", value=test_acc, step=seq)
              
    ### END OF LOOP
    

In [24]:
mlflow.end_run()

MlflowException: Run 'a9c3428ea19a40c0a0dae62e0857bdb2' not found

In [None]:
Last_score

In [None]:
!git checkout master

In [None]:
train_data = pd.read_csv('merge_data.csv')
len(train_data)

In [None]:
pred_test_labels = MODEL.predict(test_images)
pred_test_labels

<div style="line-height:1.75;background:#1e7b1e;padding-left:20px;padding-top:5px;padding-bottom:5px;border-radius:5px 5px 0px 0px">
<i style="font-size:40px;color:#c1f0c1;">Analyse Convergence</i>
   
</div>
<div>
<p style="line-height:1.75;font-size:16px;background:#c1f0c1;padding:20px;border-radius:0px 0px 5px 5px">
Analyse Convergence<br>
</p>
</div>

In [None]:
loss = history.history['loss']
loss_val = history.history['val_loss']
epochs = range(1, len(loss)+1)
plt.plot(epochs, loss, 'bo', label='loss_train')
plt.plot(epochs, loss_val, 'b', label='loss_val')
plt.title('value of the loss function')
plt.xlabel('epochs')
plt.ylabel('value of the loss function')
plt.legend()
plt.grid()
plt.show()

In [None]:
acc = history.history['accuracy']
acc_val = history.history['val_accuracy']
epochs = range(1, len(loss)+1)
plt.plot(epochs, acc, 'bo', label='accuracy_train')
plt.plot(epochs, acc_val, 'b', label='accuracy_val')
plt.title('accuracy')
plt.xlabel('epochs')
plt.ylabel('value of accuracy')
plt.legend()
plt.grid()
plt.show()

In [None]:
test_data = pd.read_csv('merge_data.csv')
test_image_array, test_image_label = prepare_data(test_data)

In [None]:
plot_image_and_emotion(test_image_array, test_image_label, pred_test_labels, 150)
plot_compare_distributions(test_labels, pred_test_labels, title1='test labels', title2='predict labels')

## pred_test_labels = model.predict(test_images)
#np.around(pred_test_labels, decimals=4, out=None)
print(pred_test_labels)

In [None]:
conf_mat = confusion_matrix(test_labels.argmax(axis=1), pred_test_labels.argmax(axis=1))

fig, ax = plot_confusion_matrix(conf_mat=conf_mat,
                                show_normed=True,
                                show_absolute=False,
                                class_names=emotions.values(),
                                figsize=(8, 8))
fig.show()

In [None]:
plot_examples(initial_train_data, initial_train_images, initial_train_labels,label=0)

In [None]:
plot_all_emotions(initial_train_data, initial_train_images, initial_train_labels)

<div>
<p style="line-height:1.75;font-size:20px;background:#c1f0c1;padding:20px;border-radius:0px 0px 5px 5px">
<b>Save the notebook as html file</b><br>
</p>
</div>

In [None]:

!jupyter nbconvert --to html Main_notebook_model_run.ipynb