# JupyterHub Notebook

### This notebook server is hosted on the OpenShift platform which provides a separate server for individual user. The platform take care about the provisioning of the server and allocating related to storage.

In [38]:
! pip install Minio

You should consider upgrading via the '/opt/app-root/bin/python3 -m pip install --upgrade pip' command.[0m


In [39]:
from minio import Minio
from minio.error import ResponseError
import os
import os.path

import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow import feature_column
from tensorboard.plugins.hparams import api as hp

# Define some functions

In [40]:
def get_s3_server():
    minioClient = Minio('minio-ml-workshop:9000',
                    access_key='minio',
                    secret_key='minio123',
                    secure=False)

    return minioClient

def upload_learning_stats_to_s3(folder_name):
    minioClient = get_s3_server()

    files = []
    for r, d, f in os.walk(folder_name):
        for file in f:
            files.append(os.path.join(r, file))

    for f in files:
        print(f)
        minioClient.fput_object(bucket_name='model-stats', object_name="tensordata/"  + f , file_path='./' + f)


#%%

def download_all_files(bucket_name):
    minioClient = get_s3_server()
    objects = minioClient.list_objects_v2(bucket_name=bucket_name,
                                          recursive=True)
    for obj in objects:
        # print(obj.bucket_name, obj.object_name.encode('utf-8'), obj.last_modified,
        #       obj.etag, obj.size, obj.content_type)
        try:
            minioClient.fget_object(obj.bucket_name, obj.object_name,
                                          '/tmp/' + os.path.basename(obj.object_name))
            # print(minioClient.fget_object(obj.bucket_name, obj.object_name,
            #                               '/tmp/' + os.path.basename(obj.object_name)))
        except ResponseError as err:
            print(err)


#%%

def get_run_logdir(root_logdir):
    import time
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, run_id)


def deploy_model(model_name):
    minioClient = get_s3_server()

    files = []
    folder_name = model_name
    for r, d, f in os.walk(folder_name):
        for file in f:
                files.append(os.path.join(r, file))

    for f in files:
        print(f)
        minioClient.fput_object(bucket_name='models', object_name=f , file_path='./' + f)


# Fetch Data from S3 bucket hosted on the OpenShift Platform

In [41]:
root_logdir = "."

# https://www.tensorflow.org/tensorboard/hyperparameter_tuning_with_hparams



print(tf.keras.__version__)
print(tf.__version__)





csv_columns = ["row_number", "Time", "V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12", "V13", "V14", "V15",
               "V16", "V17",  "Amount", "Class", "V18", "V19", "V20", "V21", "V22", "V23", "V24", "V25", "V26", "V27", "V28" ]

download_all_files('data')

source = '/tmp'
for file in os.listdir(source):
    if 'part-0011' in file:
        old_file = os.path.join("/tmp", file)
        new_file = os.path.join("/tmp", "test-" + file)
        os.rename(old_file, new_file)
        continue
    if 'part-0012' in file:
        old_file = os.path.join("/tmp", file)
        new_file = os.path.join("/tmp", "validate-" + file)
        os.rename(old_file, new_file)
        continue


#%%




csv_files = "/tmp/*.csv"
dataset = tf.data.experimental.make_csv_dataset(csv_files, column_names=csv_columns, batch_size=1024, header=False,
                                                label_name="Class",
                                                select_columns=["V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9",
                                                                "V10", "V11", "V12", "V13", "V14", "V15", "V16", "V17",
                                                                "V18", "V19", "V20", "V21", "V22", "V23", "V24", "V25",
                                                                "V26", "V27", "V28", "Amount", "Class"])
test_dataset = tf.data.experimental.make_csv_dataset("/tmp/test-*.csv", batch_size=1024, column_names=csv_columns, header=False,
                                                label_name="Class",
                                                select_columns=["V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9",
                                                                "V10", "V11", "V12", "V13", "V14", "V15", "V16", "V17",
                                                                "V18", "V19", "V20", "V21", "V22", "V23", "V24", "V25",
                                                                "V26", "V27", "V28", "Amount", "Class"])

validate_dataset = tf.data.experimental.make_csv_dataset("/tmp/validate-*.csv", batch_size=1024, column_names=csv_columns, header=False,
                                                label_name="Class",
                                                select_columns=["V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9",
                                                                "V10", "V11", "V12", "V13", "V14", "V15", "V16", "V17",
                                                                "V18", "V19", "V20", "V21", "V22", "V23", "V24", "V25",
                                                                "V26", "V27", "V28", "Amount", "Class"])

feature_columns = []
for fc in ["V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9",
           "V10", "V11", "V12", "V13", "V14", "V15", "V16", "V17",
           "V18", "V19", "V20", "V21", "V22", "V23", "V24", "V25",
           "V26", "V27", "V28", "Amount"]:
    feature_columns.append(feature_column.numeric_column(fc, shape=()))


print("Data Fetched")


2.2.4-tf
2.1.0
Data Fetched


# Define Model

In [42]:
model = tf.keras.models.Sequential()
model.add(keras.layers.DenseFeatures(feature_columns))
model.add(keras.layers.Dense(50,   activation="tanh"))
keras.layers.Dropout(rate=0.2),
model.add(keras.layers.Dense(50, activation="tanh"))
keras.layers.Dropout(rate=0.2),
model.add(keras.layers.Dense(20, activation="tanh"))
keras.layers.Dropout(rate=0.2),
# 1 becuase the output is fraud or not
model.add(keras.layers.Dense(1, activation="sigmoid"))


# Compile Model

In [44]:
model.compile(optimizer='adam', loss='mean_squared_error', metrics=["accuracy"])

# Train your model
### The CPU and GPU and Machine is alloated on demand by the OpenShift Platform

In [45]:
steps_per_epoch = 280000 / 1024
validation_steps = 4000/1024

# tensor board
run_logdir = get_run_logdir(root_logdir)

# tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=run_logdir,
                                             histogram_freq=2, write_graph=True, write_images=True,
                                             update_freq='epoch', profile_batch=2, embeddings_freq=0,
                                             embeddings_metadata=None)

history = model.fit(dataset, validation_data=validate_dataset, validation_steps=validation_steps,   
                    steps_per_epoch=steps_per_epoch, epochs=1, callbacks=[tensorboard_cb])


Train for 273.4375 steps, validate for 3.90625 steps


# Evaluate your model and measure accuracy

In [46]:
evaluation_result = model.evaluate(test_dataset, steps= validation_steps)
print('test loss, test acc:', evaluation_result)


test loss, test acc: [0.0014899923466145993, 0.9987793]


# Upload Model to Visualise its internals. The visualisation server is hosted on OpenShift Platform

In [47]:
upload_learning_stats_to_s3(run_logdir.replace("./", "", 1))

run_2020_05_01-04_59_51/validation/events.out.tfevents.1588309208.jupyterhub-nb-fmasood.38.77777.v2
run_2020_05_01-04_59_51/train/events.out.tfevents.1588309192.jupyterhub-nb-fmasood.38.74350.v2
run_2020_05_01-04_59_51/train/events.out.tfevents.1588309193.jupyterhub-nb-fmasood.profile-empty
run_2020_05_01-04_59_51/train/plugins/profile/2020-05-01_04-59-53/local.trace


# Upload the model to be used as an API. The API Server is hosted by OpenShift

In [49]:
tf.saved_model.save(model, "./test_model/1", signatures=None)
deploy_model("test_model")

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: ./test_model/1/assets
test_model/1/saved_model.pb
test_model/1/variables/variables.index
test_model/1/variables/variables.data-00000-of-00001
