In [14]:
import tensorflow
from tensorflow.keras.layers import AveragePooling2D, Dropout, Flatten, Dense, Input, BatchNormalization
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import optimizers, models, layers
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications import ResNet50V2

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import io
from PIL import Image

from minio import Minio
# from minio.error import ResponseError

from kubeflow.metadata import metadata
from datetime import datetime
from uuid import uuid4

# Data Preparation

In [16]:
print("Loading images...")

minioClient = Minio(
                'minio-service.kubeflow:9000',
                access_key='minio', 
                secret_key='minio123', 
                secure=False
            )

data = []
labels = []

# read all X-Rays in the specified path, and resize them all to 256x256

for i in minioClient.list_objects('dataset', prefix='covid19', recursive=True):
    label = i.object_name.split(os.path.sep)[-2]
    minioObj = minioClient.get_object('dataset', i.object_name)
    byteArray = minioObj.read()
    pil_image = Image.open(io.BytesIO(byteArray)).convert('RGB')
    image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
    image = cv2.resize(image, (256, 256))
    data.append(image)
    labels.append(label)

Loading images...


In [17]:
#normalise pixel values to real numbers between 0.0 - 1.0 
data = np.array(data) / 255.0
labels = np.array(labels)

# perform one-hot encoding for a 3-class labeling 
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(labels)
labels = to_categorical(integer_encoded)

print("... ... ", len(data), "images loaded in 3x classes:")
print(label_encoder.classes_)

... ...  227 images loaded in 3x classes:
['covid' 'normal' 'pneumonia_bac']


In [18]:
(x_train, x_val, y_train, y_val) = train_test_split(data, labels, test_size=0.20, stratify=labels)

### Create a medtadata - workspace

In [15]:
METADATA_STORE_HOST = "metadata-grpc-service.kubeflow" # default DNS of Kubeflow Metadata gRPC serivce.
METADATA_STORE_PORT = 8080
METADATA_WORKSPACE_NM = "WS_{:%Y.%m%d.%H%M}".format(datetime.now())
OWNER="architeam@samsung.com"

# metadata - Create a workspace
ws1 = metadata.Workspace(
    store=metadata.Store(grpc_host=METADATA_STORE_HOST, grpc_port=METADATA_STORE_PORT),
    name=METADATA_WORKSPACE_NM,
    labels={"n1": "v1"})

# metadata - Create a run in a workspace
r = metadata.Run(
    workspace=ws1,
    name="run-" + datetime.utcnow().isoformat("T") ,
)

# Create an execution in a run
exec = metadata.Execution(
    name = "execution" + datetime.utcnow().isoformat("T") ,
    workspace=ws1,
    run=r,
)

### Log a data set

In [19]:
date_set_version = "data_set_version_" + str(uuid4())
data_set = exec.log_input(
        metadata.DataSet(
            name="covid19-dataset",
            owner=OWNER,
            uri="s3://dataset/covid19",
            version=date_set_version))

# Model 구성

In [20]:
# Hyperparameters 
LEARNING_RATE = 0.00001 # List: 0.001, 0.0001, 0.0003, 0.00001, 0.00003
DENSE = 128             # Range: 50-200

In [21]:
model = Sequential()
adam_s = Adam(learning_rate = LEARNING_RATE)

#model.add(VGG16(input_shape=(224, 224, 3), include_top=False, weights='imagenet', pooling='average'))
model.add(ResNet50V2(input_shape=(256, 256, 3),include_top=False, weights='imagenet',pooling='average'))

for layer in model.layers:
    layer.trainable = False

model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(DENSE, activation='relu'))
model.add(Dense(DENSE, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer=adam_s, metrics=['accuracy'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50v2 (Model)           (None, 8, 8, 2048)        23564800  
_________________________________________________________________
batch_normalization_1 (Batch (None, 8, 8, 2048)        8192      
_________________________________________________________________
flatten_1 (Flatten)          (None, 131072)            0         
_________________________________________________________________
dense_3 (Dense)              (None, 128)               16777344  
_________________________________________________________________
dense_4 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 387       
Total params: 40,367,235
Trainable params: 16,798,339
Non-trainable params: 23,568,896
_________________________________

# Model 학습

In [22]:
# train the head of the network
print("Training the full stack model...")
hist = model.fit(x_train, y_train, epochs=3, validation_data=(x_val, y_val), batch_size=8)

Training the full stack model...
Train on 181 samples, validate on 46 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [23]:
loss        = hist.history['loss'][-1]
accuracy    = hist.history['accuracy'][-1]
valloss     = hist.history['val_loss'][-1]
valaccuracy = hist.history['val_accuracy'][-1]

### Log a model

In [24]:
model_version = "model_version_" + str(uuid4())
stringlist = []
model.summary(print_fn=lambda x: stringlist.append(x))
short_model_summary = "\n".join(stringlist)

model_meta = exec.log_output(
    metadata.Model(
            name="covid19-model",
            owner="OWNER",
            uri=MODEL_URI,
            model_type="|".join([x.name for x in model.layers]), #"ResNet50V2",
            training_framework={
                "name": "tensorflow",
                "version": tensorflow.__version__
            },
            hyperparameters={
                "learning_rate": LEARNING_RATE,
                "dense": DENSE
            },
            version=model_version,
            labels={"covid18-label": "l1"}))
print(model_meta)

kubeflow.metadata.metadata.Model(workspace=None, name='covid19-model', description=None, owner='OWNER', uri='s3://model/covid-19/1', version='model_version_df594385-a0e7-4a5a-a266-dd95be740f5c', model_type='resnet50v2|batch_normalization_1|flatten_1|dense_3|dense_4|dense_5', training_framework={'name': 'tensorflow', 'version': '2.1.0'}, hyperparameters={'learning_rate': 1e-05, 'dense': 128}, labels={'covid18-label': 'l1'}, id=40, create_time='2021-01-07T16:18:51.474568Z', kwargs={})


### Log the evaluation of a model

In [27]:
metrics = exec.log_output(
    metadata.Metrics(
            name="covid19-evaluation",
            owner=OWNER,
            uri="s3://eval/covid19-eval.csv",
            data_set_id=str(data_set.id),
            model_id=str(model_meta.id),
            metrics_type=metadata.Metrics.VALIDATION,
            values={"accuracy": str(valaccuracy)},
            labels={"covid18-label": "l1"}))

# Model 저장

In [28]:
os.environ.update({
    'S3_ENDPOINT'          : 'minio-service.kubeflow:9000',
    'AWS_ACCESS_KEY_ID'    : 'minio',
    'AWS_SECRET_ACCESS_KEY': 'minio123',
    'S3_USE_HTTPS'         : '0',   # Whether or not to use HTTPS. Disable with 0.                        
    'S3_VERIFY_SSL'        : '0'    # If HTTPS is used, controls if SSL should be enabled. Disable with 0.
})  

MODEL_URI="s3://model/covid-19/1"
model.save(MODEL_URI)

INFO:tensorflow:Assets written to: s3://model/covid-19/1/assets


INFO:tensorflow:Assets written to: s3://model/covid-19/1/assets


In [29]:
print(valloss)
print(valaccuracy)

0.9844821691513062
0.5652174
