In [None]:
!dvc https://github.com/iterative/dataset-registry tutorials/versioning/data.zip

In [3]:
!dvc add data

'dvc' is not recognized as an internal or external command,
operable program or batch file.


In [None]:
git add data.dvc .gitignore

In [58]:
import numpy as np
import sys
import os

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Flatten, Dense
from tensorflow.keras import applications
from tensorflow.keras.callbacks import CSVLogger
from tqdm.keras import TqdmCallback
from tensorflow import keras 

import mlflow
from pathlib import Path

In [66]:
experiment_id = mlflow.create_experiment(
    "cat-dog VGG classifier", 
    artifact_location=Path.cwd().joinpath("./metadata/mlflow/mlartifacts").as_uri(),
    tags={"version":"v1", "priority":"P1"}
)

RestException: BAD_REQUEST: (sqlite3.IntegrityError) UNIQUE constraint failed: experiments.name
[SQL: INSERT INTO experiments (name, artifact_location, lifecycle_stage) VALUES (?, ?, ?)]
[parameters: ('cat-dog VGG classifier', 'file:///D:/production/DL_codes/metadata/mlflow/mlartifacts', 'active')]
(Background on this error at: https://sqlalche.me/e/14/gkpj)

In [67]:
experiment = mlflow.set_experiment("cat-dog VGG classifier")

In [68]:
mlflow.set_tracking_uri("http://127.0.0.1:5000")

In [13]:
path = os.getcwd()

In [14]:
img_width, img_height = 150, 150

In [15]:
top_model_weights_path = 'model.h5'
train_data_dir = os.path.join('data', 'train')
validation_data_dir = os.path.join('data', 'validation')
cats_train_path = os.path.join(path, train_data_dir, 'cats')
nb_train_samples = 2 * len([name for name in os.listdir(cats_train_path)
                            if os.path.isfile(
                                os.path.join(cats_train_path, name))])


In [21]:
batch_size = 128

In [22]:
nb_validation_samples = 800

In [18]:
datagen = ImageDataGenerator(rescale=1. / 255)

# build the VGG16 network
model = applications.VGG16(include_top=False, weights='imagenet')

train_generator = datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)

val_generator = datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)

Found 1000 images belonging to 2 classes.
Found 800 images belonging to 2 classes.


In [23]:
bottleneck_features_train = model.predict(
    train_generator, nb_train_samples // batch_size)



bottleneck_features_validation = model.predict(
    val_generator, nb_validation_samples // batch_size)


np.save(open('bottleneck_features_train.npy', 'wb'),
        bottleneck_features_train)

np.save(open('bottleneck_features_validation.npy', 'wb'),
        bottleneck_features_validation)

In [24]:
train_data = np.load(open('bottleneck_features_train.npy', 'rb'))

train_labels = np.array(
    [0] * (int(nb_train_samples / 2)) + [1] * (int(nb_train_samples / 2)))

validation_data = np.load(open('bottleneck_features_validation.npy', 'rb'))

validation_labels = np.array(
    [0] * (int(nb_validation_samples / 2)) +
    [1] * (int(nb_validation_samples / 2)))

model = Sequential()
model.add(Flatten(input_shape=train_data.shape[1:]))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

In [51]:
optimizer = 'rmsprop'
loss_fn = 'binary_crossentropy'

In [60]:
model.compile(optimizer=optimizer,
              loss=loss_fn, metrics=[keras.metrics.Accuracy(), keras.metrics.Precision(), keras.metrics.Recall()])

In [61]:
epochs = 100

In [79]:
with mlflow.start_run(experiment_id=experiment_id, description="A model to classify between cat and dog example") as run:
    
    history = model.fit(train_data, train_labels,
          epochs=epochs,
          batch_size=batch_size,
          validation_data=(validation_data, validation_labels),
          verbose=0,
          callbacks=[TqdmCallback(), CSVLogger("metrics.csv")])
    
    mlflow.keras.log_model(model, "models")
    
    for i in range(epochs):    
        mlflow.log_metric("loss", history.history['loss'][i])
        mlflow.log_metric("accuracy", history.history['accuracy'][i])
        mlflow.log_metric("val_loss", history.history['val_loss'][i])
        mlflow.log_metric("val_accuracy", history.history['val_accuracy'][i])
        mlflow.log_metric("val_precision", history.history['val_precision'][i])
        mlflow.log_metric("val_recall", history.history['val_recall'][i])
        
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("loss_function", loss_fn)
    mlflow.log_param("optimizer", optimizer)
    mlflow.log_param("batch_size", batch_size)



0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
INFO:tensorflow:Assets written to: C:\PROGRA~1\KMSpico\temp\tmp37_katoc\model\data\model\assets


In [80]:
model.save_weights(top_model_weights_path)

In [None]:
!dvc add model.h5 metrics.csv