In [None]:
!dvc https://github.com/iterative/dataset-registry tutorials/versioning/data.zip

In [None]:
# Hand the `data` path to `dvc add` so DVC starts tracking it.
!dvc add data

In [None]:
import os
import sys
from pathlib import Path

import mlflow
import numpy as np
from tensorflow.keras import applications
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.layers import Dropout, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm.keras import TqdmCallback

In [None]:
# Point MLflow at the tracking server *before* touching experiments, so the
# experiment is registered on the same backend the training run will log to.
mlflow.set_tracking_uri("http://localhost:5000")

EXPERIMENT_NAME = "cat-dog VGG classifier"

# `create_experiment` raises when the name already exists, which would break
# every re-run of this cell; reuse the existing experiment in that case.
_existing_experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if _existing_experiment is not None:
    experiment_id = _existing_experiment.experiment_id
else:
    experiment_id = mlflow.create_experiment(
        EXPERIMENT_NAME,
        # Artifacts are stored under ./meta/mlflow/mlartifacts relative to the
        # current working directory, expressed as a file:// URI.
        artifact_location=Path.cwd().joinpath("./meta/mlflow/mlartifacts").as_uri(),
        tags={"version": "v1", "priority": "P1"},
    )

In [None]:
# Activate the cat/dog experiment for subsequent runs. The name must match the
# one passed to `mlflow.create_experiment`; a different name makes MLflow
# create and log to a separate experiment.
experiment = mlflow.set_experiment("cat-dog VGG classifier")

In [None]:
# Direct subsequent MLflow calls to the tracking server at localhost:5000.
# NOTE(review): MLflow calls executed before this line use whichever tracking
# URI was active at that time — confirm the intended cell order.
mlflow.set_tracking_uri("http://localhost:5000")

In [3]:
# Working directory of the kernel; used below to build absolute data paths.
path = os.getcwd()

In [4]:
# Target image size (pixels) that the data generators resize every image to.
img_width, img_height = 150, 150

# Batch size used by the image generators and by training. It is defined in
# this early configuration cell so the generator cell can run on a fresh kernel.
batch_size = 128

In [13]:
def _count_files(directory):
    """Return the number of regular files located directly in `directory`."""
    return sum(
        1 for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )


# Output file for the trained classifier-head weights.
top_model_weights_path = 'model.h5'

# Dataset layout: data/<split>/<class>/<image files>.
train_data_dir = os.path.join('data', 'train')
validation_data_dir = os.path.join('data', 'validation')
cats_train_path = os.path.join(path, train_data_dir, 'cats')
cats_validation_path = os.path.join(path, validation_data_dir, 'cats')

# Sample counts are taken as twice the number of cat images.
# NOTE(review): this assumes the second class has exactly as many images as
# `cats` — confirm against the dataset.
nb_train_samples = 2 * _count_files(cats_train_path)
# Computed here (same rule as the training count) because later cells read
# `nb_validation_samples` before any other assignment to it has run.
nb_validation_samples = 2 * _count_files(cats_validation_path)


In [16]:
# Generator that only rescales pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1. / 255)

# VGG16 convolutional base with ImageNet weights, without the dense top.
model = applications.VGG16(weights='imagenet', include_top=False)

# Settings shared by both directory iterators: no labels are yielded
# (class_mode=None) and files are read in a fixed order (shuffle=False).
flow_options = dict(
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False,
)

train_generator = datagen.flow_from_directory(train_data_dir, **flow_options)

val_generator = datagen.flow_from_directory(validation_data_dir, **flow_options)

Found 1000 images belonging to 2 classes.
Found 800 images belonging to 2 classes.


In [17]:
# Run the convolutional base once over each split and cache the outputs.
# `steps` is passed by keyword: the second positional parameter of
# `Model.predict` is `batch_size`, which must not be given for generator input.
# Using `len(generator)` (the number of batches the iterator holds) covers
# every image even when the batch size does not divide the sample count.
bottleneck_features_train = model.predict(
    train_generator, steps=len(train_generator))

bottleneck_features_validation = model.predict(
    val_generator, steps=len(val_generator))

# Passing the file name lets NumPy open and close the file itself, so no
# file handle is left dangling.
np.save('bottleneck_features_train.npy', bottleneck_features_train)

np.save('bottleneck_features_validation.npy', bottleneck_features_validation)

In [18]:
def _half_and_half_labels(n_samples):
    """Return labels for `n_samples` rows: first half 0, second half 1."""
    half = n_samples // 2
    return np.array([0] * half + [1] * half)


# Load the cached bottleneck features by file name so NumPy manages the
# file handles itself.
train_data = np.load('bottleneck_features_train.npy')
validation_data = np.load('bottleneck_features_validation.npy')

# Labels are sized from the loaded arrays so they always line up with the
# features actually on disk.
# NOTE(review): assumes the features are ordered class 0 first, then class 1,
# with equally many samples per class — confirm against the generators.
train_labels = _half_and_half_labels(len(train_data))
validation_labels = _half_and_half_labels(len(validation_data))

# Small fully-connected classifier on top of the cached features, ending in a
# single sigmoid unit for the binary decision.
model = Sequential()
model.add(Flatten(input_shape=train_data.shape[1:]))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

In [19]:
# Configure training: binary cross-entropy loss, RMSprop optimizer, and
# accuracy reported as a metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [29]:
# Validation-set size, number of training epochs, and mini-batch size.
nb_validation_samples, epochs, batch_size = 800, 580, 128

In [30]:
# Switch on MLflow's Keras autologging before training starts.
mlflow.keras.autolog()

# Progress bar plus a per-epoch CSV log written to metrics.csv.
training_callbacks = [TqdmCallback(), CSVLogger("metrics.csv")]

# Keras' own console output is silenced (verbose=0).
model.fit(
    x=train_data,
    y=train_labels,
    batch_size=batch_size,
    epochs=epochs,
    verbose=0,
    validation_data=(validation_data, validation_labels),
    callbacks=training_callbacks,
)

0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]



<tensorflow.python.keras.callbacks.History at 0x2b4bfafc6a0>

In [31]:
# Persist the trained weights to the path configured in
# `top_model_weights_path`.
model.save_weights(filepath=top_model_weights_path)

In [None]:
# Hand the saved weights file `model.h5` to `dvc add` so DVC tracks it.
!dvc add model.h5