# TensorflowConstrainedOptimization

In [None]:
!pip install -q -U pip==20.2

!pip install git+https://github.com/google-research/tensorflow_constrained_optimization

In [None]:
# Mount Google Drive at /content/drive; the `google.colab` module is imported
# here, so this cell presumably only runs inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!unzip /content/drive/pathto/UTKFace28.zip -d /content/UTKFace28

In [None]:
import os
import sys
import tempfile
import glob

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_hub as hub
from sklearn.model_selection import train_test_split

In [None]:
def utk_generator(include_labels=True, include_groups=True, train=True, info=False):
    """Yield UTKFace examples from the train or test split.

    File names are parsed as ``<age>_<gender>_<race>_<suffix>``. Files whose
    stem does not split into exactly four ``_``-separated fields, or whose
    first three fields are not integers, are skipped.

    Args:
        include_labels: When true (and ``include_groups`` is false), yield
            ``(img, label)`` pairs.
        include_groups: When true, yield ``(img, label, group)`` triples.
            Takes precedence over ``include_labels``.
        train: When true, iterate the 70% training split, otherwise the
            remaining test split.
        info: When true, yield ``(filename, gender, age, race)`` tuples of
            strings instead of image data. Takes precedence over the other
            flags.

    Yields:
        Depending on the flags above: metadata tuples, ``(img, label, group)``,
        ``(img, label)`` or just ``img``. ``img`` is the decoded image as a
        float32 tensor, ``label`` is the integer gender code and ``group`` is
        1 when the race code equals 2 and 0 otherwise.
    """
    # glob returns files in filesystem order; sorting makes the seeded split
    # identical across calls, which callers that pair predictions with the
    # metadata stream rely on.
    paths = sorted(glob.glob("UTKFace28/UTKFace28/*"))

    files_train, files_test = train_test_split(paths, train_size=0.7, random_state=43)
    paths = files_train if train else files_test

    for path in paths:
        try:
            filename = os.path.splitext(os.path.basename(path))[0]
            age, gender, race, _ = filename.split('_')

            # The int() conversions double as validation: a non-numeric field
            # raises ValueError and the file is skipped.
            label, _age, race_code = int(gender), int(age), int(race)

            if info:
                # Metadata only: no need to read or decode the image.
                yield (filename, gender, age, race)
                continue

            # Compare the parsed integer; the raw `race` field is a string and
            # would never equal the int 2.
            group = 1 if race_code == 2 else 0

            img = tf.io.read_file(path)
            img = tf.image.decode_jpeg(img)
            img = tf.image.convert_image_dtype(img, tf.float32)

            if include_groups:
                yield img, label, group
            elif include_labels:
                yield img, label
            else:
                yield img
        except ValueError:
            continue

def generate_utk_dataset(include_labels=True, include_groups=True, train=True):
    """Wrap ``utk_generator`` in a ``tf.data.Dataset``.

    Args:
        include_labels: Produce ``(image, label)`` elements when
            ``include_groups`` is false.
        include_groups: Produce ``(image, label, group)`` elements; takes
            precedence over ``include_labels``.
        train: Forwarded to ``utk_generator`` to pick the split.

    Returns:
        A dataset whose elements are float32 tensors: images declared with
        shape ``(28, 28, 3)``, labels and groups declared as scalars.
    """
    image_shape = (28, 28, 3)

    if include_groups:
        generator_args = [True, True, train]
        element_types = (tf.float32, tf.float32, tf.float32)
        element_shapes = (image_shape, (), ())
    elif include_labels:
        generator_args = [True, False, train]
        element_types = (tf.float32, tf.float32)
        element_shapes = (image_shape, ())
    else:
        generator_args = [False, False, train]
        element_types = tf.float32
        element_shapes = image_shape

    return tf.data.Dataset.from_generator(
        utk_generator,
        args=generator_args,
        output_types=element_types,
        output_shapes=element_shapes,
    )

In [None]:
# Input pipeline settings for the unconstrained baseline model.
batch_size = 32
img_size = (28, 28)
AUTOTUNE = tf.data.AUTOTUNE

# Training stream: (image, label) pairs, cached, shuffled, repeated
# indefinitely and then batched.
train_ds = generate_utk_dataset(include_labels=True, include_groups=False, train=True)
train_ds = train_ds.cache().shuffle(1024).repeat()
train_ds = train_ds.batch(batch_size).prefetch(buffer_size=AUTOTUNE)

# Test stream: one example per batch, in generator order.
test_ds = generate_utk_dataset(include_labels=True, include_groups=False, train=False)
test_ds = test_ds.cache().batch(1)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

In [None]:
def create_model():
  """Build and compile the binary classifier used throughout the notebook.

  The network is a TF-Hub ResNet-v1-50 feature extractor (loaded as trainable)
  followed by a single sigmoid unit, built for 28x28x3 inputs.

  Returns:
    A compiled ``tf.keras.Sequential`` model using Adam (learning rate 1e-4),
    MSE loss and the metrics ``'mae'`` and ``'acc'``.
  """
  feature_extractor = hub.KerasLayer(
      "https://tfhub.dev/google/imagenet/resnet_v1_50/feature_vector/5",
      trainable=True)
  classifier_head = tf.keras.layers.Dense(1, activation='sigmoid')

  model = tf.keras.Sequential([feature_extractor, classifier_head])
  model.build([None, 28, 28, 3])  # Batch input shape.

  adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
  model.compile(optimizer=adam, loss='mse', metrics=['mae', 'acc'])

  return model

In [None]:
# Train the unconstrained baseline. The training dataset repeats forever, so
# the length of an epoch is fixed through steps_per_epoch.
epochs = 10
model = create_model()
history = model.fit(train_ds, epochs=epochs, steps_per_epoch=1000)

In [None]:
# Plot the per-epoch training accuracy recorded by fit() under the 'acc' key.
plt.plot(history.history['acc'], label='acc')
plt.legend()

In [None]:
# Save the trained baseline model under the relative path below.
# NOTE(review): "basline" looks like a typo for "baseline"; left unchanged
# because anything loading the model would use this exact path.
model.save("baslineresnet28")

In [None]:
# Evaluate the model on the test data using `evaluate`.
# evaluate() returns the loss followed by every compiled metric, so the values
# are labelled by metric name instead of a hard-coded "loss, acc" caption.
print("Evaluate on test data")
results = model.evaluate(test_ds)
print("test metrics:", dict(zip(model.metrics_names, results)))

In [None]:
def generate_test_summary(model, test_ds):
  """Collect test-set predictions next to the ground truth parsed from file names.

  Args:
    model: Object with a ``predict`` method returning one score per example.
    test_ds: Dataset passed to ``model.predict``.
      NOTE(review): rows are paired positionally with the metadata stream from
      ``utk_generator(train=False, info=True)``, so ``test_ds`` is assumed to
      iterate the same files in the same order -- confirm at the call site.

  Returns:
    A ``pandas.DataFrame`` with columns ``gender_true``, ``age_true``,
    ``race_true``, ``gender_pred`` and ``files``. ``gender_pred`` is 1 where
    the score exceeds 0.5 and 0 otherwise.
  """
  scores = model.predict(test_ds)
  gender_pred = np.where(scores > 0.5, 1, 0).flatten()

  # Each record is (filename, gender, age, race).
  records = list(utk_generator(train=False, info=True))

  return pd.DataFrame({
      'gender_true': [record[1] for record in records],
      'age_true': [record[2] for record in records],
      'race_true': [record[3] for record in records],
      'gender_pred': gender_pred,
      'files': [record[0] for record in records],
  })



# Constrained Model

In [None]:
import tensorflow_constrained_optimization as tfco
from tensorflow_metadata.proto.v0 import schema_pb2

In [None]:
# Enable eager execution on TensorFlow 1.x. The major version is compared as
# an integer: comparing version strings is lexicographic, so e.g. "10.0.0"
# would sort before "2.0.0".
if int(tf.__version__.split(".")[0]) < 2:
  tf.compat.v1.enable_eager_execution()
  print("Eager execution enabled.")
else:
  print("Eager execution enabled by default.")

print("TensorFlow " + tf.__version__)

In [None]:
def save_model(model, subdir):
  """Save ``model`` in TensorFlow format inside a fresh temporary directory.

  Args:
    model: Object exposing ``save(path, save_format=...)``.
    subdir: Name of the directory, created under a new temp dir whose name
      starts with ``saved_models``, that receives the model.

  Returns:
    The path the model was saved to.
  """
  destination = os.path.join(tempfile.mkdtemp(prefix='saved_models'), subdir)
  model.save(destination, save_format='tf')
  return destination

In [None]:
# The batch size is needed to create the input, labels and group variables.
# They are initialized with all 0's and are later assigned the content of each
# training batch.
# NOTE(review): each batch should contain enough examples of the constrained
# group for its rate estimate to be meaningful -- confirm 32 is large enough.
# set_seeds()
model_constrained = create_model()
BATCH_SIZE = 32
IMAGE_SIZE = 28
# Create input tensor.
input_tensor = tf.Variable(
    np.zeros((BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3), dtype="float32"),
    name="input")

# Create labels and group tensors (assuming both labels and groups are binary).
labels_tensor = tf.Variable(
    np.zeros(BATCH_SIZE, dtype="float32"), name="labels")
groups_tensor = tf.Variable(
    np.zeros(BATCH_SIZE, dtype="float32"), name="groups")

# Create a function that returns the applied 'model' to the input tensor
# and generates constrained predictions.
def predictions():
  return model_constrained(input_tensor)

# Create overall context and subsetted context.
# The subsetted context contains the examples whose group attribute is > 0,
# i.e. the examples flagged as belonging to the group being constrained.
# Labels and the subset predicate are passed as lambdas; presumably TFCO calls
# them on each evaluation so they see the values currently assigned to the
# variables.
context = tfco.rate_context(predictions, labels=lambda:labels_tensor)
context_subset = context.subset(lambda:groups_tensor > 0)

# Setup list of constraints.
# In this notebook there is a single constraint: the false negative rate on the
# subset must be less than or equal to 1%.
constraints = [tfco.false_negative_rate(context_subset) <= 0.01]

# Setup rate minimization problem: minimize overall error rate s.t. constraints.
problem = tfco.RateMinimizationProblem(tfco.error_rate(context), constraints)

# Create constrained optimizer and obtain train_op.
# Separate optimizers are specified for the objective and constraints
optimizer = tfco.ProxyLagrangianOptimizerV2(
      optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
      constraint_optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
      num_constraints=problem.num_constraints)

# A list of all trainable variables is also needed to use TFCO.
var_list = (model_constrained.trainable_weights + list(problem.trainable_variables) +
            optimizer.trainable_variables())

In [None]:
# Obtain train set batches of (image, label, group). repeat() is applied before
# batch(), so every batch has exactly BATCH_SIZE rows and fits the fixed-shape
# variables it is assigned to below.
train_ds_constrained = generate_utk_dataset(include_labels=True, include_groups=True, train=True)

AUTOTUNE = tf.data.AUTOTUNE
train_ds_constrained = train_ds_constrained.cache().shuffle(1024).repeat().batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

NUM_ITERATIONS = 100  # Number of training iterations.
SKIP_ITERATIONS = 10  # Snapshot the model once in this many iterations.

# List of objective and constraints across snapshotted iterations.
objective_list = []
violations_list = []

# Snapshots live in a temporary directory. TemporaryDirectory creates it
# securely (unlike mktemp + mkdir) and removes it on exit, even when training
# raises, without shelling out to `rm`.
with tempfile.TemporaryDirectory() as temp_directory:
  snapshot_prefix = os.path.join(temp_directory, "celeb_a_constrained_")

  # Training iterations.
  iteration_count = 0
  for (image, label, group) in train_ds_constrained:
    # Assign current batch to input, labels and groups tensors.
    input_tensor.assign(image)
    labels_tensor.assign(label)
    groups_tensor.assign(group)

    # Run gradient update.
    optimizer.minimize(problem, var_list=var_list)

    # Record objective and violations.
    objective = problem.objective()
    violations = problem.constraints()

    sys.stdout.write(
        "\r Iteration %d: Hinge Loss = %.3f, Max. Constraint Violation = %.3f"
        % (iteration_count + 1, objective, max(violations)))

    # Snapshot model once in SKIP_ITERATIONS iterations.
    if iteration_count % SKIP_ITERATIONS == 0:
      objective_list.append(objective)
      violations_list.append(violations)

      # Integer division keeps the snapshot index an int, so the file name
      # matches the position of this snapshot in the lists above.
      model_constrained.save_weights(
          snapshot_prefix + str(iteration_count // SKIP_ITERATIONS) + ".h5")

    iteration_count += 1
    if iteration_count >= NUM_ITERATIONS:
      break

  # Choose best model from recorded iterates and load that model while the
  # snapshot files still exist.
  best_index = tfco.find_best_candidate_index(
      np.array(objective_list), np.array(violations_list))

  model_constrained.load_weights(snapshot_prefix + str(best_index) + ".h5")

In [None]:
# Build the per-file prediction summary for the constrained model on test_ds.
df = generate_test_summary(model_constrained, test_ds)

In [None]:
# Write the summary table to constrained.csv (relative to the working directory).
df.to_csv('constrained.csv')