In [0]:
%matplotlib inline

In [0]:
# ML and numerical libraries
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# DANN paper implementation helpers
import os
import cv2
from tensorflow.python.framework import ops
import urllib

# Text processing
from sklearn.feature_extraction.text import CountVectorizer

# Data visualization
from sklearn.manifold import TSNE

In [0]:
def read_image_from_path(path):
    resp = urllib.request.urlopen(os.path.join("https://raw.githubusercontent.com/radusqrt/research/master/datasets/crisis-mmd", path))
    image = np.asarray(bytearray(resp.read()), dtype="uint8")
    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
    return image
  

def preprocess_image(paths):
    paths = [cv2.resize(read_image_from_path(path), (160, 160), interpolation=cv2.INTER_LINEAR) for path in paths]
    return paths

In [0]:
class FlipGradientBuilder(object):
    def __init__(self):
        self.num_calls = 0

    def __call__(self, x, l=1.0):
        grad_name = "FlipGradient%d" % self.num_calls
        @ops.RegisterGradient(grad_name)
        def _flip_gradients(op, grad):
            return [tf.negative(grad) * l]
        
        g = tf.compat.v1.get_default_graph()
        with g.gradient_override_map({"Identity": grad_name}):
            y = tf.identity(x)
            
        self.num_calls += 1
        return y
    
flip_gradient = FlipGradientBuilder()

In [0]:
def weight_variable(shape):
    initial = tf.random.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    return tf.nn.max_pool2d(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')


def shuffle_aligned_list(data):
    print(data[0].shape)
    """Shuffle arrays in a list by shuffling each array identically."""
    num = data[0].shape[0]
    p = np.random.permutation(num)
    return [d[p] for d in data]


# def batch_generator(data, batch_size, shuffle=True):
def batch_generator(data, batch_size, shuffle=False):
    """Generate batches of data.
    
    Given a list of array-like objects, generate batches of a given
    size by yielding a list of array-like objects corresponding to the
    same slice of each input.
    """
    if shuffle:
        data = shuffle_aligned_list(data)

    batch_count = 0
    while True:
        if batch_count * batch_size + batch_size >= len(data[0]):
            batch_count = 0

            if shuffle:
                data = shuffle_aligned_list(data)

        start = batch_count * batch_size
        end = start + batch_size
        batch_count += 1
        yield [d[start:end] for d in data]

In [0]:
# Mandatory for CUDA, NVIDIA, Linux Mint compatibility with middle-level TF code
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
sess = InteractiveSession(config=config)

# Reduce logging output.
tf.logging.set_verbosity(tf.logging.ERROR)

In [0]:
# Useless columns
useless_columns = ['tweet_id', 'image_id', 'text_info', 'text_info_conf', 'image_info_conf', 'text_human', 'text_human_conf', 'image_human', 'image_human_conf', 'tweet_text', 'image_url']

# Read and preprocess datasets
hurricane_harvey = pd.read_csv(
        'https://raw.githubusercontent.com/radusqrt/research/master/datasets/crisis-mmd/annotations/hurricane_harvey_final_data.tsv', sep='\t', header=0).drop(useless_columns, axis=1)
hurricane_irma = pd.read_csv(
        'https://github.com/radusqrt/research/blob/master/datasets/crisis-mmd/annotations/hurricane_irma_final_data.tsv?raw=true', sep='\t', header=0).drop(useless_columns, axis=1)
hurricane_maria = pd.read_csv(
        'https://github.com/radusqrt/research/blob/master/datasets/crisis-mmd/annotations/hurricane_maria_final_data.tsv?raw=true', sep='\t', header=0).drop(useless_columns, axis=1)
source_df = pd.concat([hurricane_harvey, hurricane_irma, hurricane_maria])
source_df = source_df[source_df.image_damage.notnull()]
source_df = pd.concat([source_df, pd.get_dummies(source_df['image_damage'])], axis=1).drop(['image_damage'], axis=1)

target_df = pd.read_csv(
        'https://raw.githubusercontent.com/radusqrt/research/master/datasets/crisis-mmd/annotations/california_wildfires_final_data.tsv', sep='\t', header=0).drop(useless_columns, axis=1)
target_df = target_df[target_df.image_damage.notnull()]
target_df = pd.concat([target_df, pd.get_dummies(target_df['image_damage'])], axis=1).drop(['image_damage'], axis=1)

source_examples = preprocess_image(source_df['image_path'])
source_labels = source_df[['severe_damage', 'little_or_no_damage', 'mild_damage']]

target_examples = preprocess_image(target_df['image_path'])
target_labels = target_df[['severe_damage', 'little_or_no_damage', 'mild_damage']]

In [0]:
X = np.array(source_examples)
X_target = np.array(target_examples)
source_labels = np.array(source_labels)
target_labels = np.array(target_labels)

# Split training and testing data
X_source_train, X_source_test, y_source_train, y_source_test = \
    train_test_split(X, source_labels, test_size=0.3, random_state=42)
X_target_train, X_target_test, y_target_train, y_target_test = \
    train_test_split(X_target, target_labels, test_size=0.3, random_state=42)

print(X_source_train.shape, X_source_test.shape, X_target_train.shape, X_target_test.shape)

(1847, 160, 160, 3) (792, 160, 160, 3) (381, 160, 160, 3) (164, 160, 160, 3)


In [0]:
# Create a mixed dataset for TSNE visualization
num_test = 150
combined_test_imgs = np.vstack([X_source_test[:num_test], X_target_test[:num_test]])
combined_test_labels = np.vstack([y_source_test[:num_test], y_target_test[:num_test]])
combined_test_domain = np.vstack([np.tile([1., 0.], [num_test, 1]),
        np.tile([0., 1.], [num_test, 1])])

In [0]:
batch_size = 64

class DANN_Model(object):
    """Domain adaptation model."""
    def __init__(self):
        self._build_model()

    def _build_model(self):
        self.X = tf.placeholder(tf.uint8, [None, 160, 160, 3])
        self.y = tf.placeholder(tf.float32, [None, 3])
        self.domain = tf.placeholder(tf.float32, [None, 2])
        self.l = tf.placeholder(tf.float32, [])
        self.train = tf.placeholder(tf.bool, [])
        
        # CNN model for feature extraction
        with tf.variable_scope('feature_extractor'):

            W_conv0 = weight_variable([5, 5, 3, 32])
            b_conv0 = bias_variable([32])
            h_conv0 = tf.nn.relu(conv2d(tf.cast(self.X, tf.float32), W_conv0) + b_conv0)
            h_pool0 = max_pool_2x2(h_conv0)
            
            W_conv1 = weight_variable([5, 5, 32, 48])
            b_conv1 = bias_variable([48])
            h_conv1 = tf.nn.relu(conv2d(h_pool0, W_conv1) + b_conv1)
            h_pool1 = max_pool_2x2(h_conv1)
            
            # The domain-invariant feature
            self.feature = tf.reshape(h_pool1, [-1, 40*40*48])

        # MLP for class prediction
        with tf.variable_scope('label_predictor'):
            
            # Switches to route target examples (second half of batch) differently
            # depending on train or test mode.
            all_features = lambda: self.feature
            source_features = lambda: tf.slice(self.feature, [0, 0], [batch_size // 2, -1])
            classify_feats = tf.cond(self.train, source_features, all_features)
            
            all_labels = lambda: self.y
            source_labels = lambda: tf.slice(self.y, [0, 0], [batch_size // 2, -1])
            self.classify_labels = tf.cond(self.train, source_labels, all_labels)
            
            W_fc0 = weight_variable([40 * 40 * 48, 100])
            b_fc0 = bias_variable([100])
            h_fc0 = tf.nn.relu(tf.matmul(classify_feats, W_fc0) + b_fc0)

            W_fc1 = weight_variable([100, 100])
            b_fc1 = bias_variable([100])
            h_fc1 = tf.nn.relu(tf.matmul(h_fc0, W_fc1) + b_fc1)

            W_fc2 = weight_variable([100, 3])
            b_fc2 = bias_variable([3])
            logits = tf.matmul(h_fc1, W_fc2) + b_fc2
            
            self.pred = tf.nn.softmax(logits)
            self.pred_loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=self.classify_labels)

        # Small MLP for domain prediction with adversarial loss
        with tf.variable_scope('domain_predictor'):
            
            # Flip the gradient when backpropagating through this operation
            feat = flip_gradient(self.feature, self.l)
            
            d_W_fc0 = weight_variable([40 * 40 * 48, 100])
            d_b_fc0 = bias_variable([100])
            d_h_fc0 = tf.nn.relu(tf.matmul(feat, d_W_fc0) + d_b_fc0)
            
            d_W_fc1 = weight_variable([100, 2])
            d_b_fc1 = bias_variable([2])
            d_logits = tf.matmul(d_h_fc0, d_W_fc1) + d_b_fc1
            
            self.domain_pred = tf.nn.softmax(d_logits)
            self.domain_loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=d_logits, labels=self.domain)


In [0]:
# Build the model graph
graph = tf.compat.v1.get_default_graph()
with graph.as_default():
    model = DANN_Model()
    
    # Training
    learning_rate = tf.placeholder(tf.float32, [])
    
    pred_loss = tf.reduce_mean(model.pred_loss)
    domain_loss = tf.reduce_mean(model.domain_loss)
    total_loss = pred_loss + domain_loss

    regular_train_op = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(pred_loss)
    dann_train_op = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(total_loss)
    
    # Evaluation
    correct_label_pred = tf.equal(tf.argmax(model.classify_labels, 1), tf.argmax(model.pred, 1))
    label_acc = tf.reduce_mean(tf.cast(correct_label_pred, tf.float32))
    correct_domain_pred = tf.equal(tf.argmax(model.domain, 1), tf.argmax(model.domain_pred, 1))
    domain_acc = tf.reduce_mean(tf.cast(correct_domain_pred, tf.float32))


In [0]:
def train_and_evaluate(training_mode, graph, model, num_steps=2000, verbose=True):
    """Helper to run the model with different training modes."""

#     with session as sess:
    tf.global_variables_initializer().run()

    # Batch generators
    gen_source_batch = batch_generator(
        [X_source_train, y_source_train], batch_size // 2)
    gen_target_batch = batch_generator(
        [X_target_train, y_target_train], batch_size // 2)
    gen_source_only_batch = batch_generator(
        [X_source_train, y_source_train], batch_size)
    gen_target_only_batch = batch_generator(
        [X_target_train, y_target_train], batch_size)

    domain_labels = np.vstack([np.tile([1., 0.], [batch_size // 2, 1]),
                               np.tile([0., 1.], [batch_size // 2, 1])])

    # Training loop
    for i in range(num_steps):
        if i % 500 == 0:
            print('step {0} out of {1}'.format(i, num_steps))
        # Adaptation param and learning rate schedule as described in the paper
        p = float(i) / num_steps
        l = 2. / (1. + np.exp(-10. * p)) - 1
        lr = 0.0001 / (1. + 10 * p)**0.75

        # Training step
        if training_mode == 'dann':

            X0, y0 = next(gen_source_batch)
            X1, y1 = next(gen_target_batch)
            X = np.vstack([X0, X1])
            y = np.vstack([y0, y1])

            _, batch_loss, dloss, ploss, d_acc, p_acc = sess.run(
                [dann_train_op, total_loss, domain_loss, pred_loss, domain_acc, label_acc],
                feed_dict={model.X: X, model.y: y, model.domain: domain_labels,
                           model.train: True, model.l: l, learning_rate: lr})

            if verbose and i % 100 == 0:
                print('loss: {}  d_acc: {}  p_acc: {}  p: {}  l: {}  lr: {}'.format(
                        batch_loss, d_acc, p_acc, p, l, lr))

        elif training_mode == 'source':
            X, y = next(gen_source_only_batch)
            _, batch_loss = sess.run([regular_train_op, pred_loss],
                                 feed_dict={model.X: X, model.y: y, model.train: False,
                                            model.l: l, learning_rate: lr})
            
            if verbose and i % 100 == 0:
                print('loss: {}  p: {}  l: {}  lr: {}'.format(
                        batch_loss, p, l, lr))

        elif training_mode == 'target':
            X, y = next(gen_target_only_batch)
            _, batch_loss = sess.run([regular_train_op, pred_loss],
                                 feed_dict={model.X: X, model.y: y, model.train: False,
                                            model.l: l, learning_rate: lr})

    # Compute final evaluation on test data
    source_acc = sess.run(label_acc,
                        feed_dict={model.X: X_source_test, model.y: y_source_test,
                                   model.train: False})

    target_acc = sess.run(label_acc,
                        feed_dict={model.X: X_target_test, model.y: y_target_test,
                                   model.train: False})

    test_domain_acc = sess.run(domain_acc,
                        feed_dict={model.X: combined_test_imgs,
                                   model.domain: combined_test_domain, model.l: 1.0})

    test_emb = sess.run(model.feature, feed_dict={model.X: combined_test_imgs})
    return source_acc, target_acc, test_domain_acc, test_emb


print('\nSource only training')
source_acc, target_acc, _, source_only_emb = train_and_evaluate('source', graph, model)
print('Source accuracy:', source_acc)
print('Target accuracy:', target_acc)

print('\nDomain adaptation training')
source_acc, target_acc, d_acc, dann_emb = train_and_evaluate('dann', graph, model)
print('Source accuracy:', source_acc)
print('Target accuracy:', target_acc)
print('Domain accuracy:', d_acc)


Source only training
step 0 out of 2000
loss: 1290.6116943359375  p: 0.0  l: 0.0  lr: 0.0001
loss: 0.9948427677154541  p: 0.05  l: 0.2449186624037092  lr: 7.377879464668812e-05
loss: 0.9356509447097778  p: 0.1  l: 0.4621171572600098  lr: 5.946035575013606e-05
loss: 1.0079995393753052  p: 0.15  l: 0.6351489523872873  lr: 5.029733718731742e-05
loss: 0.9941279888153076  p: 0.2  l: 0.7615941559557646  lr: 4.3869133765083086e-05
step 500 out of 2000
loss: 0.9755426645278931  p: 0.25  l: 0.8482836399575131  lr: 3.907949713906801e-05
loss: 0.9790993928909302  p: 0.3  l: 0.9051482536448667  lr: 3.535533905932738e-05
loss: 0.9543672800064087  p: 0.35  l: 0.9413755384972873  lr: 3.236611811382156e-05
loss: 0.9642907977104187  p: 0.4  l: 0.9640275800758169  lr: 2.9906975624424413e-05
loss: 0.9394042491912842  p: 0.45  l: 0.9780261147388136  lr: 2.78437664873526e-05
step 1000 out of 2000
loss: 1.015130639076233  p: 0.5  l: 0.9866142981514305  lr: 2.6084743001221458e-05
loss: 0.9936988949775696  p

In [0]:
tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=3000)
source_only_tsne = tsne.fit_transform(source_only_emb)

tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=3000)
dann_tsne = tsne.fit_transform(dann_emb)
        
plot_embedding(source_only_tsne, combined_test_labels.argmax(1), combined_test_domain.argmax(1), 'Source only')
plot_embedding(dann_tsne, combined_test_labels.argmax(1), combined_test_domain.argmax(1), 'Domain Adaptation')

  self.explained_variance_ / total_var.sum()


NameError: ignored

In [0]:
sess.close()