In [1]:
# import statements.
import datetime
import glob
import json
import numpy as np
import os
import PIL
import tensorflow as tf
import re
import string
from sklearn.model_selection import train_test_split
import tensorflow_hub as hub
import matplotlib.pyplot as plt
from torchvision import transforms
from PIL import Image

In [2]:
# Constants shared by the data-preparation and training cells.
# A gesture with at most this many recorded points is treated as a TOUCH (see is_touch_gesture).
LONG_TOUCH_THRESHOLD = 5
# Pixel size the screenshots are rescaled to; gesture coordinates are multiplied by these
# values before being compared with the view hierarchy bounds.
DIM_X = 1440
DIM_Y = 2560
# Side length (pixels) every cropped UI element is resized to.
IMG_DIM = 224
# Maximum number of leaf elements kept per screen; shorter screens are padded up to this count.
MAX_TOKEN = 64
# Batch size used when batching the train/val/test datasets.
BATCH_SIZE = 100
# NOTE(review): BUFFER_SIZE, VOCAB_SIZE and TRAIN_SIZE are not referenced in the visible cells.
BUFFER_SIZE = 100
VOCAB_SIZE = 500
TRAIN_SIZE = 0.8
# Fractions passed as test_size to train_test_split.
VAL_SIZE = 0.1
TEST_SIZE = 0.1
# Glob pattern matching the trace directories to process.
TRACES_PATH = 'filtered_traces/com.google.android*/*'
# NOTE(review): NEGATIVE_SAMPLE_TARGET and PLACEHOLDER_TEXT are not referenced in the visible cells.
NEGATIVE_SAMPLE_TARGET = '[null]'
PLACEHOLDER_TEXT = 'n/a'
# Image file used to pad a screen's element list up to MAX_TOKEN entries.
PLACEHOLDER_IMG = 'Blank.jpg'

In [3]:
# Pick the TF Hub feature-vector module as a (module name, input size in pixels) pair.
module_selection = ("mobilenet_v2_100_224", 224)
handle_base, pixels = module_selection
MODULE_HANDLE ="https://tfhub.dev/google/imagenet/{}/feature_vector/4".format(handle_base)
IMAGE_SIZE = (pixels, pixels)
print("Using {} with input size {}".format(MODULE_HANDLE, IMAGE_SIZE))

# Feature extractor called by get_image_embeddings().
# NOTE(review): the training cell later rebinds the name `model` to the Keras classifier, so this
# cell must be re-run before computing embeddings again in the same kernel.
model = hub.KerasLayer(MODULE_HANDLE)

Using https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4 with input size (224, 224)


In [4]:
# Setting MAX_IMAGE_PIXELS to None turns off Pillow's decompression-bomb size limit for large images.
Image.MAX_IMAGE_PIXELS = None

# Gets all leaf nodes for a given element.
def get_leaf_nodes(element, leaf_nodes):
    """Collects the leaves of the subtree rooted at `element` into `leaf_nodes`.

    A leaf is any truthy node without a 'children' key. Falsy nodes (None, {})
    are skipped. Leaves are appended in depth-first, left-to-right order.

    Args:
        element: Root node (dict) of the subtree, or a falsy value.
        leaf_nodes: List that receives the leaves; it is mutated in place.

    Returns:
        The same `leaf_nodes` list that was passed in.
    """
    pending = [element]
    while pending:
        node = pending.pop()
        if not node:
            continue
        if 'children' in node:
            # Push children reversed so the left-most child is popped first.
            pending.extend(reversed(list(node['children'])))
        else:
            leaf_nodes.append(node)
    return leaf_nodes


def get_all_leaf_nodes(view_hierarchy_json):
    """Returns the leaf nodes of a parsed view hierarchy.

    Args:
        view_hierarchy_json: Parsed view hierarchy dict; the element tree is
            read from view_hierarchy_json['activity']['root'].

    Returns:
        A new list of leaf node dicts. The list is empty when the hierarchy
        has no 'activity' entry or no 'root'.
    """
    activity = view_hierarchy_json.get('activity')
    if not activity:
        # Previously this returned the unrelated global `dataset`; a screen
        # without an activity simply has no leaves.
        return []
    root = activity.get('root')
    return get_leaf_nodes(root, [])


def get_target_image(screenshot, leaf_nodes, x, y):
    """Crops the first leaf element whose bounds contain the point (x, y).

    Args:
        screenshot: PIL image in the same coordinate space as the bounds.
        leaf_nodes: Leaf node dicts, each with a 'bounds' entry of
            [left, top, right, bottom].
        x: Horizontal coordinate of the point.
        y: Vertical coordinate of the point.

    Returns:
        The matching element resized to IMG_DIM x IMG_DIM, or None when no
        leaf contains the point.
    """
    for leaf_node in leaf_nodes:
        bounds = leaf_node['bounds']
        if bounds[0] <= x <= bounds[2] and bounds[1] <= y <= bounds[3]:
            cropped = screenshot.crop(bounds)
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            return cropped.resize((IMG_DIM, IMG_DIM), Image.LANCZOS)
    return None


def get_target_image_index(screenshot, leaf_nodes, x, y):
    """Finds the position of the first leaf whose bounds contain (x, y).

    Args:
        screenshot: Unused; kept so the signature mirrors get_target_image.
        leaf_nodes: Leaf node dicts, each with a 'bounds' entry of
            [left, top, right, bottom].
        x: Horizontal coordinate of the point.
        y: Vertical coordinate of the point.

    Returns:
        Zero-based index of the first matching leaf, or -1 when none matches.
    """
    def contains_point(bounds):
        return bounds[0] <= x <= bounds[2] and bounds[1] <= y <= bounds[3]

    matches = (
        index
        for index, leaf_node in enumerate(leaf_nodes)
        if contains_point(leaf_node['bounds'])
    )
    return next(matches, -1)
    

def get_leaf_node_images(screenshot, leaf_nodes):
    """Crops and resizes the first MAX_TOKEN leaf elements of a screen.

    Args:
        screenshot: PIL image in the same coordinate space as the bounds.
        leaf_nodes: List of leaf node dicts, each with a 'bounds' entry of
            [left, top, right, bottom].

    Returns:
        A list of at most MAX_TOKEN PIL images, each IMG_DIM x IMG_DIM, in the
        same order as `leaf_nodes`.
    """
    element_images = []
    for leaf_node in leaf_nodes[:MAX_TOKEN]:
        cropped = screenshot.crop(leaf_node['bounds'])
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        element_images.append(cropped.resize((IMG_DIM, IMG_DIM), Image.LANCZOS))
    return element_images


def get_image_embeddings(images):
    """Embeds a list of PIL images with the TF Hub feature extractor `model`.

    Args:
        images: Non-empty list of PIL images, all of the same size and mode.

    Returns:
        The output of `model` for the stacked batch, one embedding per image.
    """
    to_tensor = transforms.ToTensor()
    image_list = []
    for image in images:
        # ToTensor returns a channels-first (C, H, W) tensor scaled to [0, 1].
        channels_first = to_tensor(image)
        # Move channels last -> (H, W, C). The previous perm=[2, 1, 0] produced
        # (W, H, C), i.e. it also swapped the two spatial axes of every crop.
        image_list.append(tf.transpose(channels_first, perm=[1, 2, 0]))

    # Stack into one (N, H, W, C) batch instead of passing a Python list.
    return model(tf.stack(image_list))


def get_labels_tensor(target_index):
    """Builds a [MAX_TOKEN, 1] label column for the given target position.

    Args:
        target_index: Row to mark with 1; a negative value means "no target".

    Returns:
        A dense tensor with a single 1 at row `target_index`, or an all-zero
        tensor from tf.zeros when `target_index` is negative.
    """
    if target_index >= 0:
        one_hot = tf.sparse.SparseTensor(
            indices=[[target_index, 0]],
            values=[1],
            dense_shape=[MAX_TOKEN, 1],
        )
        return tf.sparse.to_dense(one_hot)
    return tf.zeros([MAX_TOKEN, 1])


# Identifies if a given gesture is a TOUCH gesture.
# In this task, we will only be focussing on TOUCH gestures.
def is_touch_gesture(gesture):
    """Returns True when `gesture` has at most LONG_TOUCH_THRESHOLD points."""
    return len(gesture) <= LONG_TOUCH_THRESHOLD

In [5]:
# Processes a pair of view hierarchies to construct one dataset entry.
# Crops up to MAX_TOKEN leaf elements from each screen, pads both lists to
# MAX_TOKEN with a placeholder image, and embeds all 2 * MAX_TOKEN crops.
# Each entry appended to the dataset has the format -
# [image_embeddings, label]
def _load_json(path):
    """Reads and parses the JSON file at `path`."""
    with open(path) as file:
        return json.load(file)


def _split_view_hierarchy_path(view_hierarchy_path):
    """Returns (trace directory, UI number) for a view hierarchy file path."""
    trace_dir = view_hierarchy_path.split('view_hierarchies')[0]
    ui_number = os.path.basename(view_hierarchy_path).split('.')[0]
    return trace_dir, ui_number


def _load_resized_screenshot(trace_dir, ui_number):
    """Loads a trace screenshot and rescales it to DIM_X x DIM_Y."""
    screenshot_path = os.path.join(trace_dir, 'screenshots', f'{ui_number}.jpg')
    # resize() returns a new in-memory image, so the file can be closed here.
    with PIL.Image.open(screenshot_path) as screenshot:
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        return screenshot.resize((DIM_X, DIM_Y), Image.LANCZOS)


def _pad_with_placeholder(element_images, placeholder):
    """Appends `placeholder` until the list holds MAX_TOKEN images."""
    missing = MAX_TOKEN - len(element_images)
    if missing > 0:
        element_images.extend([placeholder] * missing)
    return element_images


def process_view_hierarchy(view_hierarchy1, view_hierarchy2, dataset, is_positive_sample = True):
    """Builds one sample from two screens and appends it to `dataset`.

    The gesture recorded for the first screen must be a non-empty TOUCH
    gesture that lands on one of the first MAX_TOKEN leaf elements of that
    screen; otherwise nothing is appended.

    Args:
        view_hierarchy1: Path to the first screen's view hierarchy JSON file.
        view_hierarchy2: Path to the second screen's view hierarchy JSON file.
        dataset: List of samples; mutated in place.
        is_positive_sample: True labels the sample 1, False labels it 0.

    Returns:
        The same `dataset` list, with one `[image_embeddings, label]` entry
        appended when the pair was usable.

    Raises:
        OSError: If a gesture, view hierarchy, screenshot or placeholder file
            cannot be opened.
    """
    if not view_hierarchy1 or not view_hierarchy2:
        return dataset

    trace_dir1, ui_number1 = _split_view_hierarchy_path(view_hierarchy1)
    trace_dir2, ui_number2 = _split_view_hierarchy_path(view_hierarchy2)

    gestures = _load_json(os.path.join(trace_dir1, 'gestures.json'))
    view_hierarchy1_json = _load_json(view_hierarchy1)
    view_hierarchy2_json = _load_json(view_hierarchy2)
    if not view_hierarchy1_json or not view_hierarchy2_json:
        return dataset

    # Skip screens with no recorded gesture instead of raising KeyError.
    gesture = gestures.get(ui_number1)
    if not gesture or not is_touch_gesture(gesture):
        return dataset

    # The size of the screenshot is 1080 * 1920.
    # Rescaling it to 1440 * 2560, dimension used by view hierarchy data.
    resized_screenshot1 = _load_resized_screenshot(trace_dir1, ui_number1)
    resized_screenshot2 = _load_resized_screenshot(trace_dir2, ui_number2)

    # Scale the first gesture point by the rescaled screenshot size so it can
    # be compared with the view hierarchy bounds.
    x = gesture[0][0] * DIM_X
    y = gesture[0][1] * DIM_Y

    leaf_nodes1 = get_all_leaf_nodes(view_hierarchy1_json)
    leaf_nodes2 = get_all_leaf_nodes(view_hierarchy2_json)

    # The tapped element must be among the MAX_TOKEN elements that are kept.
    target_image_index = get_target_image_index(resized_screenshot1, leaf_nodes1, x, y)
    if target_image_index == -1 or target_image_index >= MAX_TOKEN:
        return dataset

    screen1_element = get_leaf_node_images(resized_screenshot1, leaf_nodes1)
    # Screen 2 elements must be cropped from screen 2's own screenshot
    # (previously they were cropped from screenshot 1).
    screen2_element = get_leaf_node_images(resized_screenshot2, leaf_nodes2)

    with PIL.Image.open(PLACEHOLDER_IMG) as placeholder:
        resized_placeholder = placeholder.resize((IMG_DIM, IMG_DIM), Image.LANCZOS)
    _pad_with_placeholder(screen1_element, resized_placeholder)
    _pad_with_placeholder(screen2_element, resized_placeholder)

    image_embeddings = get_image_embeddings(screen1_element + screen2_element)
    label = 1 if is_positive_sample else 0

    dataset.append([image_embeddings, label])
    return dataset
        

def process_trace(trace_path, dataset):
    """Adds one positive sample per pair of consecutive screens in a trace.

    Args:
        trace_path: Directory of a single trace; its view hierarchies are read
            from `<trace_path>/view_hierarchies/*`.
        dataset: List of samples; mutated in place.

    Returns:
        The same `dataset` list, for consistency with add_negative_samples.
    """
    # NOTE(review): sorting is lexicographic; pairing neighbours assumes the
    # file names sort in capture order — confirm for non-zero-padded names.
    view_hierarchies = sorted(glob.glob(f'{trace_path}/view_hierarchies/*'))
    for current_screen, next_screen in zip(view_hierarchies, view_hierarchies[1:]):
        dataset = process_view_hierarchy(current_screen, next_screen, dataset)
    return dataset


def add_negative_samples(dataset, negative_ratio=0.1):
    """Appends negative samples built from screens of different traces.

    Screens at the same position in two neighbouring traces are paired and
    labelled 0. Generation stops once the number of negatives actually
    appended reaches `negative_ratio` times the number of samples that were in
    `dataset` on entry.

    Args:
        dataset: List of (positive) samples; mutated in place.
        negative_ratio: Target number of negatives as a fraction of the
            incoming dataset size.

    Returns:
        The same `dataset` list with the negative samples appended.
    """
    traces = sorted(glob.glob(TRACES_PATH))
    negative_samples_threshold = negative_ratio * len(dataset)
    negative_samples_counter = 0
    for trace_path1, trace_path2 in zip(traces, traces[1:]):
        view_hierarchies1_path = sorted(glob.glob(f'{trace_path1}/view_hierarchies/*'))
        view_hierarchies2_path = sorted(glob.glob(f'{trace_path2}/view_hierarchies/*'))
        for view_hierarchy1, view_hierarchy2 in zip(view_hierarchies1_path, view_hierarchies2_path):
            # Return (not break) so the limit holds across all trace pairs; a
            # break here would only leave the inner loop.
            if negative_samples_counter >= negative_samples_threshold:
                return dataset
            size_before = len(dataset)
            dataset = process_view_hierarchy(view_hierarchy1, view_hierarchy2, dataset, False)
            # Count only pairs that really produced a sample.
            negative_samples_counter += len(dataset) - size_before
    return dataset


# Build the positive samples trace by trace. process_view_hierarchy appends to
# `dataset` in place, so the return value of process_trace is not needed here.
dataset = []
for trace_path in sorted(glob.glob(TRACES_PATH)):
    process_trace(trace_path, dataset)

# Append the label-0 samples built from screens of different traces.
dataset = add_negative_samples(dataset)

In [6]:
# NOTE(review): when cells run in order, `dataset` already contains the negative samples
# appended by the previous cell, so this is not the threshold add_negative_samples() used
# and `total_positive_samples` counts negatives too — confirm intent.
total_positive_samples = len(dataset)
negative_samples_threshold = 0.1 * total_positive_samples
negative_samples_threshold

23.0

In [7]:
def map_to_dataset(dataset):
    """Converts `[embeddings, label]` entries into a tf.data.Dataset.

    Each entry's embeddings are flattened to one dimension and its label is
    cast to int32 and reshaped to one dimension.

    Args:
        dataset: Iterable of entries where entry[0] holds the embeddings and
            entry[1] the label.

    Returns:
        A tf.data.Dataset yielding (flattened embeddings, label) pairs.
    """
    converted = [
        (
            tf.reshape(entry[0], [-1]),
            tf.reshape(tf.dtypes.cast(entry[1], tf.int32), [-1]),
        )
        for entry in dataset
    ]
    input_data = [features for features, _ in converted]
    labels = [target for _, target in converted]
    return tf.data.Dataset.from_tensor_slices((input_data, labels))

In [8]:
def map_to_dataset_classification(dataset):
    """Turns `[embeddings, label]` entries into a tf.data.Dataset for classification.

    Args:
        dataset: Iterable of entries where entry[0] holds the embeddings and
            entry[1] the label.

    Returns:
        A tf.data.Dataset whose elements are (embeddings flattened to 1-D,
        label cast to int32 and reshaped to 1-D).
    """
    def flatten_features(entry):
        return tf.reshape(entry[0], [-1])

    def flatten_label(entry):
        return tf.reshape(tf.dtypes.cast(entry[1], tf.int32), [-1])

    input_data, label = [], []
    for entry in dataset:
        input_data.append(flatten_features(entry))
        label.append(flatten_label(entry))
    return tf.data.Dataset.from_tensor_slices((input_data, label))

In [9]:
# Hold out the test split first, then carve the validation split out of the
# remaining training data so the three splits are disjoint. (Splitting
# `dataset` a second time let validation overlap test and left test samples
# inside train.)
train, test = train_test_split(dataset, test_size=TEST_SIZE)
# VAL_SIZE is a fraction of the full dataset, so rescale it to the fraction of
# what is left after removing the test split.
train, val = train_test_split(train, test_size=VAL_SIZE / (1 - TEST_SIZE))

In [10]:
# Convert each split into a tf.data.Dataset batched by BATCH_SIZE.
# NOTE: this rebinds train/val/test from the split lists to datasets, so the cell is not
# idempotent — re-run the split cell before re-running this one.
train = map_to_dataset_classification(train).batch(BATCH_SIZE)
val = map_to_dataset_classification(val).batch(BATCH_SIZE)
test = map_to_dataset_classification(test).batch(BATCH_SIZE)

In [12]:
# Binary classifier over the flattened image embeddings of both screens.
# NOTE(review): this rebinds `model`, which get_image_embeddings() uses as the
# TF Hub feature extractor; re-run the feature-extractor cell before computing
# embeddings again in the same kernel.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=100, activation='relu'),
    tf.keras.layers.Dense(units=100, activation='relu'),
    # One sigmoid unit outputs P(label == 1). Softmax over a single unit is
    # always 1.0, so the network could not learn anything.
    tf.keras.layers.Dense(units=1, activation='sigmoid')
])

log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Labels are single 0/1 values, so use binary cross-entropy on the sigmoid
# probability (from_logits=False) rather than categorical cross-entropy.
model.compile(
    optimizer=tf.keras.optimizers.Adam(
        learning_rate=0.00001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-07,
        amsgrad=False,
        name='Adam',
    ),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

# `train` and `val` are already batched datasets, so batch_size is not passed.
model.fit(train, validation_data=val, epochs=100, callbacks=[tensorboard_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f90684f5890>