In [37]:
# Summary -
# - There are totally 59K touch gestures.
# - Only ~22K (37%) touch gestures was clicked on a leaf element which had text content.
# - Created text dataset in the following format
#   - [[e11, e21, ... e1(MAX_TOKEN), e21, e22, ... e2(MAX_TOKEN), TARGET_TEXT],
#      ...
#     ]
#   - Vectorized the dataset.
# - Tried a simple classification model with a single hidden layer(1024).
#   - Accuracy on train data : 48%
#   - Accuracy on validation (20%) : 50%

In [38]:
# import statements.
import datetime
import glob
import json
import numpy as np
import os
import PIL
import tensorflow as tf
import re
import string
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [39]:
# Constants.
LONG_TOUCH_THRESHOLD = 5
DIM_X = 1440
DIM_Y = 2560
MAX_TOKEN = 64
BATCH_SIZE = 100
BUFFER_SIZE = 100
VOCAB_SIZE = 500
TRAIN_SIZE = 0.8
VAL_SIZE = 0.1
TEST_SIZE = 0.1
TRACES_PATH = 'filtered_traces/*/*'
NEGATIVE_SAMPLE_TARGET = '[null]'
PLACEHOLDER_TEXT = 'n/a'
y_true = []

In [40]:
# Gets all leaf nodes for a given element.
def get_leaf_nodes(element, leaf_nodes):
    if not element:
        return leaf_nodes
    if 'children' not in element:
        leaf_nodes.append(element)
        return leaf_nodes
    for child in element['children']:
        get_leaf_nodes(child, leaf_nodes)
    return leaf_nodes


def get_all_leaf_nodes(view_hierarchy_json):
    activity = view_hierarchy_json.get('activity')
    if not activity:
        return dataset
    root = activity.get('root')
    return get_leaf_nodes(root, [])


def get_target_text(leaf_nodes, x, y):
    target_text = None
    for leaf_node in leaf_nodes:
        bounds = leaf_node['bounds']
        if bounds[0] <= x and bounds[2] >= x and bounds[1] <= y and bounds[3] >= y:
            if 'text' in leaf_node:
                target_text = leaf_node['text'] or leaf_node.get('text-hint')
    return target_text


def get_leaf_node_features(leaf_nodes):
    i = 1
    element_features = []
    for leaf_node in leaf_nodes:
        if 'text' in leaf_node:
            text = leaf_node['text'] or leaf_node.get('text-hint')
            _class = leaf_node['class']
            element_features.append(str(text))
            element_features.append(str(_class))
            i += 1
            if i == MAX_TOKEN:
                break
    return element_features



# Identifies if a given gesture is a TOUCH gesture.
# In this task, we will only be focussing on TOUCH gestures.
def is_touch_gesture(gesture):
    if len(gesture) <= LONG_TOUCH_THRESHOLD:
        return True
    return False

In [41]:
dirs = glob.glob(TRACES_PATH)
touch_gesture_count = 0
non_touch_gesture_count = 0
for d in dirs:
  with open(f'{d}/gestures.json') as f:
    gestures = json.load(f)
    gestures = [gestures[x] for x in sorted(gestures, key=lambda x: int(x))]
    for gesture in gestures:
        if is_touch_gesture(gesture):
            touch_gesture_count += 1
        else:
            non_touch_gesture_count += 1
print('Number of touch gestures ', touch_gesture_count)
print('Number of non-touch gestures ', non_touch_gesture_count)

Number of touch gestures  59602
Number of non-touch gestures  6659


In [42]:
# Processes view hierarchies to construct dataset.
# Extract texts from MAX_TOKEN elements from both view hierarchies.
# Construct the dataset in the following format -
# [[e11, e21, ... e1(MAX_TOKEN), e21, e22, ... e2(MAX_TOKEN), TARGET_TEXT], ...]

def get_features(leaf_nodes1, leaf_nodes2):
    screen1_features = get_leaf_node_features(leaf_nodes1)
    screen2_features = get_leaf_node_features(leaf_nodes2)

    for i in range(int(len(screen1_features) / 2), MAX_TOKEN):
        screen1_features.append(PLACEHOLDER_TEXT)  # Element Text.
        screen1_features.append(PLACEHOLDER_TEXT)  # Element Class.
    for i in range(int(len(screen2_features) / 2), MAX_TOKEN):
        screen2_features.append(PLACEHOLDER_TEXT)  # Element Text.
        screen1_features.append(PLACEHOLDER_TEXT)  # Element Class.
    
    return screen1_features, screen2_features


def process_view_hierarchy(view_hierarchy1, view_hierarchy2, dataset, is_positive_sample = True):
    if not view_hierarchy1 or not view_hierarchy2:
        return dataset
    
    trace_path = view_hierarchy1.split('view_hierarchies')[0]
    gesture_path = f'{trace_path}/gestures.json'
    with open(gesture_path) as file:
        gestures = json.load(file)

    with open(view_hierarchy1) as file:
        view_hierarchy1_json = json.load(file)
    with open(view_hierarchy2) as file:
        view_hierarchy2_json = json.load(file)
    
    if not view_hierarchy1_json or not view_hierarchy2_json:
        return dataset

    ui_number = view_hierarchy1.split('/')[-1].split('.')[0]
    gesture = gestures[ui_number]
    if not is_touch_gesture(gesture):
        return dataset
    
    if not len(gesture):
        return dataset
    x_cord = gesture[0][0]
    y_cord = gesture[0][1]
    x = x_cord * DIM_X
    y = y_cord * DIM_Y

    leaf_nodes1 = get_all_leaf_nodes(view_hierarchy1_json)
    leaf_nodes2 = get_all_leaf_nodes(view_hierarchy2_json)

    target_text = get_target_text(leaf_nodes1, x, y)
    if not target_text:
        return dataset
    
    screen1_features, screen2_features = get_features(leaf_nodes1, leaf_nodes2)

    if is_positive_sample:
        dataset.append(screen1_features + screen2_features)
        y_true.append(1)
    else:
        dataset.append(screen1_features + screen2_features)
        y_true.append(0)
    return dataset
        

def process_trace(trace_path, dataset):
    view_hierarchies_path = f'{trace_path}/view_hierarchies/*'
    view_hierarchies = sorted(glob.glob(view_hierarchies_path))
    for i in range(len(view_hierarchies) - 1):
        dataset = process_view_hierarchy(view_hierarchies[i], view_hierarchies[i+1], dataset)


def add_negative_samples(dataset):
    traces = sorted(glob.glob(TRACES_PATH))
    total_positive_samples = len(dataset)
    negative_samples_threshold = 1 * total_positive_samples
    negative_samples_counter = 0
    for i in range(len(traces) - 1):
        trace_path1 = traces[i]
        trace_path2 = traces[i+1]
        view_hierarchies1_path = sorted(glob.glob(f'{trace_path1}/view_hierarchies/*'))
        view_hierarchies2_path = sorted(glob.glob(f'{trace_path2}/view_hierarchies/*'))
        for (view_hierarchy1, view_hierarchy2) in zip(view_hierarchies1_path, view_hierarchies2_path):
            dataset = process_view_hierarchy(view_hierarchy1, view_hierarchy2, dataset, False)
            negative_samples_counter += 1
            if negative_samples_counter >= negative_samples_threshold:
                return dataset
    return dataset


dataset = []
for trace_path in sorted(glob.glob(TRACES_PATH)):
    process_trace(trace_path, dataset)

dataset = add_negative_samples(dataset)

In [43]:
# We create a custom standardization function to lowercase the text and 
# remove punctuation.
def custom_standardization(input_data):
  lowercase = tf.strings.lower(input_data)
  return tf.strings.regex_replace(lowercase,
                                  '[%s]' % re.escape(string.punctuation), '')

# Define the number of words in a sequence.
sequence_length = 1

# Use the text vectorization layer to normalize, split, and map strings to
# integers. Set output_sequence_length length to pad all samples to same length.
vectorize_layer = tf.keras.layers.experimental.preprocessing.TextVectorization(
    standardize=custom_standardization,
    output_mode='int',
    output_sequence_length=sequence_length)

all_words = []
for row in dataset:
    for word in row[:-1]:
        all_words.append(str(word))
unique_words = set(all_words)
print('unique_words = ',len(unique_words))
vectorize_layer.adapt(list(unique_words))

unique_words =  202789


In [44]:
# Save the created vocabulary for reference.
inverse_vocab = vectorize_layer.get_vocabulary()
print(inverse_vocab[:20])
len(inverse_vocab)

['', '[UNK]', 'the', 'to', 'and', 'of', 'you', 'a', 'your', 'in', 'or', 'for', 'is', 'with', 'on', 'this', 'by', 'that', 'be', 'any']


125808

In [45]:
# Vectorize the data.
text_ds = tf.data.Dataset.from_tensor_slices(dataset)
text_vector_ds = text_ds.map(vectorize_layer)

In [46]:
sequences = list(text_vector_ds.as_numpy_iterator())
sequences = np.squeeze(sequences)

for seq in sequences[:5]:
  print(f"{seq} => {[inverse_vocab[i] for i in seq]}")

[   118 112981   8053 112981    419 112981   2296 112994   8053 112981
    419 112981   2296 112994    394 112981    394 112981   2296 112981
  42035 112995  34288 112995  10910 112995  71646 112981   2296 112981
   1219   1219   1219   1219   1219   1219   1219   1219   1219   1219
   1219   1219   1219   1219   1219   1219   1219   1219   1219   1219
   1219   1219   1219   1219   1219   1219   1219   1219   1219   1219
   1219   1219   1219   1219   1219   1219   1219   1219   1219   1219
   1219   1219   1219   1219   1219   1219   1219   1219   1219   1219
   1219   1219   1219   1219   1219   1219   1219   1219   1219   1219
   1219   1219   1219   1219   1219   1219   1219   1219   1219   1219
   1219   1219   1219   1219   1219   1219   1219   1219   1219   1219
   1219   1219   1219   1219   1219   1219   1219   1219   1219   1219
   1219   1219   1219   1219   1219   1219   1219   1219   1219   1219
   1219   1219   1219   1219   1219   1219   1219   1219   1219   1219
   121

In [47]:
def map_to_dataset(sequences, inverse_vocab):
    num_ns = len(inverse_vocab)
    labels = []
    input_data = []
    for input_instance in sequences:
        y = input_instance[-1:]
        labels.append(y)
        input_data.append(input_instance[:-1])
#     categorized_labels = tf.keras.utils.to_categorical(labels, num_ns)
    return tf.data.Dataset.from_tensor_slices((input_data, labels))

In [48]:
y_true_reshaped = np.array(y_true).reshape(len(sequences), -1)
sequences_stack = np.hstack((sequences, y_true_reshaped))
train, test = train_test_split(sequences_stack, test_size=TEST_SIZE)
train, val = train_test_split(sequences_stack, test_size=VAL_SIZE)

In [49]:
len(y_true)

30019

In [50]:
train = map_to_dataset(train, inverse_vocab).batch(BATCH_SIZE)
val = map_to_dataset(val, inverse_vocab).batch(BATCH_SIZE)
test = map_to_dataset(test, inverse_vocab).batch(BATCH_SIZE)

In [51]:
model = tf.keras.Sequential([
#     tf.keras.layers.Flatten(input_shape=(MAX_TOKEN*2, )),
    tf.keras.layers.Dense(100, name='Input'),
    tf.keras.layers.Dense(100, activation='relu', name='Hidden'),
    tf.keras.layers.Dense(1, activation='softmax', name='Softmax_Activation')
])

log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(train, validation_data=val, epochs=100, callbacks=[tensorboard_callback], batch_size=BATCH_SIZE)

Epoch 1/100
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
E

<tensorflow.python.keras.callbacks.History at 0x7f1ab660ba50>