In [None]:
from tracing.selenium_utils.controls import *
from abc import abstractmethod

class IAction:
    """Interface for an action the agent can perform on a page control.

    Note: the class does not use ``ABCMeta``, so ``@abstractmethod`` is only
    advisory here; an un-overridden ``apply`` fails at call time with
    ``NotImplementedError`` rather than at instantiation.
    """

    @abstractmethod
    def apply(self, control, driver, user):
        """Perform the action on ``control`` using ``driver`` on behalf of ``user``.

        Returns:
            True if the action was applied successfully, False otherwise.
        """
        raise NotImplementedError


class InputBirthday(IAction):
    """Base action that fills one part of a birthday into a text box or a select.

    Subclasses provide the acceptable textual spellings through
    ``get_candidates``.
    """

    @abstractmethod
    def get_candidates(self):
        """Return the list of strings that may represent the value to enter."""
        raise NotImplementedError

    def apply(self, ctrl, driver, user):
        """Enter the birthday part into ``ctrl``.

        Text controls receive the first candidate.  Select controls receive a
        candidate that is present in ``ctrl.values``; when several candidates
        are present the one listed last wins.

        Returns:
            True when a value was entered/selected, False when the control type
            is unsupported or no candidate is offered by the select.
        """
        kind = ctrl.type

        if kind == Types.text:
            enter_text(driver, ctrl.elem, self.get_candidates()[0])
            return True

        if kind == Types.select:
            offered = [txt for txt in self.get_candidates() if txt in ctrl.values]
            if not offered:
                print('Not found values {} in control {}'.format(self.get_candidates(), ctrl))
                return False

            # Last matching candidate is the one that gets selected.
            select_combobox_value(driver, ctrl.elem, offered[-1])
            return True

        return False
    
class InputBDay(InputBirthday):
    """Enters the day-of-month part of the birthday (the 1st)."""

    def __str__(self):
        return "InputBDay"

    def get_candidates(self):
        """Spellings of the day; the first one is used for text inputs."""
        day_spellings = ['1', '01']
        return day_spellings

    
class InputBMonth(InputBirthday):
    """Enters the month part of the birthday (January)."""

    def __str__(self):
        return "InputBMonth"

    def get_candidates(self):
        """Spellings of the month; the first one is used for text inputs."""
        numeric = ['01', '1']
        named = ['January', 'Jan', 'january', 'jan']
        return numeric + named

    
class InputBYear(InputBirthday):
    """Enters the year part of the birthday (1972)."""

    def __str__(self):
        return "InputBYear"

    def get_candidates(self):
        """Spellings of the year; the first one is used for text inputs."""
        year_spellings = ['1972', '72']
        return year_spellings

    
class Click(IAction):
    """Clicks a control if it is of a clickable type."""

    def __str__(self):
        return "Click"

    def apply(self, ctrl, driver, user):
        """Click ``ctrl``; returns False (without clicking) for other control types."""
        clickable_types = (Types.radiobutton, Types.checkbox, Types.link, Types.button)
        if ctrl.type not in clickable_types:
            return False

        click(driver, ctrl.elem)
        return True

    
class Wait(IAction):
    """Action that pauses for two seconds and always reports success."""

    def apply(self, ctrl, driver, user):
        """Sleep for 2 seconds; the control, driver and user are ignored.

        Returns:
            Always True.
        """
        # Imported locally because this cell does not import ``time``
        # explicitly; previously the name was only available once a later
        # cell had run.
        import time

        time.sleep(2)
        return True

    def __str__(self):
        return "Wait"


class Nothing(IAction):
    """No-op action: leaves the control untouched and reports success."""

    def __str__(self):
        return "Do Nothing"

    def apply(self, ctrl, driver, user):
        """Do nothing and return True."""
        applied = True
        return applied


class Actions:
    """Registry of the available actions.

    The position of an action in ``actions`` is its action id, so the order
    must not change.
    """

    actions = [
        InputBDay(),
        InputBMonth(),
        InputBYear(),
        Click(),
        Wait(),
        Nothing(),
    ]


In [None]:
import tensorflow as tf
import tensorflow.contrib.slim as slim

class A3CModel:
    """Actor-critic network used both as the shared (global) model and as a
    per-worker (local) copy.

    A model built without ``global_model`` is the global one: it owns the
    optimizer.  A model built with ``global_model`` is a local one: it computes
    gradients of its own loss, applies them to the global model's variables and
    can copy the global variables back into its own.
    """
    global_scope = "global_model_scope"
    local_scope = "local_model_scope"

    def __init__(self, num_actions, global_model = None, session = None, name = None):
        """Store the configuration and build the graph immediately.

        Args:
            num_actions: size of the policy output.
            global_model: the shared model; None makes this the global model.
            session: TensorFlow session to use; one is created when omitted.
            name: suffix appended to the local variable scope name.
        """
        self.num_actions = num_actions
        self.global_model = global_model
        self.session = session
        self.name = name or ''

        self.build()

    @property
    def is_global(self):
        """True when this instance is the shared global model."""
        return self.global_model is None

    def build(self):
        """Create the network, the loss and the training/update ops."""
        if self.session is None:
            self.session = tf.Session()

        if self.is_global:
            scope_name = A3CModel.global_scope
        else:
            scope_name = A3CModel.local_scope + self.name

        with tf.variable_scope(scope_name) as scope:
            self.build_graph()
            self.add_loss()
            # Restrict to this model's own variables.  Without the scope filter
            # a local model would also list the global model's (and other
            # workers') variables, so pulling would assign global variables to
            # themselves instead of to the local copy.
            self.params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            scope=scope.name + '/')
            if self.is_global:
                self.add_train_op()
            else:
                self.add_update_ops()

    def build_graph(self):
        """Build placeholders, the convolutional trunk and the policy/value heads.

        Returns:
            (net, end_points): the pooled feature tensor and the dict of
            intermediate layer outputs.
        """
        with tf.variable_scope('inputs'):
            # Batch x h x 612
            self.img = tf.placeholder(tf.float32, (None, None, None, 4), "img")
            # Passed to slim.dropout as its second positional argument.
            self.dropout = tf.placeholder(tf.float32, (), "dropout")

            # Learning Rate
            self.lr = tf.placeholder_with_default(0.1, (), 'lr')

            # Entropy Rate (use for regularization)
            self.er = tf.placeholder_with_default(0.01, (), 'er')

            # Batch: index of the action performed at each step
            self.performed_actions = tf.placeholder(tf.int32, (None,), "performed_actions")

            # Batch: discounted return for each step
            self.rewards = tf.placeholder(tf.float32, (None,), 'rewards')

        with tf.variable_scope('cnn') as sc:

            end_points_collection = sc.original_name_scope + '_ep'

            with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                                outputs_collections=[end_points_collection]):

                # h/2, 306
                net = slim.conv2d(self.img, 64, [5, 5], 2, padding='SAME',
                                scope='conv1')

                # h/4, 153
                net = slim.conv2d(net, 64, [5, 5], 2, padding='SAME',
                                scope='conv2')

                # h/8, 76
                net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
                net = slim.conv2d(net, 64, [5, 5], scope='conv3')

                # h/16, 38
                net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
                net = slim.conv2d(net, 64, [3, 3], scope='conv4')
                net = slim.conv2d(net, 64, [3, 3], scope='conv5')
                net = slim.conv2d(net, 64, [3, 3], scope='conv6')

                # h/32, 18
                net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')

            # Use conv2d instead of fully_connected layers.
            # The initializer's first positional argument is the mean, so the
            # standard deviation has to be passed by keyword.
            with slim.arg_scope([slim.conv2d],
                                  weights_initializer=tf.truncated_normal_initializer(stddev=0.005),
                                  biases_initializer=tf.constant_initializer(0.1)):

                # h/32 - 3, 1
                net = slim.avg_pool2d(net, [18, 18], padding='VALID',
                                  scope='fc')
                net = slim.dropout(net, self.dropout, scope='dropout')

                # h/32 - 3, 1
                net = slim.conv2d(net, 128, [1, 1], scope='fc2')

                # Convert end_points_collection into a end_point dict.
                end_points = slim.utils.convert_collection_to_dict(end_points_collection)

                # 128, Global Max Pooling
                net = tf.reduce_max(net, [1, 2], keepdims=False, name='global_pool')
                end_points['global_pool'] = net

                # Policy head.  Linear output: slim.fully_connected applies
                # ReLU unless activation_fn is overridden, which would clip
                # the logits at zero.
                self.logits = slim.fully_connected(net, self.num_actions,
                                                   activation_fn=None)
                self.pi = tf.nn.softmax(self.logits)

                # Value head.  Linear output so negative returns can be
                # represented.
                self.v = slim.fully_connected(net, 1, activation_fn=None)

        self.end_points = end_points
        return net, end_points

    def init_weights(self):
        """Run the global variable initializer in this model's session."""
        self.session.run(tf.global_variables_initializer())

    def add_loss(self):
        """Define ``self.loss`` from value, policy and entropy terms."""
        # self.v has shape (batch, 1); drop the trailing axis so it lines up
        # element-wise with the (batch,) rewards instead of broadcasting to
        # (batch, batch).
        value = tf.squeeze(self.v, axis=1)

        # Advantage
        advantage = self.rewards - value
        value_loss = tf.nn.l2_loss(advantage)

        # Policy Loss: -Log(pi(a)) * advantage; the advantage is treated as a
        # constant for the policy gradient.
        policy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.performed_actions, logits=self.logits)
        policy_loss *= tf.stop_gradient(advantage)

        # Entropy: H(pi), one value per sample.  log_softmax avoids log(0).
        log_pi = tf.nn.log_softmax(self.logits)
        entropy = -tf.reduce_sum(self.pi * log_pi, axis=1)  # encourage exploration

        # Summ Loss
        self.loss = tf.reduce_mean(0.5 * value_loss + policy_loss - self.er * entropy)

    def add_train_op(self):
        """Create the optimizer of the global model.

        The learning rate comes from the ``lr`` placeholder so the value fed by
        ``train_from_memory`` actually takes effect.
        """
        self.opt = tf.train.AdamOptimizer(learning_rate=self.lr)

    def add_update_ops(self):
        """Create the push (apply gradients to global) and pull (copy from
        global) operations of a local model."""
        assert not self.is_global, "Can't add pull and push operations to global model"
        assert self.global_model is not None, "Global model must bet set"
        assert self.global_model.opt is not None, "Global model must have optimizer .opt"

        global_params = self.global_model.params

        with tf.name_scope('update'):
            # Gradients Computation.  Pair every gradient with its global
            # variable first and drop missing gradients afterwards, so the
            # pairs stay aligned.
            grads_and_vars = [
                (grad, global_var)
                for grad, global_var in zip(tf.gradients(self.loss, self.params), global_params)
                if grad is not None
            ]
            self.grads = [grad for grad, _ in grads_and_vars]

            # Pull variables from global model
            self.pull_global_op = [local_var.assign(global_var) for local_var, global_var in
                            zip(self.params, global_params)]

            # Add gradients to global model variables
            self.update_global_op = self.global_model.opt.apply_gradients(grads_and_vars)

    def update_global(self, feed_dict):
        """Apply this model's gradients to the global model."""
        self.session.run([self.update_global_op], feed_dict)

    def pull_global(self):
        """Copy the global model's variables into this model."""
        self.session.run([self.pull_global_op])

    def get_action(self, image):
        """
        Returns Action Id sampled from the policy for a single image.
        """
        # Local import: this cell does not import numpy itself.
        import numpy as np

        print('image shape:', image.shape)
        pi = self.session.run(self.pi, feed_dict = {self.img: [image], self.dropout: 1.0})
        print('got probabilities:', pi)
        return np.random.choice(range(self.num_actions), p = pi[0])

    def train_from_memory(self, memory, dropout = 0.5, lr = 0.01, er = 0.01):
        """Run one update of the global model from ``memory`` and re-sync.

        Args:
            memory: object whose ``to_input()`` returns a dict with the keys
                'img', 'actions' and 'rewards'.
            dropout: value fed to the ``dropout`` placeholder.
            lr: learning rate fed to the global model's ``lr`` placeholder.
            er: entropy weight fed to the ``er`` placeholder.

        Does nothing when the memory is empty.
        """
        assert not self.is_global, "Can't train Global Model"

        # 1. Convert Memory to Input Batch
        batch = memory.to_input()
        batch_size = len(batch['img'])
        print('batch_size: ', batch_size)
        if batch_size <= 0:
            return

        # 2. Create Feed Data
        feed_data = {
            self.img: batch['img'],
            self.performed_actions: batch['actions'],
            self.rewards: batch['rewards'],

            self.dropout: dropout,
            self.global_model.lr: lr,
            self.er: er
        }

        # 3. Compute gradients and update global Model
        self.update_global(feed_data)

        # 4. Copy Values from Global Model
        self.pull_global()
        


In [None]:
from abc import abstractmethod
import random
import tracing.selenium_utils.controls as selenium_controls
import tracing.selenium_utils.common as common
from urllib.parse import urlparse


class IRewardsCalculator:
    """Interface of a reward strategy driven by the environment.

    The three hooks are optional no-ops.  The remaining methods are marked
    abstract; because the class does not use ``ABCMeta`` this is only enforced
    at call time via ``NotImplementedError``.
    """

    def start(self):
        """Hook called when a new page is started. Does nothing by default."""

    def before_action(self, driver, action):
        """Hook called right before an action is applied. Does nothing by default."""

    def after_action(self, driver, action):
        """Hook called right after an action was applied. Does nothing by default."""

    @abstractmethod
    def is_final(self):
        """Tell whether the final state has been reached."""
        raise NotImplementedError

    @abstractmethod
    def calc_reward(self, is_success):
        """Return the reward for the last action; ``is_success`` is its outcome."""
        raise NotImplementedError

    @abstractmethod
    def calc_final_reward(self):
        """Return the reward granted at the end of an episode."""
        raise NotImplementedError


class PopupRewardsCalculator(IRewardsCalculator):
    """Rewards closing a popup without leaving the site.

    A popup is assumed to be present when several visible controls are covered
    by other elements.  The state observed around the most recent action is
    kept in ``had_popup``/``have_popup`` and ``url``/``new_url``.
    """

    def __init__(self):
        self.is_final_state = False
        self._reset_observations()

    def _reset_observations(self):
        """Forget everything observed around previous actions.

        Until an action has been observed the snapshot means "no popup seen,
        no domain known", so the reward methods return neutral values instead
        of raising AttributeError or reusing the previous episode's state.
        """
        self.had_popup = False
        self.have_popup = False
        self.url = None
        self.new_url = None

    def start(self):
        """Reset the calculator for a new episode."""
        self.is_final_state = False
        self._reset_observations()

    def is_displayed(self, elem):
        """Return True if ``elem`` is displayed and larger than 1x1.

        Any error raised while querying the element is treated as
        "not displayed".
        """
        try:
            # Check that location is accessible
            _ = elem.location
            # Check selenium method is_displayed and that height > 1 and width > 1
            return elem.is_displayed() and elem.size['width'] > 1 and elem.size['height'] > 1
        except Exception:
            # Deliberately best-effort, but no longer swallows
            # KeyboardInterrupt/SystemExit as the bare ``except`` did.
            return False

    def extract_random_controls(self, driver, max_num = 10):
        """Return up to ``max_num`` displayed controls, sampled at random.

        When fewer than ``max_num`` controls are displayed, all of them are
        returned.
        """
        selects = selenium_controls.get_selects(driver)
        inputs = selenium_controls.get_inputs(driver)
        buttons = selenium_controls.get_buttons(driver)
        links = selenium_controls.get_links(driver)

        checkboxes = selenium_controls.get_checkboxes(driver)
        radios = selenium_controls.get_radiobuttons(driver)

        controls = selects + inputs + buttons + links + checkboxes + radios
        visible = [ctrl for ctrl in controls if self.is_displayed(ctrl)]

        if len(visible) < max_num:
            return visible

        return random.sample(visible, max_num)

    def is_popup_exists(self, driver):
        """Heuristically detect a popup on the current page.

        Returns True when at least 3 of the sampled controls are reported as
        not visible by ``selenium_controls.is_visible``.
        """
        # 1. Scroll to Top
        common.scroll_to_top(driver)

        # 2. Extract visible controls
        controls = self.extract_random_controls(driver, 10)

        # 3. Check how many elements are hidden by other elements
        covered = sum(1 for ctrl in controls if not selenium_controls.is_visible(ctrl))

        print('controls: {}, covered: {}'.format(len(controls), covered))
        return covered >= 3

    def get_domain(self, url):
        """Return the network-location part of ``url``."""
        return urlparse(url).netloc

    def is_final(self):
        """True once the domain changed or the popup disappeared after an action."""
        return self.is_final_state

    def before_action(self, driver, action):
        """Record popup presence and domain before the action."""
        self.had_popup = self.is_popup_exists(driver)
        self.url = self.get_domain(driver.current_url)

    def after_action(self, driver, action):
        """Record popup presence and domain after the action and update finality."""
        self.have_popup = self.is_popup_exists(driver)
        self.new_url = self.get_domain(driver.current_url)
        self.is_final_state = self.new_url != self.url or not self.have_popup

    def calc_reward(self, is_success):
        """Reward for the last action.

        -100 when the domain changed, 100 when the popup disappeared, -1 when
        the action failed, 0 otherwise.
        """
        if self.new_url != self.url:
            return -100
        elif self.had_popup and not self.have_popup:
            return 100
        elif not is_success:
            return -1
        else:
            return 0

    def calc_final_reward(self):
        """Final reward: 0 when no popup is present, -100 when it is still open."""
        if not self.have_popup:
            return 0
        else:
            # Haven't close popup
            return -100


In [None]:
import tempfile
import tracing.selenium_utils.common as common
import tracing.selenium_utils.controls as selenium_controls
from PIL import Image
from scipy import misc
import os
import numpy as np
import traceback
import time
    

class Environment:
    """Browser environment the agent acts in.

    Wraps a Chrome driver, turns the current page into network inputs and
    applies actions while delegating reward computation to ``rewards``.
    Usable as a context manager; leaving the context quits the driver.
    """

    def __init__(self, rewards, user, width = 612, headless = True):
        """
        Args:
            rewards: reward calculator notified around every action.
            user: passed through to ``action.apply``.
            width: width in pixels the screenshot is resized to.
            headless: forwarded to ``common.create_chrome_driver``.
        """
        self.rewards = rewards
        self.user = user
        self.width = width
        self.headless = headless
        self.step = 0
        self.driver = None

    def __enter__(self):
        """Create the driver if needed and return the environment itself."""
        if not self.driver:
            self.driver = common.create_chrome_driver(headless = self.headless)
        # Returning self makes ``with Environment(...) as env:`` usable.
        return self

    def is_final(self):
        """True when the reward calculator reports a final state."""
        return self.rewards.is_final()

    def start(self, url):
        """Open ``url`` (``http://`` is prepended when no scheme is given),
        reset the reward calculator and the step counter, then wait 5 seconds."""
        if not self.driver:
            # Qualified with the module alias; the bare name only existed
            # after a star import executed in a later cell.
            self.driver = common.create_chrome_driver(headless = self.headless)

        if not url.startswith(('http://', 'https://')):
            url = 'http://' + url

        self.driver.get(url)
        self.rewards.start()
        self.step = 0

        time.sleep(5)

    def __exit__(self, type, value, traceback):
        """Quit the driver, if any. Exceptions are not suppressed."""
        print("exit is called")
        if self.driver:
            self.driver.quit()
            self.driver = None

    def get_screenshot_as_array(self):
        """Return the full-page screenshot as a 3D numpy array.

        The array is indexed Height x Width x Channels.  The screenshot is
        resized to ``self.width`` keeping the aspect ratio; when it is less
        tall than wide, zero rows are appended so that it becomes square.

        NOTE(review): ``scipy.misc.imread`` and ``Image.ANTIALIAS`` are
        removed in recent SciPy / Pillow releases - confirm the pinned
        versions before upgrading.
        """
        assert self.driver is not None

        # 1. Create temp file.  mkstemp returns an open descriptor; close it
        #    right away (only the path is used) so it does not leak.
        fd, tmp = tempfile.mkstemp(suffix='.png')
        os.close(fd)

        try:
            # 2. Take a screenshot
            scale = common.get_scale(self.driver)
            common.get_full_page_screenshot(self.driver, tmp, scale)

            # 3. Resize image
            img = Image.open(tmp)
            width_scale = (self.width / float(img.size[0]))
            height = int((float(img.size[1]) * float(width_scale)))
            img = img.resize((self.width, height), Image.ANTIALIAS)
            img.save(tmp)

            # 4. Read as a numpy array
            image = misc.imread(tmp)
        finally:
            # Remove the temp file even when the screenshot or decoding fails.
            os.remove(tmp)

        h, w, channels = image.shape
        if h < w:
            # Pad with as many channels as the image has rather than assuming 3.
            to_add = np.zeros([w - h, w, channels], dtype=float)
            image = np.append(image, to_add, axis=0)

        return image

    def get_controls_as_input(self):
        """Return one network input per extracted control.

        Each item is ``(control, array)`` where ``array`` is the normalized
        screenshot with one extra channel that is 1 inside the control's
        rectangle and 0 elsewhere.  Items are sorted by top, then left.

        NOTE(review): the rectangle uses the control's page coordinates as-is
        while the screenshot has been resized - confirm both use the same
        scale.
        """
        source_image = self.get_screenshot_as_array()

        image = (source_image - 128.0) / 128.0
        common.scroll_to_top(self.driver)

        controls_info = selenium_controls.extract_controls(self.driver)
        result = []

        common.scroll_to_top(self.driver)

        h, w, _ = image.shape
        for ctrl in controls_info:
            mask = np.zeros([h, w, 1], dtype=float)
            # Slice bounds must be integers.
            top = int(ctrl.location['y'])
            left = int(ctrl.location['x'])
            bottom = top + int(ctrl.size['height'])
            right = left + int(ctrl.size['width'])

            mask[top:bottom, left:right, 0] = 1
            array = np.append(image, mask, axis=-1)
            result.append((ctrl, array))

        # Sort by Top then by Left of control location
        result.sort(key = lambda pair: (pair[0].location['y'], pair[0].location['x']))

        return result

    def apply_action(self, control, action):
        """Apply ``action`` to ``control`` and return the resulting reward.

        An exception raised while applying the action is printed and counted
        as an unsuccessful action; ``rewards.after_action`` is always called.
        """
        success = False
        try:
            self.rewards.before_action(self.driver, action)
            self.step += 1
            success = action.apply(control, self.driver, self.user)
        except Exception:
            # Best-effort on purpose, but let KeyboardInterrupt/SystemExit
            # propagate so a running worker can be stopped.
            success = False
            traceback.print_exc()
        finally:
            self.rewards.after_action(self.driver, action)

        return self.rewards.calc_reward(success)

    def calc_final_reward(self):
        """Return the reward calculator's end-of-episode reward."""
        return self.rewards.calc_final_reward()

In [None]:
# env = Environment()
# controls = env.get_controls_as_input(driver)
# for control, array in controls:
#     print(control)
#     print(array)

In [None]:
# Sites expected to show a popup on load, grouped by the kind of popup.
popup_urls = [
    # Popups offering a choice between two options
    "monstervape.com",
    "twistedcigs.com",
    "ecigsejuice.com",
    "vape-fuel.com",
    "powervapes.net",
    "ecigexpress.com",
    "ecigvaporstore.com",

    # Subscription popups
    "cigarmanor.com",

    # Popups asking for a date
    "thecigarshop.com",
    "cigartowns.com",
    "docssmokeshop.com",
    "enhancedecigs.com",
    "betamorphecigs.com",

    # Cookie-consent popups
    "theglamourshop.com",
]

# Sites expected NOT to show a popup on load.
# 'dixieems.com' used to be listed twice; each site is now listed once.
no_popup_urls = [
    'dixieems.com',
    'firstfitness.com',
    'sandlakedermatology.com',
    'anabolicwarfare.com',
    'jonessurgical.com',
    'srandd.com']

In [None]:
import threading
import random

class ActionsMemory:
    """Buffer of (image, action, reward) steps collected during a rollout."""

    def __init__(self, gamma):
        """
        Args:
            gamma: discount factor used when turning rewards into returns.
        """
        self.imgs = []
        self.actions = []
        self.rewards = []
        self.gamma = gamma

    def append(self, img, action, reward):
        """Record one step."""
        self.imgs.append(img)
        self.actions.append(action)
        self.rewards.append(reward)

    def to_input(self):
        """Convert the buffer into a training batch.

        Returns:
            dict with the keys "img", "actions" and "rewards", where
            ``rewards[i]`` is the discounted return from step ``i`` onwards
            (``r_i + gamma * r_{i+1} + ...``), aligned with ``img[i]`` and
            ``actions[i]``.
        """
        returns = []
        running = 0
        # Accumulate from the last step backwards ...
        for reward in reversed(self.rewards):
            running = running * self.gamma + reward
            returns.append(running)
        # ... then restore chronological order.  Without this the return of
        # the last step was paired with the first image/action.
        returns.reverse()

        return {
            "img": self.imgs,
            "actions": self.actions,
            "rewards": returns
            }
        

class ActorLearnerWorker(threading.Thread):
    """Worker thread that plays episodes and trains through its local model.

    The three class attributes below are shared by all workers.
    """

    # Number of episodes started so far, across all workers.
    global_step = 0
    # Exponential moving average of the episode returns.
    avg_reward = 0
    # Return of every finished episode, in completion order.
    step_rewards = []

    def __init__(self, name, urls, global_model, env, max_steps = 1000):
        """
        Args:
            name: thread name, also used as the local model's scope suffix.
            urls: sites to sample episodes from.
            global_model: shared model the local model is attached to.
            env: environment the worker acts in.
            max_steps: stop once ``global_step`` reaches this value.
        """
        super().__init__()

        self.name = name
        self.urls = urls
        self.session = global_model.session
        self.global_model = global_model
        self.local_model = A3CModel(
            global_model.num_actions,
            global_model = global_model,
            session = self.session,
            name = self.name,
        )
        self.env = env
        self.max_steps = max_steps

    def get_url(self):
        """Pick one of the configured urls at random."""
        return random.choice(self.urls)

    def run(self):
        """Play episodes until the shared step counter reaches ``max_steps``."""
        n_step = 5
        gamma = 0.99
        lr = 0.01
        entropy_l = 0.01

        with self.env:
            while ActorLearnerWorker.global_step < self.max_steps:
                ActorLearnerWorker.global_step += 1
                self._run_episode(self.get_url(), n_step, gamma, lr, entropy_l)

    def _run_episode(self, url, n_step, gamma, lr, entropy_l):
        """Play a single episode on ``url``, training after every rollout."""
        print('\n\nstarted url', 'http://' + url)
        self.env.start(url)

        controls = self.env.get_controls_as_input()
        total_controls = len(controls)
        print('extracted controls:', total_controls)

        # Popups specific, don't update window
        cursor = 0
        episode_return = 0
        finished = False

        while not finished:
            rollout = ActionsMemory(gamma = gamma)
            # ToDo 1. Neat working with controls
            # ToDo 2. Add scrolling?
            while not self.env.is_final() and cursor < total_controls:
                ctrl, inp = controls[cursor]
                print('control:', ctrl)
                action_id = self.local_model.get_action(inp)
                action = Actions.actions[action_id]
                print('got action:', action)

                reward = self.env.apply_action(ctrl, action)
                print('reward:', reward)

                rollout.append(inp, action_id, reward)

                cursor += 1
                episode_return += reward * (gamma ** self.env.step)

                # Cut the rollout so that a training update happens in between.
                if (self.env.step + 1) % n_step == 0:
                    break

            self.local_model.train_from_memory(rollout, dropout = 1.0 , lr = lr, er = entropy_l)

            finished = self.env.is_final() or cursor >= total_controls

        episode_return += self.env.calc_final_reward() * (gamma ** self.env.step)
        ActorLearnerWorker.avg_reward = ActorLearnerWorker.avg_reward * 0.99 + 0.01 * episode_return
        ActorLearnerWorker.step_rewards.append(episode_return)
        print(episode_return)

In [None]:
# Training entry point: build the global model and its workers in a fresh
# default graph, initialise all variables, then run the workers to completion.
tf.reset_default_graph()
session = tf.Session()

num_workers = 1

# The global model is created first; every worker attaches to it.
global_model = A3CModel(len(Actions.actions), session = session)
workers = []

for i in range(num_workers):
    # Each worker gets its own environment and reward calculator.
    env = Environment(PopupRewardsCalculator(), user={}, headless=True)
    workers.append(ActorLearnerWorker("worker-{}".format(i), popup_urls, global_model, env, 1000))

coord = tf.train.Coordinator()
# Initialise only after every worker has been constructed: each worker's
# __init__ builds its own local A3CModel, which adds variables to the graph.
session.run(tf.global_variables_initializer())

# NOTE(review): worker_threads is never read in the visible cells.
worker_threads = []
for worker in workers:
    worker.start()

# Block until all worker threads have finished.
coord.join(workers)


In [None]:
# Test Environment
from tracing.selenium_utils.common import *

# Manual smoke test against a live site, run in a visible (non-headless)
# browser. The expected control count, control order and rewards below
# depend on the current state of that page.
env = Environment(PopupRewardsCalculator(), user={}, headless=False)

with env:
    env.start('enhancedecigs.com')
    env.driver.set_script_timeout(100)

    ctrls = env.get_controls_as_input()
    assert len(ctrls) == 6
    for ctrl in ctrls:
        print(ctrl[0])

    # Items are (control, array) pairs sorted by top, then left; only the
    # control is needed here. Index 4 is intentionally not used.
    dayCtrl = ctrls[0][0]
    monthCtrl = ctrls[1][0]
    yearCtrl = ctrls[2][0]
    check = ctrls[3][0]
    enter = ctrls[5][0]

    # Intermediate steps are expected to be neutral (reward 0) ...
    assert env.apply_action(dayCtrl, InputBDay()) == 0
    assert env.apply_action(monthCtrl, InputBMonth()) == 0
    assert env.apply_action(yearCtrl, InputBYear()) == 0
    assert env.apply_action(check, Click()) == 0
    # ... and the final click is expected to close the popup (reward 100).
    assert env.apply_action(enter, Click()) == 100

    # With the popup gone the final reward is 0.
    assert env.calc_final_reward() == 0


In [None]:
# Test Popups Detecting

import time

# Manual check of the popup heuristic against live sites: every url in
# no_popup_urls must be reported popup-free and every url in popup_urls must
# be reported as showing a popup.
rewards = PopupRewardsCalculator()

driver = create_chrome_driver()

# try/finally so the browser is closed even when an assertion or a page load
# fails; previously a failure left the driver running.
try:
    for urls, expect_popup in ((no_popup_urls, False), (popup_urls, True)):
        for url in urls:
            print('url: ', url)
            driver.get('http://' + url)
            # Give the page (and a possible popup) time to appear.
            time.sleep(2)
            found_popup = rewards.is_popup_exists(driver)
            assert bool(found_popup) == expect_popup, url
finally:
    driver.quit()

In [None]:
import matplotlib.pyplot as plt

# Plot the return of every finished episode, in completion order.
rewards = ActorLearnerWorker.step_rewards
episode_index = np.arange(len(rewards))

fig, ax = plt.subplots()
ax.plot(episode_index, rewards)
ax.set_xlabel('step')
ax.set_ylabel('Total moving reward')
plt.show()