In [None]:
from tracing.rl.actions import *
from tracing.rl.a3cmodel import A3CModel
from tracing.rl.rewards import PopupRewardsCalculator
from tracing.rl.environment import Environment
from tracing.rl.actor_learner import ActionsMemory
from tracing.rl.actor_learner import ActorLearnerWorker
import tensorflow as tf
import threading
import csv, re
import random

from create_dataset import load_dataset

In [None]:
# Hand-picked sites that show a popup, grouped by the kind of popup.
hard_popup_urls = [
    # "Choose from two options" popups
    'monstervape.com',
    'twistedcigs.com',
    'ecigsejuice.com',
    'vape-fuel.com',
    'www.powervapes.net/products/',
    'ecigexpress.com',

    # Subscribe popups
    'cigarmanor.com',  # Need email or extract close button
    'smokechophouse.com',

    # "Enter date" popups
    'thecigarshop.com',
    'cigartowns.com',
    'docssmokeshop.com',
    'enhancedecigs.com',
    'betamorphecigs.com',

    # "Accept cookie" popups
    'theglamourshop.com',
    'smokingvaporstore.com',
]

# Sites on which no popup is expected.  Each site is listed exactly once so
# that a check iterating over this list visits every site a single time.
no_popup_urls = [
    'dixieems.com',
    'firstfitness.com',
    'sandlakedermatology.com',
    'anabolicwarfare.com',
    'jonessurgical.com',
    'srandd.com',
]

# Load the dataset records; each record is indexed below by the 'url' and
# 'has_popup' keys.
urls = load_dataset('../../../resources/popups_dataset.csv')

# Keep only the URLs of records flagged as having a popup.
# NOTE(review): the explicit `== True` comparison is kept on purpose -- the
# type of 'has_popup' is not visible here, and a plain truthiness test would
# behave differently for e.g. non-empty strings.
popup_urls = [status['url'] for status in urls if status['has_popup'] == True]

# Shuffle with a dedicated, seeded generator so that the train/test split is
# identical after every kernel restart (an unseeded shuffle would move test
# URLs into the training set between sessions).  A private Random instance
# is used so the global `random` state is left untouched.
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(popup_urls)

# 80% of the popup URLs are used for training, the remainder for testing.
split = int(len(popup_urls) * 0.8)
train_urls = popup_urls[:split]
test_urls = popup_urls[split:]

print('train size: ', len(train_urls))
print('test size: ', len(test_urls))

In [None]:
# Number of actor-learner workers, each with its own browser environment.
num_workers = 4

# Start from an empty default graph and open the session the model will use.
tf.reset_default_graph()
session = tf.Session()

# Shared model: initialise all variables first, then load the checkpoint.
global_model = A3CModel(len(Actions.actions), session=session, train_deep=False)
session.run(tf.global_variables_initializer())
global_model.init_from_checkpoint('inception_resnet_v2_2016_08_30.ckpt')

# Keyword arguments passed unchanged to every worker.
worker_options = dict(
    n_step=10,
    lr=0.001,
    l2=0.03,
    entropy_l=0.2,
    dropout=0.8,
    gamma=0.99,
)

workers = []
for i in range(num_workers):
    # Every worker gets a separate headless environment.
    env = Environment(PopupRewardsCalculator(), user={}, headless=True)
    worker = ActorLearnerWorker(
        "worker-{}".format(i),
        train_urls,
        global_model,
        env,
        1000,
        **worker_options
    )
    workers.append(worker)

coord = tf.train.Coordinator()

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

def start(worker, retry_delay=1.0):
    """Run ``worker.run()`` until it completes, retrying after failures.

    Args:
        worker: object exposing a ``run()`` method.
        retry_delay: seconds to wait after a failed attempt before trying
            again, so that a persistently failing worker does not spin in a
            tight loop.

    Returns:
        None, once ``worker.run()`` has returned without raising.

    Only ``Exception`` subclasses trigger a retry; the traceback of every
    failure is printed so errors are no longer silently discarded.
    ``SystemExit`` and ``KeyboardInterrupt`` propagate to the caller.
    """
    # Imported locally so the function does not depend on another cell
    # having imported these modules.
    import time
    import traceback

    while True:
        try:
            worker.run()
            return
        except Exception:
            traceback.print_exc()
            time.sleep(retry_delay)
        

# Launch one daemon thread per worker.
threads = []
for worker in workers:
    # The worker is handed over through `args` rather than captured in a
    # lambda: a lambda would look up the loop variable only when the thread
    # actually runs, by which time it may already refer to a later worker.
    thread = threading.Thread(target=start, args=(worker,))
    thread.daemon = True
    thread.start()
    threads.append(thread)

# `time` is needed by this loop; it is imported here so the loop does not
# depend on a later cell having been run first.
import time

# Report progress once a minute for as long as any worker thread is alive.
while any(t.is_alive() for t in threads):
    time.sleep(60)

    # Work on a copy of the shared reward list so that its length cannot
    # change between computing the average and plotting.
    rewards = ActorLearnerWorker.step_rewards[:]
    if len(rewards) > 0:
        print('avg_reward:', sum(rewards) / len(rewards))
    plt.plot(np.arange(len(rewards)), rewards)
    plt.xlabel('episodes')
    plt.ylabel('Reward')
    plt.show()

# Reached once every worker thread has finished.
coord.join(threads)

In [None]:
from tracing.rl.actor_learner import ActionsMemory
from tracing.rl.environment import Environment
from tracing.rl.actions import *
from tracing.rl.a3cmodel import A3CModel
from tracing.rl.rewards import PopupRewardsCalculator
from tracing.rl.environment import Environment
from tracing.rl.actor_learner import ActorLearnerWorker
import threading
import numpy as np
import PIL
import time

import tracing.selenium_utils.controls as selenium_controls
import tracing.selenium_utils.common as common

from IPython.display import display, Image

# A bare `import PIL` does not load the `Image` submodule; import it
# explicitly so `PIL.Image.fromarray` works even when no other library has
# imported it beforehand.
import PIL.Image

# Walk over every control the environment reports for the page and show the
# image that would be produced for it.
env = Environment(PopupRewardsCalculator(), user={}, headless=True)
with env:
    assert env.start('revolutionvapor.com')
    print('started')

    while env.has_next_control():
        ctrl = env.get_next_control()

        # Textual description of the control.
        print('\n\n')
        print(ctrl.elem.get_attribute('outerHTML'))
        print(ctrl.location)
        print(ctrl.size)
        print(str(ctrl)[:100])

        inp = env.get_control_as_input(ctrl)

        # Map the values back to 8-bit pixels.
        # NOTE(review): assumes `inp` is normalised as (pixel - 128) / 128
        # -- confirm against Environment.get_control_as_input.
        rgb = (inp * 128 + 128).astype(np.uint8)
        img = PIL.Image.fromarray(rgb, 'RGB')

        # The image is written to disk and displayed from the saved file.
        img.save('test.png')
        display(Image(filename='test.png'))

In [None]:
 def get_control_as_input(self, ctrl):
    """Return a normalised screenshot crop centred on ``ctrl``.

    Scrolls to the control, takes a screenshot, zeroes every pixel further
    than ``self.crop_pad`` from the control's bounding box, crops around the
    box centre with ``self.crop_image`` and returns ``(image - 128) / 128``.

    Raises:
        AssertionError: if ``ctrl.location['y']`` is negative after
            scrolling, or if the clamped bounding box is empty.
    """
    x, y = selenium_controls.scroll_to_element(self.driver, ctrl)
    if y < 0:
        # Probe scroll offsets 0, 200, ..., 800 and recompute the position
        # relative to the scroll top whenever the control reports a
        # non-negative y location.
        # NOTE(review): there is no `break`, so all five offsets are always
        # visited and the last matching one wins -- confirm this is intended.
        for offset in range(0, 1000, 200):
            common.scroll_to(self.driver, offset)
            time.sleep(0.1)
            if ctrl.location['y'] < 0:
                continue
            scroll_top = common.get_scroll_top(self.driver)
            y = ctrl.location['y'] - scroll_top
            x = ctrl.location['x']

    assert ctrl.location['y'] >= 0

    time.sleep(0.2)

    # NOTE(review): the value returned by get_screen_scale is used only for
    # the screenshot; the box below is scaled with self.scale -- confirm the
    # two are meant to agree.
    screen_scale = self.get_screen_scale(ctrl)
    image = self.get_screenshot_as_array(scale=screen_scale)
    h, w, _ = image.shape

    # Bounding box of the control, shifted by the frame offsets.
    box_top = y + self.frame_y
    box_left = x + self.frame_x
    box_bottom = box_top + ctrl.size['height']
    box_right = box_left + ctrl.size['width']

    # Convert to screenshot pixels and clamp to the image bounds.
    top = max(int(box_top * self.scale), 0)
    left = max(int(box_left * self.scale), 0)
    bottom = min(int(box_bottom * self.scale), h)
    right = min(int(box_right * self.scale), w)

    # Debug output of the resulting box.
    print(top, bottom)
    print(left, right)
    assert bottom > top and right > left

    # Black out everything outside the padded bounding box.
    pad = self.crop_pad
    if top > pad:
        image[:top - pad] = 0
    if bottom + pad < h:
        image[bottom + pad:] = 0
    if left > pad:
        image[:, :left - pad] = 0
    if right + pad < w:
        image[:, right + pad:] = 0

    center_x = (left + right) // 2
    center_y = (top + bottom) // 2
    image = self.crop_image(image, center_x, center_y)

    return (image - 128.0) / 128.0


In [None]:
# Scratch check: evaluate the network on a two-sample batch and on its first
# sample alone, then run one training step on the single sample.
batch = memory.to_input()

self = global_model

# Feed entries that are identical for both runs.
shared_feed = {
    self.dropout: 1.0,
    self.lr: 0.01,
    self.er: 0.002,
}

# Full batch (two samples).
feed_dict = dict(shared_feed)
feed_dict.update({
    self.img: batch['img'],
    self.possible_actions: batch['possible_actions'],
    self.performed_actions: [4, 3],
    self.rewards: [3, 3],
})

net_v = session.run(self.net, feed_dict)
print(net_v[0, :])

# First sample only.
feed_dict_1 = dict(shared_feed)
feed_dict_1.update({
    self.img: [batch['img'][0]],
    self.possible_actions: [batch['possible_actions'][0]],
    self.performed_actions: [4],
    self.rewards: [3],
})

net_1 = session.run(self.net, feed_dict=feed_dict_1)
print(net_1[0, :])

# Optional loss inspection, left disabled:
#pl, el, vl = session.run([self.policy_loss, self.entropy_loss, self.value_loss], feed_dict)
#print(pl, el, vl)

session.run(self.train_op, feed_dict=feed_dict_1)

print(net_v.shape)

In [None]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets as nets
import nets.nasnet.pnasnet
import nets.inception_resnet_v2
from nets.inception_utils import inception_arg_scope

import numpy as np


# One logit per available action.
num_actions = len(Actions.actions)

# Build the experimental network in a fresh default graph.
tf.reset_default_graph()

img = tf.placeholder(dtype=tf.float32, shape=(None, 224, 224, 3), name="img")

# Alternative backbone, left disabled:
#net, endpoints = nets.nasnet.pnasnet.build_pnasnet_mobile(img, None, is_training=True)
with slim.arg_scope(inception_arg_scope()):
    net, endpoints = nets.inception_resnet_v2.inception_resnet_v2(
        img, num_classes=None)

# Head: a 100-unit layer followed by linear logits and a softmax over actions.
fc2 = slim.fully_connected(net, num_outputs=100)
logits = slim.fully_connected(fc2, num_outputs=num_actions, activation_fn=None)
pi = tf.nn.softmax(logits)

In [None]:
# Test Environment
from tracing.selenium_utils.common import *

# Smoke test of the environment on a page with a date-entry popup, run with
# a visible (non-headless) browser.
env = Environment(PopupRewardsCalculator(), user={}, headless=False)

with env:
    env.start('enhancedecigs.com')

    ctrls = env.get_controls()
    assert len(ctrls) == 6

    # The control at index 4 is not used by this test.
    dayCtrl, monthCtrl, yearCtrl, check, enter = (
        ctrls[k] for k in (0, 1, 2, 3, 5)
    )

    # (control, action class, expected reward), applied in this order.
    # Only the final click is expected to yield a non-zero reward.
    steps = [
        (dayCtrl, InputBDay, 0),
        (monthCtrl, InputBMonth, 0),
        (yearCtrl, InputBYear, 0),
        (check, Click, 0),
        (enter, Click, 100),
    ]
    for target, action_cls, expected_reward in steps:
        assert env.apply_action(target, action_cls()) == expected_reward

    assert env.calc_final_reward() == 0


In [None]:
# Test Popups Detecting

import time

# Check popup detection against sites expected to have no popup and against
# the popup URLs taken from the dataset.
rewards = PopupRewardsCalculator()

driver = create_chrome_driver()

try:
    for expected, site_list in ((False, no_popup_urls), (True, popup_urls)):
        for url in site_list:
            print('url: ', url)
            driver.get('http://' + url)
            # Pause after loading the page before checking for a popup.
            time.sleep(2)
            detected = bool(rewards.is_popup_exists(driver))
            assert detected == expected, \
                'popup detected={} but expected={} for {}'.format(detected, expected, url)
finally:
    # Always close the browser, including when an assertion above fails;
    # otherwise the driver process would be left running.
    driver.quit()