In [None]:
# NOTE: the original import order is kept on purpose -- the star imports come
# first so that the explicit imports below always win if a name is exported twice.
from tracing.rl.actions import *
from tracing.rl.a3cmodel import A3CModel
from tracing.rl.rewards import *
from tracing.rl.environment import Environment
from tracing.rl.actor_learner import ActionsMemory
from tracing.rl.actor_learner import ActorLearnerWorker
import tensorflow as tf
import threading
import time
import csv, re
import random

from create_dataset import read_popups_rl_dataset
from tracing.training.classification.page_classifier import PageClassifier

In [None]:
# Path of the popups CSV that read_popups_rl_dataset() loads to build the URL lists.
dataset = '../../../resources/popups_dataset.csv'
# Checkpoint restored into the global model before training starts.
# A falsy value (None or '') skips the restore, leaving the freshly initialized weights.
pretrained_checkpoint = '../pretrain/checkpoints/pretrain_checkpoint-10'

In [None]:
# Hand-picked popup sites, grouped by the kind of popup they show.

# Popups that ask the visitor to choose between two options
_two_option_popup_urls = [
    'monstervape.com',
    'twistedcigs.com',
    'ecigsejuice.com',
    'vape-fuel.com',
    'www.powervapes.net/products/',
    'ecigexpress.com',
]

# Popups that ask the visitor to enter a date
_enter_date_popup_urls = [
    'thecigarshop.com',
    'cigartowns.com',
    'docssmokeshop.com',
    'enhancedecigs.com',
    'betamorphecigs.com',
]

# Subscription popups
_subscribe_popup_urls = [
    'cigarmanor.com',  # needs an email address, or the close button has to be extracted
    'smokechophouse.com',
]

# Cookie-consent popups
_accept_cookie_popup_urls = [
    'theglamourshop.com',
    'smokingvaporstore.com',
]

# Flat list in the same order as the groups above.
hard_popup_urls = (
    _two_option_popup_urls
    + _enter_date_popup_urls
    + _subscribe_popup_urls
    + _accept_cookie_popup_urls
)

# Sites that are expected to show no popup (negative cases for popup detection).
# Every URL is listed exactly once so no page is loaded twice by a check that
# iterates over this list.
no_popup_urls = [
    'dixieems.com',
    'firstfitness.com',
    'sandlakedermatology.com',
    'anabolicwarfare.com',
    'jonessurgical.com',
    'srandd.com',
]

# Seed for the train/test shuffle.  A fixed seed keeps the split identical across
# kernel restarts, so URLs held out for testing never drift into the training set
# when training is resumed from a checkpoint.
# NOTE(review): this assumes read_popups_rl_dataset() returns rows in a stable order.
SPLIT_SEED = 0
# Share of the popup URLs used for training; the remainder is held out for testing.
TRAIN_FRACTION = 0.8

urls = read_popups_rl_dataset(dataset)

# Keep only the pages labelled as having a popup.  The explicit `== True` is kept
# because the stored type of 'has_popup' is not visible from this notebook.
popup_urls = [status['url'] for status in urls if status['has_popup'] == True]

# Shuffle in place with a private generator so the global `random` state
# (which other code may rely on) is left untouched.
random.Random(SPLIT_SEED).shuffle(popup_urls)

split = int(len(popup_urls) * TRAIN_FRACTION)
train_urls = popup_urls[:split]
test_urls = popup_urls[split:]

print('train size: ', len(train_urls))
print('test size: ', len(test_urls))

In [None]:
# The pretrained page classifier is created while its own graph (g1) is the default
# graph, so whatever it builds stays separate from the graph used for the A3C model.
g1 = tf.Graph()
with g1.as_default():
    page_classifier = PageClassifier.get_pretrained()

# Start from a clean default graph and open the session the A3C model will use.
tf.reset_default_graph()
session = tf.Session()
    
# Number of actor-learner workers (one Environment each) created below.
num_workers = 4

# Passed to A3CModel as `fixed_gate_probas`.
# NOTE(review): presumably maps gate/action indices to a fixed probability -- confirm in A3CModel.
fixed_probas = {0: 1., 1: 1., 2: 1., 4: 1.}

# Global model shared by all workers; its output size is the number of available actions.
global_model = A3CModel(len(Actions.actions), session = session, train_deep = False, 
                        fixed_gate_probas = fixed_probas)
# Initialize every variable first; the optional restore below then overwrites
# the variables known to the Saver with the checkpoint values.
session.run(tf.global_variables_initializer())

if pretrained_checkpoint:
    saver = tf.train.Saver()
    saver.restore(session, pretrained_checkpoint)

workers = []

# NOTE(review): workers are constructed after the initializer and the restore have run.
# If ActorLearnerWorker creates TF variables of its own, they are not covered by either
# step here -- confirm that the worker initializes them itself.
# NOTE(review): the positional argument 1000 is not explained by anything visible in
# this notebook -- confirm its meaning against ActorLearnerWorker's signature.
for i in range(num_workers):
    env = Environment(PageRewardsCalculator.for_popups(page_classifier), user={}, headless=True)
    worker = ActorLearnerWorker("worker-{}".format(i),
                                train_urls,
                                global_model, 
                                env, 
                                1000, 
                                n_step = 10, 
                                lr=0.001, 
                                l2 = 0.03,
                                entropy_l=0.2, 
                                dropout = 0.8, 
                                gamma=0.99)
    workers.append(worker)
    
# Coordinator used by the training cell to join the worker threads.
coord = tf.train.Coordinator()

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

def start(worker):
    """Run ``worker.run()`` until it returns normally, restarting it after errors.

    Any ``Exception`` raised by ``run()`` is reported with its traceback and the
    worker is started again, so a single failing episode does not end training.
    ``KeyboardInterrupt`` and ``SystemExit`` are *not* caught, so the loop can
    still be stopped from outside.

    Args:
        worker: any object exposing a ``run()`` method.
    """
    # Local import so the function works even if the cell holding it is run alone.
    import traceback

    while True:
        try:
            worker.run()
            return
        except Exception:
            # Keep retrying, but make the failure visible instead of hiding it.
            traceback.print_exc()
        

# Launch one daemon thread per worker.
threads = []
for worker in workers:
    # Hand the worker over through `args`: a `lambda: start(worker)` would look up
    # `worker` only when the thread actually runs, by which time the loop may have
    # moved on -- several threads could then drive the same worker.
    thread = threading.Thread(target=start, args=(worker,))
    thread.daemon = True
    thread.start()
    threads.append(thread)

# Report progress once a minute for as long as at least one worker is still running.
while any(t.is_alive() for t in threads):
    time.sleep(60)

    # Copy the shared list so it cannot change between the average and the plot.
    rewards = ActorLearnerWorker.step_rewards[:]
    if len(rewards) > 0:
        print('avg_reward:', sum(rewards) / len(rewards))
    plt.plot(np.arange(len(rewards)), rewards)
    plt.xlabel('episodes')
    plt.ylabel('Reward')
    plt.show()

# Reached once every worker thread has finished.
coord.join(threads)

In [None]:
# Extra environment/worker pair on top of the training workers, running with
# headless=False.  This rebinds the notebook-level names `env` and `worker`.
env = Environment(PopupRewardsCalculator(), user={}, headless=False)

# The name uses `num_workers` -- the first index not taken by the training workers
# ("worker-0" .. "worker-<num_workers - 1>") -- instead of the loop variable `i`
# left over from the worker-creation cell, which would duplicate the last
# training worker's name.
worker = ActorLearnerWorker("worker-{}".format(num_workers),
                            train_urls,
                            global_model,
                            env,
                            100,
                            n_step=10,
                            lr=0.001,
                            l2=0.03,
                            entropy_l=0.2,
                            dropout=0.8,
                            gamma=0.99)

In [None]:
from tracing.rl.actor_learner import ActionsMemory
from tracing.rl.actions import *
from tracing.rl.a3cmodel import A3CModel
from tracing.rl.rewards import PopupRewardsCalculator
from tracing.rl.actor_learner import ActorLearnerWorker
import threading
import numpy as np
import PIL
import time

import tracing.selenium_utils.controls as selenium_controls
import tracing.selenium_utils.common as common

from IPython.display import display, Image

# Walk through every control of one page, print and render each control, and
# apply a scripted action for the first four steps (Nothing() afterwards).

# Action class to apply at each scripted step; any other step gets Nothing().
scripted_actions = {
    0: InputBMonth,
    1: InputBDay,
    2: InputBYear,
    3: Click,
}

env = Environment(PopupRewardsCalculator(), user={}, headless=True)
with env:
    started = env.start('goldenstatevapor.com')
    assert started
    print('started')

    while env.has_next_control():
        ctrl = env.get_next_control()

        # Textual description of the current control
        print('\n\n')
        print('step:', env.step)
        print(ctrl.elem.get_attribute('outerHTML'))
        print(ctrl.location)
        print(ctrl.size)
        print(str(ctrl)[:100])

        # Map the model input back to 8-bit pixel values and show it as an image
        inp = env.get_control_as_input(ctrl)
        rgb = (inp * 128 + 128).astype(np.uint8)
        img = PIL.Image.fromarray(rgb, 'RGB')
        img.save('test.png')
        display(Image(filename='test.png'))

        action_cls = scripted_actions.get(env.step)
        if action_cls is None:
            # Past the scripted steps: leave the control untouched
            env.apply_action(ctrl, Nothing())
        else:
            reward = env.apply_action(ctrl, action_cls())
            print('reward:', reward)

        

In [None]:
# Test Environment
from tracing.selenium_utils.common import *

# Smoke test: fill in the date popup on a known page and check the reward of every step.
env = Environment(PopupRewardsCalculator(), user={}, headless=False)

with env:
    # Fail right away if the page could not be opened, instead of failing later
    # on the control count with a misleading message.
    assert env.start('enhancedecigs.com'), 'could not start environment for enhancedecigs.com'

    ctrls = env.get_controls()
    assert len(ctrls) == 6, 'expected 6 controls, found {}'.format(len(ctrls))

    # (control, action class, expected reward), applied in this order.
    # ctrls[4] is intentionally not used.
    scripted_steps = [
        (ctrls[0], InputBDay, 0),      # day field
        (ctrls[1], InputBMonth, 0),    # month field
        (ctrls[2], InputBYear, 0),     # year field
        (ctrls[3], Click, 0),          # check box
        (ctrls[5], Click, 100),        # enter button
    ]

    for position, (ctrl, action_cls, expected_reward) in enumerate(scripted_steps):
        reward = env.apply_action(ctrl, action_cls())
        assert reward == expected_reward, 'step {} ({}): expected reward {}, got {}'.format(
            position, action_cls.__name__, expected_reward, reward)

    final_reward = env.calc_final_reward()
    assert final_reward == 0, 'expected final reward 0, got {}'.format(final_reward)



In [None]:
# Test Popups Detecting

import time

# Check popup detection: pages from `no_popup_urls` must report no popup,
# pages from `popup_urls` must report one.
rewards = PopupRewardsCalculator()

driver = create_chrome_driver()
try:
    for url_list, popup_expected in ((no_popup_urls, False), (popup_urls, True)):
        for url in url_list:
            print('url: ', url)
            driver.get('http://' + url)
            # Give the page a moment to load and show its popup before checking.
            time.sleep(2)
            popup_found = bool(rewards.is_popup_exists(driver))
            assert popup_found == popup_expected, '{}: expected popup={}, detected popup={}'.format(
                url, popup_expected, popup_found)
finally:
    # Always close the browser, even when an assertion or a page load fails.
    driver.quit()