In [None]:
from tracing.rl.actions import *
from tracing.rl.a3cmodel import A3CModel
from tracing.rl.rewards import PopupRewardsCalculator
from tracing.rl.environment import Environment
from tracing.rl.actor_learner import ActionsMemory
from tracing.rl.actor_learner import ActorLearnerWorker
import tensorflow as tf
import threading

In [None]:
# Sites on which a popup is expected to be detected, grouped by popup type.
# Used both as training URLs for the workers and as positive examples in the
# popup-detection check.
popup_urls = [
    # Popups asking to choose between two options
    'monstervape.com',
    'twistedcigs.com',
    'ecigsejuice.com',
    'vape-fuel.com',
    'www.powervapes.net/products/',
    'ecigexpress.com',
    'ecigvaporstore.com',

    # Subscription popups
    'cigarmanor.com',

    # Popups asking to enter a date
    'thecigarshop.com',
    'cigartowns.com',
    'docssmokeshop.com',
    'enhancedecigs.com',
    'betamorphecigs.com',

    # Cookie-consent popups
    'theglamourshop.com',
]

# Sites on which no popup is expected; negative examples for the
# popup-detection check. Each site is listed once ('dixieems.com' used to be
# listed twice, which only made the check visit it a second time).
no_popup_urls = [
    'dixieems.com',
    'firstfitness.com',
    'sandlakedermatology.com',
    'anabolicwarfare.com',
    'jonessurgical.com',
    'srandd.com',
]

In [None]:
# Start from a fresh default graph and open the session handed to the
# global model.
tf.reset_default_graph()
session = tf.Session()

num_workers = 4

# The global model is constructed with the number of entries in
# Actions.actions.
global_model = A3CModel(len(Actions.actions), session=session)
global_model.init()

# Keyword arguments passed unchanged to every ActorLearnerWorker.
worker_kwargs = dict(lr=0.001, entropy_l=0.2, dropout=0.8, gamma=0.99)

workers = []
for worker_index in range(num_workers):
    # Every worker gets its own headless environment.
    env = Environment(PopupRewardsCalculator(), user={}, headless=True)
    # NOTE(review): the meaning of the positional 100 is defined by
    # ActorLearnerWorker, which is not visible in this notebook.
    worker = ActorLearnerWorker(
        "worker-{}".format(worker_index),
        popup_urls,
        global_model,
        env,
        100,
        **worker_kwargs
    )
    workers.append(worker)
    worker.local_model.init()

coord = tf.train.Coordinator()

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

threads = []
for worker in workers:
    worker.max_steps = 1000
    worker.urls = popup_urls
    thread = threading.Thread(target=lambda: worker.run())
    thread.start()
    threads.append(thread)

while True:
    time.sleep(60)
    
    rewards = ActorLearnerWorker.step_rewards[:]
    if len(rewards) > 0:
        print('avg_reward:', sum(rewards) / len(rewards))
    plt.plot(np.arange(len(rewards)), rewards)
    plt.xlabel('episodes')
    plt.ylabel('Reward')
    plt.show()
    
coord.join(threads)

In [None]:
from tracing.rl.actor_learner import ActionsMemory
from tracing.rl.environment import Environment
from tracing.rl.actions import *
from tracing.rl.a3cmodel import A3CModel
from tracing.rl.rewards import PopupRewardsCalculator
from tracing.rl.environment import Environment
from tracing.rl.actor_learner import ActorLearnerWorker
import tensorflow as tf
import threading


# Smoke test: open one page, query worker 0's local model for every control
# found on it and record a fixed entry per control in an ActionsMemory.

# Runs the initializer for all global TF variables in `session`.
# NOTE(review): if this cell is executed after training, this presumably
# resets the trained weights - confirm that this is intended.
session.run(tf.global_variables_initializer())

env = Environment(PopupRewardsCalculator(), user={}, headless=True)
with env:
    # NOTE(review): 0.99 matches the gamma passed to the workers, so it is
    # presumably the discount factor - confirm against ActionsMemory.
    memory = ActionsMemory(0.99)
    env.start('vape-fuel.com')
    controls = env.get_controls()
    worker = workers[0]
    for ctrl in controls:
        inp = env.get_control_as_input(ctrl)
        # The model outputs below are computed but not used further in this
        # cell; the calls only exercise the model.
        action_id = worker.local_model.get_action(inp)

        score = worker.local_model.estimate_score(inp)

        # NOTE(review): 2 and 0 are fixed values whose meaning is defined by
        # ActionsMemory.append (presumably action id and reward - confirm).
        # The image-dump cell additionally passes the control as a 4th argument.
        memory.append(inp, 2, 0)

    memory.set_final_score(100)

In [None]:
# Test Environment Images
from PIL import Image
import numpy as np
from tracing.selenium_utils.common import *

# Render the model input of every control on one page to a PNG file and
# build one batch from all recorded controls.
env = Environment(PopupRewardsCalculator(), user={}, headless=True)

with env:
    env.start('enhancedecigs.com')
    ctrls = env.get_controls()
    inps = []
    # Create the memory once, before the loop, so that it accumulates every
    # control. Creating it inside the loop kept only the last control and left
    # `memory` stale or undefined when the page had no controls.
    memory = ActionsMemory(0.99)
    for ctrl in ctrls:
        inp = env.get_control_as_input(ctrl)
        inps.append(inp)
        memory.append(inp, 2, 0, ctrl)

batch = memory.to_input()

for i, inp in enumerate(inps):
    print(inp.shape)
    # Map the input back to 8-bit pixel values
    # (assumes inp is scaled to roughly [-1, 1] - TODO confirm).
    # Clipping keeps an out-of-range value from wrapping around in the
    # uint8 cast; in-range values are unaffected.
    rgb = np.clip(inp * 128 + 128, 0, 255).astype(np.uint8)
    img = Image.fromarray(rgb, 'RGB')
    img.save('img-{}.png'.format(i))

In [None]:
# Test Environment
from tracing.selenium_utils.common import *

# Drive one date-entry page through a fixed sequence of actions in a visible
# (non-headless) browser and check the value returned by every step.
env = Environment(PopupRewardsCalculator(), user={}, headless=False)

with env:
    env.start('enhancedecigs.com')

    ctrls = env.get_controls()
    assert len(ctrls) == 6

    # Controls are picked by position; index 4 is not used.
    dayCtrl, monthCtrl, yearCtrl = ctrls[0], ctrls[1], ctrls[2]
    check, enter = ctrls[3], ctrls[5]

    # (control, action class, expected return value), in application order.
    # The action is instantiated only right before it is applied.
    steps = (
        (dayCtrl, InputBDay, 0),
        (monthCtrl, InputBMonth, 0),
        (yearCtrl, InputBYear, 0),
        (check, Click, 0),
        (enter, Click, 100),
    )
    for control, action_class, expected in steps:
        assert env.apply_action(control, action_class()) == expected

    assert env.calc_final_reward() == 0



In [None]:
# Test popup detection

import time

# Visit every known site and check that PopupRewardsCalculator detects a
# popup exactly on the sites listed in popup_urls.
rewards = PopupRewardsCalculator()

driver = create_chrome_driver()
try:
    # Sites without a popup are checked first, then sites with one.
    for urls, popup_expected in ((no_popup_urls, False), (popup_urls, True)):
        for url in urls:
            print('url: ', url)
            driver.get('http://' + url)
            # Fixed pause before checking the page.
            time.sleep(2)
            popup_found = bool(rewards.is_popup_exists(driver))
            assert popup_found == popup_expected, \
                'popup expected: {}, found: {} ({})'.format(
                    popup_expected, popup_found, url)
finally:
    # Always close the browser, also when a check or a page load fails.
    driver.quit()