In [None]:
# The wildcard import stays first so that the explicit imports below win
# if the wildcard happens to export a name that clashes with them.
from tracing.rl.actions import *
from tracing.rl.a3cmodel import A3CModel
from tracing.rl.rewards import PopupRewardsCalculator
from tracing.rl.environment import Environment
from tracing.rl.actor_learner import ActionsMemory
from tracing.rl.actor_learner import ActorLearnerWorker
import tensorflow as tf
import threading
import csv, re
import os
import random
import time
import traceback

In [None]:
# Hand-picked popup sites, grouped by the kind of popup noted by the author.
hard_popup_urls = [
    # "Choose one of two options" popups
    'monstervape.com',
    'twistedcigs.com',
    'ecigsejuice.com',
    'vape-fuel.com',
    'www.powervapes.net/products/',
    'ecigexpress.com',

    # Subscribe popups
    'cigarmanor.com',  # Need email or extract close button
    'smokechophouse.com',

    # "Enter your birth date" popups
    'thecigarshop.com',
    'cigartowns.com',
    'docssmokeshop.com',
    'enhancedecigs.com',
    'betamorphecigs.com',

    # "Accept cookies" popups
    'theglamourshop.com',
    'smokingvaporstore.com',
]

# Sites expected to show no popup; used as negative examples by the
# popup-detection check. Each site is listed once so it is visited once.
no_popup_urls = [
    'dixieems.com',
    'firstfitness.com',
    'sandlakedermatology.com',
    'anabolicwarfare.com',
    'jonessurgical.com',
    'srandd.com',
]


def _load_popup_dataset(dataset_file):
    """Parse a cached dataset of ``url<TAB>flag`` rows into (url, bool) tuples."""
    result = []
    with open(dataset_file) as f:
        for row in f:
            row = row.strip()
            if not row:
                # Tolerate blank lines (e.g. a trailing newline) in the cache.
                continue
            url, is_popup = row.split('\t')
            result.append((url, is_popup == '1'))
    return result


def _create_headless_driver(attempts=10, delay=2):
    """Create a headless Chrome driver, retrying a few times before giving up."""
    # Imported lazily so that reading an already cached dataset does not
    # require the selenium helpers to be importable.
    import tracing.selenium_utils.common as common

    last_error = None
    for _ in range(attempts):
        try:
            return common.create_chrome_driver(headless=True, size=(1280, 1024))
        except Exception as error:
            last_error = error
            time.sleep(delay)

    raise Exception("can't create driver") from last_error


def _find_smoke_urls(csv_file):
    """Return first-column URLs of ``csv_file`` that mention a smoking keyword."""
    pattern = '(smok)|(cig)|(vape)|(tobac)'
    smoke_urls = []
    with open(csv_file) as f:
        for row in csv.reader(f):
            # re.search (not re.match): the keyword may appear anywhere in the
            # URL, e.g. 'monstervape.com' or 'www.powervapes.net'.
            if row and re.search(pattern, row[0]):
                smoke_urls.append(row[0])
    return smoke_urls


def create_popup_dataset(dataset_file):
    """Load the popup dataset from ``dataset_file``, building it if missing.

    When ``dataset_file`` exists it is read as tab-separated ``url<TAB>flag``
    rows, where a flag of ``'1'`` marks a URL with a popup. Otherwise every
    smoking-related URL from the sample CSV is opened in headless Chrome and
    checked with ``PopupRewardsCalculator.is_popup_exists``; the results are
    written to ``dataset_file + '.tmp'`` and renamed to ``dataset_file`` once
    the whole scan has finished.

    Args:
        dataset_file: Path of the cached dataset file.

    Returns:
        A list of ``(url, has_popup)`` tuples. The shape is the same whether
        the dataset was loaded from the cache or freshly created.

    Raises:
        Exception: if a Chrome driver cannot be created.
    """
    if os.path.isfile(dataset_file):
        return _load_popup_dataset(dataset_file)

    print('started creating dataset...')

    smoke_urls = _find_smoke_urls('../../resources/pvio_vio_us_ca_uk_sample1.csv')
    print('Found {} urls'.format(len(smoke_urls)))

    results = []
    tmp_file = dataset_file + '.tmp'
    driver = _create_headless_driver()
    try:
        with open(tmp_file, 'w') as f:
            for i, url in enumerate(smoke_urls):
                rewards = PopupRewardsCalculator()
                has_popup = False
                for _ in range(3):
                    try:
                        driver.get('http://' + url)
                        has_popup = bool(rewards.is_popup_exists(driver))
                        break
                    except Exception:
                        # The browser may be left in a bad state: report the
                        # error and retry with a fresh driver.
                        traceback.print_exc()
                        driver.quit()
                        driver = _create_headless_driver()

                print(i, url, has_popup)
                results.append((url, has_popup))

                # Flush per row so progress is visible in the temp file.
                f.write('{}\t{}\n'.format(url, '1' if has_popup else '0'))
                f.flush()
    finally:
        # Always release the browser, even if the scan is interrupted.
        driver.quit()

    # Publish the dataset only after a complete scan.
    os.rename(tmp_file, dataset_file)

    return results

# Load (or build) the dataset of (url, is_popup) pairs.
extracted_popup_urls = create_popup_dataset('popups_dataset.csv')

# Keep only the URLs flagged as showing a popup, in random order.
popup_urls = [site for site, flagged in extracted_popup_urls if flagged == True]
random.shuffle(popup_urls)

# First 80% of the shuffled URLs for training, the remainder for testing.
split = int(len(popup_urls) * 0.8)
train_urls, test_urls = popup_urls[:split], popup_urls[split:]


In [None]:
# Start from an empty default graph with a new session.
tf.reset_default_graph()
session = tf.Session()

num_workers = 4

# Global model: build it, initialise all variables, then load the checkpoint.
global_model = A3CModel(len(Actions.actions), session=session, train_deep=False)
session.run(tf.global_variables_initializer())
global_model.init_from_checkpoint('inception_resnet_v2_2016_08_30.ckpt')

# One worker per environment; each receives the same global model and URLs.
workers = []
for i in range(num_workers):
    env = Environment(PopupRewardsCalculator(), user={}, headless=True)
    worker = ActorLearnerWorker(
        "worker-{}".format(i),
        train_urls,
        global_model,
        env,
        1000,  # NOTE(review): meaning not visible here - presumably a step/episode budget, confirm
        n_step=10,
        lr=0.001,
        l2=0.03,
        entropy_l=0.2,
        dropout=0.8,
        gamma=0.99,
    )
    workers.append(worker)

coord = tf.train.Coordinator()

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

def start(worker):
    """Run ``worker.run()`` until it completes, retrying after every failure.

    A failed run is reported with its traceback and then retried. Only
    ``Exception`` subclasses trigger a retry, so ``KeyboardInterrupt`` and
    ``SystemExit`` propagate to the caller instead of being swallowed.

    Args:
        worker: Any object with a no-argument ``run()`` method.
    """
    while True:
        try:
            worker.run()
            return
        except Exception:
            # Make the failure visible instead of retrying silently.
            traceback.print_exc()
        

# Launch one thread per worker. The worker is handed over through ``args``
# rather than captured by a lambda: a lambda looks ``worker`` up when the
# thread actually runs, by which time the loop may have moved on, so several
# threads could end up driving the same worker.
threads = []
for worker in workers[:4]:
    thread = threading.Thread(target=start, args=(worker,))
    thread.start()
    threads.append(thread)

# Report progress once a minute for as long as any worker is still running.
while any(thread.is_alive() for thread in threads):
    time.sleep(60)

    # Copy the shared list so it is not mutated while being summarised.
    rewards = ActorLearnerWorker.step_rewards[:]
    if len(rewards) > 0:
        print('avg_reward:', sum(rewards) / len(rewards))
    plt.plot(np.arange(len(rewards)), rewards)
    plt.xlabel('episodes')
    plt.ylabel('Reward')
    plt.show()

# Reached once every worker thread has finished.
coord.join(threads)

In [None]:
from tracing.rl.actor_learner import ActionsMemory
from tracing.rl.environment import Environment
from tracing.rl.actions import *
from tracing.rl.a3cmodel import A3CModel
from tracing.rl.rewards import PopupRewardsCalculator
from tracing.rl.environment import Environment
from tracing.rl.actor_learner import ActorLearnerWorker
import threading
import numpy as np
import PIL
import time

from IPython.display import display, Image


# Visual check: open one site and display the image produced for each control.
env = Environment(PopupRewardsCalculator(), user={}, headless=True)
with env:
    env.start('docssmokeshop.com')    
    controls = env.get_controls()
    for ctrl in controls:
        # Print at most the first 100 characters of the control's description.
        print(str(ctrl)[:100])
        inp = env.get_control_as_input(ctrl)
        
        # Rescale the input to 8-bit pixel values.
        # NOTE(review): presumably `inp` is normalised to roughly [-1, 1) - confirm;
        # a value of exactly 1.0 would map to 256 and wrap around in uint8.
        rgb = (inp * 128 + 128).astype(np.uint8)
        img = PIL.Image.fromarray(rgb, 'RGB')
        # Round-trip through a file so the image can be shown with IPython's Image.
        img.save('test.png')
        display(Image(filename='test.png'))


In [None]:
# Debug cell: compare the network output for a batch with the output for the
# first sample alone, then run one training step on that single sample.
# NOTE(review): `memory` is not defined in any visible cell - presumably an
# ActionsMemory instance left over from an earlier session; confirm.
batch = memory.to_input()

# Alias so the expressions below can be pasted from the model's own methods.
self = global_model

# Feed for the whole batch (two performed actions / rewards are supplied).
feed_dict = {self.img: batch['img'], 
             self.dropout: 1.0,
             self.possible_actions: batch['possible_actions'],
             self.performed_actions: [4, 3],
             self.rewards: [3, 3],
             self.lr: 0.01,
             self.er: 0.002
            }

net_v = session.run(self.net, feed_dict)
print(net_v[0, :])

# Same feed restricted to the first sample of the batch.
feed_dict_1 = {self.img: [batch['img'][0]], 
             self.dropout: 1.0,
             self.possible_actions: [batch['possible_actions'][0]],
             self.performed_actions: [4],
             self.rewards: [3],
             self.lr: 0.01,
             self.er: 0.002
            }

net_1 = session.run(self.net, feed_dict=feed_dict_1)
print(net_1[0, :])

#pl, el, vl = session.run([self.policy_loss, self.entropy_loss, self.value_loss], feed_dict)
#print(pl, el, vl)

# One optimisation step on the single-sample feed.
session.run(self.train_op, feed_dict = feed_dict_1)

print(net_v.shape)

In [None]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets as nets
import nets.nasnet.pnasnet
import nets.inception_resnet_v2
from nets.inception_utils import inception_arg_scope

import numpy as np


# Build a standalone policy network in a fresh default graph.
tf.reset_default_graph()
num_actions = len(Actions.actions)

img = tf.placeholder(dtype=tf.float32, shape=(None, 224, 224, 3), name="img")

# Backbone: Inception-ResNet-v2 built with no class count.
# (Alternative kept for reference: nets.nasnet.pnasnet.build_pnasnet_mobile(img, None, is_training=True))
with slim.arg_scope(inception_arg_scope()):
    net, endpoints = nets.inception_resnet_v2.inception_resnet_v2(
        img,
        num_classes=None,
    )

# Head: a 100-unit hidden layer, then one linear logit per action.
fc2 = slim.fully_connected(net, num_outputs=100)
logits = slim.fully_connected(fc2, num_outputs=num_actions, activation_fn=None)
pi = tf.nn.softmax(logits)

In [None]:
# Test Environment
from tracing.selenium_utils.common import *

# Manual smoke test against a live site, run with a visible (non-headless) browser.
env = Environment(PopupRewardsCalculator(), user={}, headless=False)

with env:
    env.start('enhancedecigs.com')

    # The checks below rely on the page exposing exactly six controls.
    ctrls = env.get_controls()
    assert len(ctrls) == 6

    # NOTE(review): the index-to-control mapping is taken from the variable
    # names only; ctrls[4] is not used - confirm against the live page.
    dayCtrl = ctrls[0]
    monthCtrl = ctrls[1]
    yearCtrl = ctrls[2]
    check = ctrls[3]
    enter = ctrls[5]

    # Each intermediate action is expected to give 0; the final click gives 100.
    assert env.apply_action(dayCtrl, InputBDay()) == 0
    assert env.apply_action(monthCtrl, InputBMonth()) == 0
    assert env.apply_action(yearCtrl, InputBYear()) == 0
    assert env.apply_action(check, Click()) == 0
    assert env.apply_action(enter, Click()) == 100

    assert env.calc_final_reward() == 0


In [None]:
# Test Popups Detecting

import time

# Check the popup detector against URLs with a known expected outcome.
rewards = PopupRewardsCalculator()

driver = create_chrome_driver()
try:
    for urls, popup_expected in ((no_popup_urls, False), (popup_urls, True)):
        for url in urls:
            print('url: ', url)
            driver.get('http://' + url)
            # Give the page a moment to render before checking for a popup.
            time.sleep(2)
            popup_found = bool(rewards.is_popup_exists(driver))
            assert popup_found == popup_expected, \
                'popup expected={} but found={} for {}'.format(popup_expected, popup_found, url)
finally:
    # Close the browser even when an assertion fails.
    driver.quit()