In [None]:
import random
import csv
import logging

from shop_tracer import *
from selenium_helper import *
from status import *

import trace_logger
import common_actors
import user_data

from contextlib import contextmanager

### Prepare different URLs to analyze

In [None]:
# All URLs: every non-empty value found in the first column of the sample CSV.
SAMPLE_SIZE = 100

all_urls = []
# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('../resources/pvio_vio_us_ca_uk_sample1.csv', 'r', newline='') as f:
    for row in csv.reader(f):
        # csv.reader yields an empty list for a blank line; skip those
        # instead of failing on row[0].
        if row and row[0]:
            all_urls.append(row[0])

# Random sample of URLs. The fixed seed keeps the sample reproducible, and the
# size is capped so a short input file does not make random.sample raise.
random.seed(1)
sample_urls = random.sample(all_urls, min(SAMPLE_SIZE, len(all_urls)))

# Some good URLs to analyze by hand (each shop listed once).
good_urls = [
    'theglamourshop.com',
    'dixieems.com',
    'firstfitness.com',
    'sandlakedermatology.com',
    'anabolicwarfare.com',
    'vape-fuel.com',
    'jonessurgical.com',
    'srandd.com',
    'ambarygardens.com',
]

# Shops whose purchase flow is non-standard.
complex_buy_urls = [
    'escobarcigars.com',  # buy now
]

# Shops with complex popups, grouped by the kind of popup they show.
popup_urls = [
    # Choose from two options popups
    'monstervape.com',
    'twistedcigs.com',
    'ecigsejuice.com',
    'vape-fuel.com',
    'powervapes.net',
    'ecigexpress.com',
    'ecigvaporstore.com',

    # Subscribe
    'cigarmanor.com',

    # Enter date popups
    'thecigarshop.com',
    'cigartowns.com',
    'docssmokeshop.com',
    'enhancedecigs.com',
    'betamorphecigs.com',
]


In [None]:
# Location of the chromedriver binary; read by get_tracer() and get_driver().
selenium_path = '/usr/bin/chromedriver'

@contextmanager
def get_tracer(headless = False):
    """Context manager that yields a configured ShopTracer.

    The tracer is constructed with user_data.get_user_data, the module-level
    selenium_path, the given headless flag and a FileTraceLogger created with
    'results.jsonl' and 'images'. common_actors.add_tracer_extensions is
    applied to it before it is yielded.

    Args:
        headless: forwarded unchanged to ShopTracer as its `headless` keyword.

    Yields:
        The ShopTracer instance.
    """
    file_logger = trace_logger.FileTraceLogger('results.jsonl', 'images')

    # selenium_path is only read here, so no `global` declaration is needed.
    tracer = ShopTracer(user_data.get_user_data,
                        selenium_path,
                        headless = headless,
                        trace_logger = file_logger)
    common_actors.add_tracer_extensions(tracer)

    # NOTE(review): nothing is released when the with-block exits; if
    # ShopTracer or FileTraceLogger need explicit cleanup, add it in a
    # try/finally around this yield -- confirm against their APIs.
    yield tracer

def get_driver(headless = False):
    """Return the result of create_chrome_driver for the notebook's chromedriver.

    Args:
        headless: forwarded unchanged as the `headless` keyword.

    Returns:
        Whatever create_chrome_driver(selenium_path, headless=...) returns.
    """
    # The module-level selenium_path is only read, never rebound.
    driver = create_chrome_driver(selenium_path, headless = headless)
    return driver


### Set up logging level

In [None]:
# Send DEBUG-and-above records of the 'shop_tracer' logger to a stream handler.
logger = logging.getLogger('shop_tracer')
logger.setLevel(logging.DEBUG)

# Loggers are process-wide singletons, so re-running this cell used to attach
# one more StreamHandler each time and duplicate every log line. The handler
# is named so that it is attached only once per kernel.
NOTEBOOK_HANDLER_NAME = 'notebook_stream'

if not any(h.get_name() == NOTEBOOK_HANDLER_NAME for h in logger.handlers):
    handler = logging.StreamHandler()
    handler.set_name(NOTEBOOK_HANDLER_NAME)
    formatter = logging.Formatter(
            '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)

### Start tracing URLs

In [None]:
# ToDo remove before merge
# Select the URLs whose recorded status in url_states.csv is one of
# TARGET_STATES. Each data line is expected to hold exactly two tab-separated
# fields: url and status. (csv is imported in the notebook's first cell.)
TARGET_STATES = ('checkout_page', 'purchased')

urls_to_test = []
# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('../resources/url_states.csv', newline='') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='\\')
    for row in reader:
        # Blank lines are returned as empty lists; skip them so the
        # two-field unpacking below only sees real records.
        if not row:
            continue
        url, status = row
        if status in TARGET_STATES:
            urls_to_test.append(url)


In [None]:
# Trace every URL in urls_to_test with a single tracer and collect whatever
# tracer.trace returns for each one, in input order.
results = []

with get_tracer() as tracer:
    for url in urls_to_test:
        # Blank lines separate the printed output of consecutive URLs.
        print('\n\n{}'.format(url))
        # NOTE(review): attempts=3 is presumably a retry limit -- confirm
        # against ShopTracer.trace.
        status = tracer.trace(url, attempts=3)
        print(status)
        results.append(status)


In [None]:
# Histogram of final states: maps each ProcessingStatus.state found in
# `results` to the number of times it occurred. Entries of `results` that are
# not ProcessingStatus instances are left out of the count.
states = {}
for status in results:
    if not isinstance(status, ProcessingStatus):
        continue
    states.setdefault(status.state, 0)
    states[status.state] += 1

print(states)

In [None]:
import json
# Image and display are part of the public IPython.display API; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import Image, display

# Replay results.jsonl: for every recorded trace print its domain and final
# state, then each step's url/state/handler followed by its screenshot.
with open('results.jsonl', 'r') as f:
    for line in f:
        # Tolerate blank lines (e.g. a trailing newline) in the JSON Lines file.
        if not line.strip():
            continue
        trace = json.loads(line)
        print('\n\n{} - {}'.format(trace['domain'], trace['status']['state']))
        for step in trace['steps']:
            print('url: {} state: {} handler: {}'.format(step['url'], step['state'], step['handler']))
            display(Image(step['screen_path'], width=640, unconfined=True))