# Imports (standard library and third-party)

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.utils import ChromeType
from selenium.webdriver.common.by import By
import random
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
import string
from threading import Thread
from collections import defaultdict
import torch
from torch.nn.functional import gumbel_softmax
import numpy as np
import time
from pyymatcher import PyyMatcher, get_close_matches
import cryptohash as chash
import os
import networkx as nx
import copy
import pickle

def default_factory():
    """Create the entry stored for a Q-table key that has not been seen yet.

    Returns:
        A new three-item list ``[next_state_name, q_value, visit_count]``
        initialised to ``["", 0, 1]``. A fresh list is built on every call
        so entries never share storage.
    """
    next_state_name = ""
    q_value = 0
    visit_count = 1
    return [next_state_name, q_value, visit_count]

# Setting up Environment

In [2]:
# Shared browser session used by every helper below (they read the module-level
# `driver`). ChromeDriverManager().install() supplies the chromedriver path.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

class Env:
    """Exploration environment for one web application.

    Attributes:
        url: Address the crawler starts from.
        login_actions: One WebAction per login field, carrying the value to
            type into it.
        Qtable: defaultdict whose missing entries are produced by
            ``default_factory``.
        states: WebState objects discovered so far (initially empty).
    """
    def __init__(self,url,login_elements,login_values):
        """Pair each login element's HTML with the value to enter into it.

        Args:
            url: Start address of the application.
            login_elements: HTML strings of the login fields.
            login_values: Values for those fields, matched by position.
        """
        self.url = url
        # The tag is the first whitespace-separated token of the markup with
        # the opening "<" removed, e.g. '<input type="text" ...>' -> 'input'.
        self.login_actions = [
            WebAction(markup, markup.split()[0].replace("<","").strip(), value)
            for markup, value in zip(login_elements, login_values)
        ]
        self.Qtable = defaultdict(default_factory)
        self.states = []

class WebState:
    """One page state observed by the crawler.

    Attributes:
        url: Address the state was observed at.
        sequence: String signature describing the page's elements.
        actions: Actions available in this state.
        name: Identifier of the state; starts out as the empty string.
    """
    def __init__(self,url,sequence,actions):
        """Store the observation; ``name`` is left empty for the caller to fill."""
        self.url, self.sequence, self.actions = url, sequence, actions
        self.name = ""
        
class WebAction:
    """An interaction that can be performed on a page element.

    Attributes:
        action_string: HTML text identifying the element.
        tag: Tag name of the element.
        data: Optional value associated with the action (``None`` by default).
    """
    def __init__(self,action_string,tag,data=None):
        """Record the element markup, its tag and the optional payload."""
        self.action_string, self.tag, self.data = action_string, tag, data


def get_actions():
    """Collect the interactive elements of the page currently loaded in ``driver``.

    Elements are gathered tag by tag in the order input, button, a, select.

    Returns:
        A list of WebAction objects, one per element found, each holding the
        element's outerHTML and tag name.
    """
    return [
        WebAction(node.get_attribute("outerHTML"), node.tag_name)
        for wanted_tag in ('input','button','a','select')
        for node in driver.find_elements(By.TAG_NAME, wanted_tag)
    ]

def get_state(env):
    """Identify the state of the page currently loaded in ``driver``.

    The page is summarised as a sequence string: each of the tag names
    input, button, a and select repeated once per matching element. If a
    previously recorded state has the same URL and a sequence whose
    ``PyyMatcher(...).ratio()`` is at least 0.8, that recorded state is
    reused; otherwise the new state is appended to ``env.states``.

    Changes from the earlier version:
      * a newly discovered state is named immediately (it used to keep the
        name "" until it was seen a second time, so all first-visit states
        shared the same Q-table keys);
      * a state that already has a name keeps it, so Q-table keys and graph
        nodes built from the name stay valid across visits;
      * the ``results`` directory is created when missing and the HTML dump
        is written as UTF-8.

    Args:
        env: Environment whose ``states`` list is searched and extended.

    Returns:
        The matching recorded WebState, or the newly recorded one.
    """
    sequence = "".join(
        target_tag * len(driver.find_elements(By.TAG_NAME, target_tag))
        for target_tag in ['input','button','a','select']
    )
    actions = get_actions()
    webstate = WebState(driver.current_url, sequence, actions)
    # NOTE(review): chash.md5 is presumably a hex digest of the sequence —
    # confirm against the cryptohash package.
    statename = chash.md5(webstate.sequence)

    for visited_state in env.states:
        if visited_state.url != webstate.url:
            continue
        if PyyMatcher(webstate.sequence, visited_state.sequence).ratio() >= 0.8:
            # Only name a state that has none yet (e.g. one recorded by an
            # older run); renaming would orphan its existing Q-table keys.
            if not visited_state.name:
                visited_state.name = statename
            webstate = visited_state
            break
    else:
        webstate.name = statename
        env.states.append(webstate)

    # Artifacts are stored under the state's stable name, one pair per state.
    os.makedirs("results", exist_ok=True)
    driver.save_screenshot("results/"+webstate.name+".png")
    with open("results/"+webstate.name+".html","w",encoding="utf-8") as htmlwriter:
        htmlwriter.write(driver.page_source)
    return webstate

def take_action(action,special_actions):
    """Perform ``action`` on the page currently loaded in ``driver``.

    The live element is located by tag name and an exact outerHTML match.
    Clickable elements are clicked (for a ``select``, a random option is
    clicked); writable elements are cleared and filled, either with the
    ``data`` of the matching entry in ``special_actions`` or with ten random
    lowercase letters.

    Clicking is best-effort: a failed click is ignored, as before, but only
    ``Exception`` subclasses are suppressed so that KeyboardInterrupt and
    SystemExit are no longer swallowed. If the element is not on the page
    any more, nothing is done (previously this raised AttributeError on
    ``None``). Errors raised while typing still propagate to the caller.

    Args:
        action: WebAction to perform (``tag`` and ``action_string`` are used).
        special_actions: WebActions whose ``data`` must be typed into the
            element with the same outerHTML (e.g. login fields).

    Returns:
        None.
    """
    # Stored actions only carry the markup, so find the live element again.
    element = None
    for candidate in driver.find_elements(By.TAG_NAME, action.tag):
        if candidate.get_attribute("outerHTML") == action.action_string:
            element = candidate
            break
    if element is None:
        # The recorded element is absent from the current page: skip it.
        return

    clickable = ["a","button","submit","select","radio","checkbox","image"]
    writable = ["input","text","password","search"]
    if element.tag_name in clickable or element.get_attribute('Type') in clickable:
        try:
            if element.tag_name=="select":
                random.choice(Select(element).options).click()
            else:
                element.click()
            # Give the page a moment to react to the click.
            time.sleep(0.5)
        except Exception:
            # Deliberately best-effort: hidden, covered or stale elements
            # (or a select without options) simply count as a no-op.
            pass

    elif element.tag_name in writable or element.get_attribute('Type') in writable:
        # The element was selected because its outerHTML equals
        # action.action_string, so that string identifies it here too.
        for special in special_actions:
            if action.action_string==special.action_string:
                element.clear()
                element.send_keys(special.data)
                return
        random_text = ''.join(random.choice(string.ascii_lowercase) for _ in range(10))
        element.clear()
        element.send_keys(random_text)
        
def get_best_action(qtable,webstate):
    """Sample the next action for ``webstate`` from its Q-values.

    Each action's Q-value is read from ``qtable`` under the key
    ``"<state name>!@!<action_string>"`` (index 1 of the entry). Looking the
    key up also creates the entry when ``qtable`` is a defaultdict. The
    Q-values are passed through ``gumbel_softmax`` and the result is used as
    the probability vector for a random draw.

    Keys are looked up directly instead of scanning and splitting every key
    in the table, so the cost no longer grows with the table size and
    unrelated keys containing extra "!@!" separators cannot break the call.

    Args:
        qtable: Mapping from state/action key to ``[next_state, q, visits]``.
        webstate: State providing ``name`` and ``actions``.

    Returns:
        One element of ``webstate.actions``.

    Raises:
        ValueError: If ``webstate`` has no actions.
    """
    actions = webstate.actions
    if not actions:
        raise ValueError("cannot choose an action: the state has no actions")

    qvalues = [
        qtable["!@!".join([webstate.name,action.action_string])][1]
        for action in actions
    ]
    logits = torch.FloatTensor(qvalues)
    probabilities = gumbel_softmax(logits, tau=1).cpu().numpy()
    # Draw an index rather than an object so numpy never has to convert the
    # action objects into an array.
    chosen_index = np.random.choice(len(actions), p=probabilities)
    return actions[chosen_index]

def get_maxQ(qtable,webstate):
    """Return the largest Q-value among the actions of ``webstate``.

    Q-values are read from index 1 of the entry stored under
    ``"<state name>!@!<action_string>"``; with a defaultdict the lookup
    creates missing entries.

    Args:
        qtable: Mapping from state/action key to ``[next_state, q, visits]``.
        webstate: State providing ``name`` and ``actions``.

    Returns:
        The maximum Q-value, or 0 when the state has no actions (a page
        without interactive elements previously made ``max`` raise
        ValueError).
    """
    return max(
        (
            qtable["!@!".join([webstate.name,action.action_string])][1]
            for action in webstate.actions
        ),
        default=0,
    )

def updateDFA(DFA,state,action,next_state,visited_count):
    """Record the transition ``state --action--> next_state`` in the graph.

    If an edge between the two nodes already carries this action, its
    ``weight`` is overwritten with ``visited_count``. Otherwise (including
    when the lookup raises KeyError) a new edge with the attributes
    ``action`` and ``weight`` is added.

    Args:
        DFA: Graph indexed as ``DFA[state][next_state][edge_key]`` that
            offers ``add_edges_from``.
        state: Source node.
        action: Action label of the edge.
        next_state: Target node.
        visited_count: Value stored as the edge weight.

    Returns:
        The same ``DFA`` object.
    """
    try:
        for attributes in DFA[state][next_state].values():
            if attributes["action"]==action:
                attributes["weight"] = visited_count
                return DFA
    except KeyError:
        # No edge between these nodes yet: fall through and create one.
        pass

    DFA.add_edges_from([(state, next_state, {"action": action,"weight":visited_count})])
    return DFA

def select_trace(DFA,qtable,currentstate):
    """Choose a path from ``currentstate`` to the target of the least-weighted edge.

    The edges yielded by ``nx.bfs_edges(DFA, currentstate)`` are inspected;
    edges touching the unnamed node "" are ignored. The edge with the
    smallest ``weight`` attribute wins (the first one in traversal order on
    ties) and the weighted shortest path to its target node is returned.

    When ``currentstate`` is not in the graph, or no edge qualifies, an empty
    path is returned. Previously the target stayed "" in that case and
    ``nx.shortest_path`` was asked for a path to that node.

    Args:
        DFA: Multigraph whose edges carry a ``weight`` attribute.
        qtable: Unused; kept for interface compatibility.
        currentstate: Name of the node to start from.

    Returns:
        List of node names from ``currentstate`` to the chosen target, or
        ``[]`` when there is nothing to select.
    """
    if currentstate not in DFA:
        return []

    lowest_weight = float("inf")
    target = None
    for source, destination in nx.bfs_edges(DFA,currentstate):
        if "" in (source, destination):
            continue
        for attributes in DFA[source][destination].values():
            if attributes["weight"] < lowest_weight:
                lowest_weight = attributes["weight"]
                target = destination

    if target is None:
        return []
    return nx.shortest_path(DFA, currentstate, target, weight='weight')

def run_trace(DFA,qtable,currentstate):
    """Replay the module-level ``path`` in the browser.

    The state names in ``path`` are resolved against ``env.states``. For each
    consecutive pair of states, the Q-table key that starts in the first
    state, leads to the second (index 0 of the entry) and has the lowest
    visit count (index 2) is chosen. The action named by that key is then
    performed with ``take_action``; if the state does not list that action,
    its first action is used instead.

    The function no longer raises when the trace cannot be followed: an
    empty or unresolved path replays nothing, and a missing transition (or a
    state without actions) ends the trace at that point so only the prefix
    found so far is replayed. Keys are split at the first "!@!" only, so
    markup that happens to contain the separator is tolerated.

    Args:
        DFA: Unused; kept for interface compatibility.
        qtable: Q-table mapping ``"<state>!@!<action>"`` to
            ``[next_state, q, visits]``.
        currentstate: Unused; kept for interface compatibility.

    Returns:
        None.
    """
    # Resolve each name on the path to the recorded state(s) with that name.
    trace_states = [
        state
        for name in path
        for state in env.states
        if state.name==name
    ]

    replay = []
    for current, target in zip(trace_states, trace_states[1:]):
        if not current.actions:
            break
        candidates = [
            key
            for key, entry in qtable.items()
            if key.partition("!@!")[0]==current.name and entry[0]==target.name
        ]
        if not candidates:
            # No recorded transition between these two states.
            break
        # min() keeps the first key on ties, like the argmin it replaces.
        least_visited = min(candidates, key=lambda key: qtable[key][2])
        wanted_markup = least_visited.partition("!@!")[2]
        chosen = next(
            (action for action in current.actions if action.action_string==wanted_markup),
            current.actions[0],
        )
        replay.append(chosen)

    for action in replay:
        take_action(action,env.login_actions)



Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [C:\Users\asmar\.wdm\drivers\chromedriver\win32\100.0.4896.60\chromedriver.exe] found in cache


# Required Info for website

In [3]:
# Start address of the application to explore.
url = "http://192.168.1.68/timeclock/"
# HTML of the login form fields; Env derives each field's tag from the text
# after the opening "<".
login_actions = ['<input type="text" name="login_userid">',
                '<input type="password" name="login_password">']
# Values for the fields above, matched by position.
# NOTE(review): credentials are hard-coded in the notebook.
login_details = ['admin','admin']

# Environment holding the URL, the login actions, the Q-table and the states.
env = Env(url,login_actions,login_details)

# Start Exploring

In [None]:
# Exploration settings.
activity_time = 1800  # seconds to wait before CLOSE is raised
steps = 100           # upper bound for the per-episode step loop
tracetime = 120       # seconds compared against the time since tracetime1
gamma = 0.95          # factor applied to the next state's best Q-value
CLOSE = False         # flipped to True by the timer thread below

def some_task():
    """Sleep for ``activity_time`` seconds, then set the global ``CLOSE`` flag."""
    global CLOSE
    time.sleep(activity_time)
    CLOSE=True

# Daemon thread: it only raises a flag, so it must not keep the interpreter
# alive (for up to activity_time seconds) after the main loop has ended.
t = Thread(target=some_task, daemon=True)
t.start()


# Main exploration loop: runs episodes until the timer thread sets CLOSE.
# Each episode starts from env.url, optionally replays a selected trace, then
# takes up to `steps` actions while updating the Q-table and the graph.

# Directed multigraph of observed transitions (filled through updateDFA).
DFA = nx.MultiDiGraph()
episode = 0
istep = 0
# Time of the last change in the number of known states (or of the last
# trace selection); compared against `tracetime` below.
tracetime1 = time.time()
total_states = len(env.states)
# State observed on the very first step; used as the origin for traces.
startstate = None
# Trace (list of state names) to replay at the start of the next episode.
path = []
while True:
    print("Episode no: ",episode)
    #if episode>10:
    #    break
    if CLOSE:
        break
    
    # Every episode restarts from the start URL; a pending trace is replayed
    # first and then cleared.
    if len(path)<1:
        driver.get(env.url)
    else:
        driver.get(env.url)
        run_trace(DFA,env.Qtable,startstate.name)
        path = []
        
    for istep in range(steps):
        try:
            if env.Qtable=={}:
                # Nothing learned yet: load the start page, remember its
                # state and pick a random action.
                driver.get(env.url)
                webstate = get_state(env)
                startstate = webstate
                action = random.choice(webstate.actions)
            else:
                webstate = get_state(env)
                action = get_best_action(env.Qtable,webstate)

            take_action(action,env.login_actions)
            if env.url in driver.current_url:
                # Still on a URL containing env.url: the reward is the
                # inverse of the visit count stored for this state/action.
                qstring = "!@!".join([webstate.name,action.action_string])
                reward = 1/env.Qtable[qstring][2]
                next_state = get_state(env)
                env.Qtable[qstring][0] = next_state.name
                env.Qtable[qstring][2] += 1
                maxQ = get_maxQ(env.Qtable,next_state)
                
                # The stored Q-value is replaced outright by
                # reward + gamma * (best Q-value of the next state).
                env.Qtable[qstring][1] = reward+gamma*maxQ
                #env.Qtable[qstring][1] = env.Qtable[qstring][1]-(reward+gamma*maxQ)
                
                DFA = updateDFA(DFA,webstate.name,action.action_string,next_state.name,env.Qtable[qstring][2])

            else:
                # The browser is on a URL that does not contain env.url:
                # store fixed penalty values and end the episode.
                qstring = "!@!".join([webstate.name,action.action_string])
                reward = -9999
                next_state = get_state(env)
                env.Qtable[qstring][0] = next_state.name
                env.Qtable[qstring][2] = 9999
                #maxQ = get_maxQ(env.Qtable,get_state())
                env.Qtable[qstring][1] = reward
                DFA = updateDFA(DFA,webstate.name,action.action_string,next_state.name,env.Qtable[qstring][2])
                break
            #print("current_States {},total_states {},timediff {}".format(len(env.states),total_states,time.time()-tracetime1))
            # If the number of known states has not changed for `tracetime`
            # seconds, select a trace for the next episode and stop this one;
            # otherwise record the new count and restart the clock.
            if len(env.states)==total_states:
                if time.time()-tracetime1 >= tracetime:
                    print("trace time")
                    path = select_trace(DFA,env.Qtable,startstate.name)
                    tracetime1 = time.time()
                    break
            else:
                total_states = len(env.states)
                tracetime1 = time.time()
            
        except Exception as e:
            # Any error ends the episode after being printed.
            # NOTE(review): a message containing "session id" presumably means
            # the WebDriver session was lost, hence the new browser — confirm.
            print(e)
            if "session id" in str(e):
                driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
            break

    episode+=1
    # Persist progress after every episode (files in the working directory).
    with open("qtable","wb") as file:
        pickle.dump(env.Qtable, file)
    with open("DFA","wb") as file:
        pickle.dump(DFA, file)
    with open("ENV","wb") as file:
        pickle.dump(env, file)

Episode no:  0
Episode no:  1
Episode no:  2
Episode no:  3
Episode no:  4
trace time
Episode no:  5
Episode no:  6
Episode no:  7
Episode no:  8
Episode no:  9
trace time
Episode no:  10
Alert Text: Date format is invalid. Please choose a date from the Date Picker if you are unsure of the date format you chose within the System Settings.
Message: unexpected alert open: {Alert text : Date format is invalid. Please choose a date from the Date Picker if you are unsure of the date format you chose within the System Settings.}
  (Session info: chrome=100.0.4896.75)
Stacktrace:
Backtrace:
	Ordinal0 [0x00BC7413+2389011]
	Ordinal0 [0x00B59F61+1941345]
	Ordinal0 [0x00A4C658+837208]
	Ordinal0 [0x00AA4172+1196402]
	Ordinal0 [0x00A93F66+1130342]
	Ordinal0 [0x00A6E546+976198]
	Ordinal0 [0x00A6F456+980054]
	GetHandleVerifier [0x00D79632+1727522]
	GetHandleVerifier [0x00E2BA4D+2457661]
	GetHandleVerifier [0x00C5EB81+569713]
	GetHandleVerifier [0x00C5DD76+566118]
	Ordinal0 [0x00B60B2B+1968939]
	Ordin