In [58]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from rapidfuzz import fuzz
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
import time

# Reference attributes of the login form fields. They are handed to
# enter_value_textbox as the baseline to compare page elements against.
username = {
    "tag": "input",
    "type": "text",
    "id": "username",
    "name": "username",
    "placeholder": "Enter your username",
}
password = {
    "tag": "input",
    "type": "password",
    "id": "password",
    "name": "password",
    "placeholder": "Enter your password",
}

# Page opened by the driver cell.
url = "https://rakeshghosal9.github.io/self-healing-test-playground/"

In [25]:
# Function to preprocess attributes into text
def preprocess(element):
    """Flatten an element's locator attributes into one lower-cased text line.

    Args:
        element: Mapping that may contain ``tag``, ``type``, ``id``, ``name``
            and ``placeholder`` entries. A missing key and a key whose value
            is ``None`` are both rendered as an empty value.

    Returns:
        str: ``"tag=... type=... id=... name=... placeholder=..."`` with every
        value lower-cased.
    """
    def _value(key):
        # A key that is present with a None value bypasses any .get() default,
        # so None has to be handled explicitly before calling .lower().
        raw = element.get(key)
        return "" if raw is None else str(raw).lower()

    return " ".join(
        f"{key}={_value(key)}"
        for key in ("tag", "type", "id", "name", "placeholder")
    )

In [54]:
def convert_html_to_dict(html_input):
    """Extract the locator attributes of the first ``<input>`` in an HTML snippet.

    Args:
        html_input: HTML markup expected to contain an ``<input>`` element.

    Returns:
        dict | None: A dict with ``tag`` (always ``'input'``), ``type``,
        ``id``, ``name`` and ``placeholder`` (each ``None`` when the attribute
        is absent) and ``required`` (``True`` when the attribute is present).
        ``None`` is returned, after printing the error, when parsing fails or
        the snippet holds no ``<input>``.
    """
    try:
        soup = BeautifulSoup(html_input, 'html.parser')
        input_tag = soup.find('input')
        if input_tag is None:
            raise ValueError("No 'input' tag found in the provided HTML.")

        return {
            'tag': 'input',
            'type': input_tag.get('type'),
            'id': input_tag.get('id'),
            'name': input_tag.get('name'),
            'placeholder': input_tag.get('placeholder'),
            # `required` is a boolean attribute: when present its value is
            # usually an empty (falsy) string, so test presence, not value.
            'required': input_tag.has_attr('required'),
        }
    except Exception as e:
        # Best-effort contract: report the problem and signal failure with
        # None so callers can decide whether to skip or abort.
        print(e)
        return None

    

In [27]:
def calculate_similarity(element1_text,element2_text,web_element1,web_element2):
    """Score how alike two elements are and print the score breakdown.

    The result blends the TF-IDF cosine similarity of the two text forms
    (weight 0.5) with five per-attribute ``fuzz.ratio`` scores scaled to the
    0-1 range (weight 0.1 each).

    Args:
        element1_text: Text form of the first element.
        element2_text: Text form of the second element.
        web_element1: Attribute mapping of the first element.
        web_element2: Attribute mapping of the second element.

    Returns:
        float: The weighted combined similarity.
    """
    # TF-IDF vectors for the two text forms, then their cosine similarity.
    tfidf = TfidfVectorizer().fit_transform([element1_text, element2_text])
    cosine_sim = cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]

    # (label used in the printed report, attribute key) in report order.
    labelled_keys = (
        ("Tag", "tag"),
        ("Type", "type"),
        ("ID", "id"),
        ("Name", "name"),
        ("Placeholder", "placeholder"),
    )
    attr_sim = {
        key: fuzz.ratio(web_element1.get(key, ''), web_element2.get(key, '')) / 100.0
        for _, key in labelled_keys
    }

    # Operand order is kept fixed so the floating-point sum is reproducible.
    final_similarity = float(
        0.5 * cosine_sim
        + 0.1 * attr_sim['tag']
        + 0.1 * attr_sim['type']
        + 0.1 * attr_sim['id']
        + 0.1 * attr_sim['name']
        + 0.1 * attr_sim['placeholder']
    )

    print(f"Cosine Similarity (TF-IDF): {cosine_sim:.2f}")
    for label, key in labelled_keys:
        print(f"{label} Similarity (Levenshtein): {attr_sim[key]:.2f}")
    print(f"Final Combined Similarity: {final_similarity:.2f}")
    return final_similarity

In [67]:
def get_alternate_locator(current_html_source, expected_tag, old_attribute):
    """Return the ``<input>`` source line that best matches the old attributes.

    The page source is scanned line by line; every stripped line starting
    with ``<input`` is parsed, scored against ``old_attribute`` with
    ``calculate_similarity``, and the highest-scoring line wins.

    Args:
        current_html_source: Full HTML source of the current page.
        expected_tag: Currently unused; candidates are always ``<input`` lines.
        old_attribute: Attribute mapping of the element as it was last known.

    Returns:
        str: The best-matching stripped source line, or ``""`` when no
        candidate scored above 0.
    """
    attribute_preprocessed = preprocess(old_attribute)
    max_similarity = 0
    html_attr_with_max_similarity = ""
    for html_value in current_html_source.split('\n'):
        line_value = html_value.strip()
        if not line_value.startswith("<input"):
            continue
        print(line_value)
        line_value_dict = convert_html_to_dict(line_value)
        if line_value_dict is None:
            # convert_html_to_dict signals a parse failure with None; skip the
            # candidate instead of crashing on it in preprocess().
            continue
        line_value_preprocess = preprocess(line_value_dict)
        combined_similarity = float(
            calculate_similarity(
                attribute_preprocessed,
                line_value_preprocess,
                old_attribute,
                line_value_dict,
            )
        )
        if combined_similarity > max_similarity:
            max_similarity = combined_similarity
            html_attr_with_max_similarity = line_value
    print("Max similarity : "+str(max_similarity))
    print("Most Matched Line : "+html_attr_with_max_similarity)
    return str(html_attr_with_max_similarity)
            
        

In [65]:
def enter_value_textbox(id,value,driver,attribute):
    """Type ``value`` into the text box with the given id, healing the locator if needed.

    The element is awaited by id for up to 10 seconds. On timeout the current
    page source is searched for the ``<input>`` most similar to ``attribute``
    and the value is entered using that element's id instead.

    Args:
        id: Expected ``id`` attribute of the text box.
        value: Text to send to the element.
        driver: Selenium WebDriver instance.
        attribute: Last known attribute mapping of the element, used as the
            reference when searching for a replacement.

    Raises:
        NoSuchElementException: The healing step found no candidate element,
            or the best candidate has no ``id``.
        TimeoutException: The healed id did not appear within 10 seconds.
    """
    try:
        # until() returns the located element, so no second lookup is needed.
        element = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.ID,id)))
        element.send_keys(value)
        print("Value entered successfully")
    except NoSuchElementException:
        print("No Such Element")
    except TimeoutException:
        print("Timeout Exception")
        source_content = driver.page_source
        html_code = get_alternate_locator(source_content,"input",attribute)
        # Both helpers may come back empty-handed ("" / None); fail with a
        # descriptive error instead of an AttributeError or TypeError below.
        line_value_dict = convert_html_to_dict(html_code) if html_code else None
        healed_id = line_value_dict.get('id') if line_value_dict else None
        if not healed_id:
            raise NoSuchElementException(
                f"Could not find an alternate locator for element with id '{id}'"
            )
        print("updated id : "+healed_id)
        element = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.ID,healed_id)))
        element.send_keys(value)
        print("Value entered successfully")
        # Pause the execution for 5 seconds
        time.sleep(5)


In [68]:
# Open the playground page and fill in the login form. The browser is closed
# exactly once in `finally`, whether or not any step fails.
driver = webdriver.Chrome()
try:
    driver.get(url)
    # Maximize the browser window
    driver.maximize_window()

    # Wait until the page is fully loaded
    wait = WebDriverWait(driver, 10)  # Wait for up to 10 seconds
    wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))  # Wait for the body tag
    print("Page loaded successfully.")
    enter_value_textbox("username","Rakesh",driver,username)
    enter_value_textbox("password","Password",driver,password)
except Exception as e:
    print(f"Error loading page: {e}")
finally:
    driver.quit()

Page loaded successfully.
Timeout Exception
<input type="text" id="username1" name="username" placeholder="Enter your username" required="">
Cosine Similarity (TF-IDF): 0.91
Tag Similarity (Levenshtein): 1.00
Type Similarity (Levenshtein): 1.00
ID Similarity (Levenshtein): 0.94
Name Similarity (Levenshtein): 1.00
Placeholder Similarity (Levenshtein): 1.00
Final Combined Similarity: 0.95
<input type="password" id="password1" name="password" placeholder="Enter your password" required="">
Cosine Similarity (TF-IDF): 0.29
Tag Similarity (Levenshtein): 1.00
Type Similarity (Levenshtein): 0.00
ID Similarity (Levenshtein): 0.24
Name Similarity (Levenshtein): 0.25
Placeholder Similarity (Levenshtein): 0.68
Final Combined Similarity: 0.36
Max similarity : 0.9509288842646628
Most Matched Line : <input type="text" id="username1" name="username" placeholder="Enter your username" required="">
updated id : username1
Value entered successfully
Timeout Exception
<input type="text" id="username1" name=