# AiVA API Tool

In [None]:
# ! py -m pip install selenium
# ! py -m pip install webdriver-manager

In [None]:
## Download Edge WebDriver
'''https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/'''
## Add the folder that contains msedgedriver to the PATH variable in your user environment variables

## Note: You must run this code from a local folder

In [None]:
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import re
import winsound

## Define Parameters

In [None]:
# Folder holding the prompt and data files. File names are appended by plain
# string concatenation, so a non-empty value must end with a path separator.
# A raw string cannot end in a backslash; use a forward slash (r'C:/data/').
file_path = r''
# Text file whose contents are sent as the first query of the session.
initial_prompt_file = file_path + 'Initial Prompt.txt'
# Text file whose contents are prepended to every data chunk sent afterwards.
subsequent_prompt_preamble = file_path + 'Subsequent Prompt.txt'
# Excel workbooks (relative to file_path) to load and concatenate.
# A list, not a set, so the files are always read in the order written here.
input_data_files = ['Input Data.xlsx']
# Number of input rows sent with each subsequent query.
rows_per_data_chunk = 2
output_data_file = file_path + 'Output Data.xlsx'
# Address typed into the sign-in form.
email_address = 'first.last@inl.gov'

## Functions

In [None]:
def sign_in(driver):
    """Walk through the sign-in pages and wait for the chat page to load.

    Clicks the sign-in button, types the module-level ``email_address`` into
    the login form and submits it, then beeps and waits up to 120 seconds for
    the user to finish the credential prompt by hand. Sign-in is considered
    complete once the element with id ``entry`` is present.

    Args:
        driver: Selenium WebDriver already showing the page to sign in to.

    Raises:
        selenium.common.exceptions.TimeoutException: If an expected element
            does not show up within its wait window.
    """
    # Wait for the Sign In button to be clickable and then click it
    signin_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn.btn-primary'))
    )
    signin_button.click()

    # Wait for the email input field to be visible and then input the email address
    email_input = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.ID, 'i0116'))
    )
    email_input.send_keys(email_address)

    # Wait for the Next button to be clickable and then click it
    next_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.ID, 'idSIButton9'))
    )
    next_button.click()

    # The user types username and password into the SSO popup themselves;
    # the beep draws attention to it.
    winsound.MessageBeep(winsound.MB_ICONQUESTION)
    print('Please input your username and password in the Windows Security popup window')

    # Only the wait matters here: the query box appearing signals that the
    # sign-in finished, so the located element itself is not kept.
    WebDriverWait(driver, 120).until(
        EC.presence_of_element_located((By.ID, "entry"))
    )
    print('Sign in successful')

In [None]:
def query_and_response(query, driver, response_timeout=20):
    """Submit ``query`` in the chat page and return the newest response text.

    Args:
        query: Text typed into the element with id ``entry``.
        driver: Selenium WebDriver showing the signed-in chat page.
        response_timeout: Seconds to wait for a new response bubble to appear
            after submitting. Defaults to 20, the previously fixed value.

    Returns:
        The ``.text`` of the last ``.bot-response.chat-bubble`` element once
        the number of such elements has grown.

    Raises:
        selenium.common.exceptions.TimeoutException: If the input box, the
            submit button, or a new response bubble does not appear in time.
    """
    # NOTE(review): the text is read as soon as a new bubble exists; if the
    # page fills the bubble in gradually the text may be partial - confirm.
    response_locator = (By.CSS_SELECTOR, '.bot-response.chat-bubble')

    # Wait for the textarea to be present and then enter text
    textarea = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "entry"))
    )
    textarea.send_keys(query)

    # Count the response bubbles before submitting so a new one can be detected
    initial_chat_bubble_count = len(driver.find_elements(*response_locator))

    # Wait for the submit button to be clickable and then click it
    submit_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn.btn-primary.btn-sm'))
    )
    submit_button.click()

    # Wait for the number of response bubbles to increase
    WebDriverWait(driver, response_timeout).until(
        lambda drv: len(drv.find_elements(*response_locator)) > initial_chat_bubble_count
    )

    # The most recent response is the last bubble on the page
    return driver.find_elements(*response_locator)[-1].text

In [None]:
def format_chunk_to_string(chunk):
    """Render a DataFrame chunk as a single-line, pipe-delimited table.

    The result has the form
    ``| col1 | col2 | |--------| | a | b | | c | d |`` with no newlines.
    Column labels and cell values are converted with ``str`` so numeric,
    date, or missing values do not break the join.

    Args:
        chunk: pandas DataFrame holding the rows to format.

    Returns:
        The formatted table string. An empty chunk yields only the header and
        the separator.
    """
    # Format columns
    header = ' | '.join(str(column) for column in chunk.columns)

    # Format rows; plain tuples keep each column's own dtype for str()
    rows = (
        '| ' + ' | '.join(str(value) for value in record) + ' |'
        for record in chunk.itertuples(index=False, name=None)
    )

    # Combine header and rows without newlines
    return f'| {header} | |--------| ' + ' '.join(rows)

In [None]:
def parse_string_to_df(data):
    """Parse the first pipe-delimited table found in ``data`` into a DataFrame.

    Everything from the first ``| cell |`` match onward is split on ``|``;
    empty cells are discarded. The header is every cell before the first
    separator cell (dashes, optionally with alignment colons), so the column
    count follows the table itself. Both a one-cell separator (``|--------|``)
    and a per-column separator row (``|---|---|``) are accepted. If no
    separator follows a header cell, the first two cells are taken as the
    header, as before.

    Args:
        data: Text that contains a pipe-delimited table, optionally preceded
            by other text.

    Returns:
        pandas DataFrame of strings, one row per group of header-width cells.

    Raises:
        ValueError: If no table-like pattern or no cell content is found.
    """
    # Find the start of the table
    table_start = re.search(r'\|\s*[^|]+\s*\|', data)
    if not table_start:
        raise ValueError("No table found in the input data.")

    # Extract the tabular part
    tabular_part = data[table_start.start():]

    # Split the string based on the pipe delimiter
    parts = [part.strip() for part in tabular_part.split('|') if part.strip()]
    if not parts:
        raise ValueError("No table found in the input data.")

    separator = re.compile(r':?-+:?')
    separator_index = next(
        (i for i, part in enumerate(parts) if separator.fullmatch(part)),
        None,
    )

    if separator_index:
        # Header cells are everything before the separator
        columns = parts[:separator_index]
        # Skip only the separator run right after the header, so a data cell
        # that is just "-" further down is kept.
        body_start = separator_index
        while body_start < len(parts) and separator.fullmatch(parts[body_start]):
            body_start += 1
    else:
        # No usable separator: fall back to the two-column assumption
        columns = parts[:2]
        body_start = 2

    # Group the remaining cells into rows of header width
    cells = parts[body_start:]
    width = len(columns)
    data_tuples = [
        tuple(cells[i:i + width]) for i in range(0, len(cells), width)
    ]

    # Create a DataFrame
    return pd.DataFrame(data_tuples, columns=columns)

In [None]:
def delete_session(driver):
    """Click the Trash button once it becomes clickable (10-second wait).

    Args:
        driver: Selenium WebDriver showing the chat page.
    """
    trash_locator = (By.CSS_SELECTOR, '.btn.btn-danger')
    wait = WebDriverWait(driver, 10)
    wait.until(EC.element_to_be_clickable(trash_locator)).click()

## Load Data

In [None]:
## Load Inputs
# Prompt sent once at the start of the session
with open(initial_prompt_file, 'r') as file:
    init_prompt = file.read()

# Preamble prepended to every data chunk
with open(subsequent_prompt_preamble, 'r') as file:
    subs_prompt = file.read()

# Read every workbook first and concatenate once, instead of growing a frame
# inside the loop from an empty seed DataFrame.
input_frames = [pd.read_excel(file_path + input_file) for input_file in input_data_files]
# pd.concat raises on an empty list, so keep the empty-frame result for no files
input_data = pd.concat(input_frames) if input_frames else pd.DataFrame()

In [None]:
# Display the loaded input data for a quick visual check
input_data

## Perform Queries and Record Responses

In [None]:
## Setup Browser Connection

# Set up Edge options to use the guest profile
edge_options = webdriver.EdgeOptions()
edge_options.add_argument('--guest')

# Set up the Edge driver
service = Service(EdgeChromiumDriverManager().install())
driver = webdriver.Edge(service=service, options=edge_options)

## Query and Record Responses
outputs = pd.DataFrame()
try:
    # Navigate inside the try so the browser is closed even if the page
    # fails to load.
    driver.get("https://aiva.inl.gov")
    sign_in(driver)

    # Prime the session with the initial prompt
    print(query_and_response(init_prompt, driver))

    # Send the input rows in chunks of rows_per_data_chunk
    for start in range(0, len(input_data), rows_per_data_chunk):
        chunk = input_data.iloc[start:start + rows_per_data_chunk]
        prompt = subs_prompt + format_chunk_to_string(chunk)
        print(prompt)
        response = query_and_response(prompt, driver)
        print(response)
        output = parse_string_to_df(response) ## TODO Write to file instead and parse separately?
        # Append per chunk so rows parsed so far remain in `outputs` if a
        # later chunk fails.
        outputs = pd.concat([outputs, output], ignore_index=True)
    ## TODO Detect max input of session and start a new session
    delete_session(driver)

finally:
    driver.quit()

In [None]:
# Display the parsed responses collected from all chunks
outputs