# PART B
### Group 1:
Zachary Wilkerson, Tara Bode, Hankun Li

### Use Selenium + jQuery for the following task:

- Navigate to: https://www.barchart.com/options/highest-implied-volatility
- Retrieve the first 100 ticker symbols on that page (CVM, AMC, OCGN, etc.) and count how often each one occurs
- Keep the symbols that occur at least 3 times
- For each symbol, visit the following page: https://finance.yahoo.com/quote/SYMBOL/options?p=SYMBOL (for example, https://finance.yahoo.com/quote/CVM/options?p=CVM)
- For the options on that page, calculate the average % bid-ask spread relative to the mid-point. For example, if the bid is $1.00 and the ask is $1.20, then the bid-ask spread is $0.20 / $1.10 = 18.18%. Do this calculation for all calls and puts and calculate the average (no need to navigate to other expiry dates)
- Navigate to https://finance.yahoo.com/quote/SYMBOL/community?p=SYMBOL (for example, https://finance.yahoo.com/quote/CVM/community?p=CVM) and retrieve the last 1,000 posts (write these to disk, one file for each symbol; you can ignore the replies to messages)

In [27]:
import time
from selenium import webdriver
import pandas as pd
import regex as re

# Build the Chrome options shared by every browser session in this notebook.
options = webdriver.ChromeOptions()
for flag in ("disable-geolocation", "disable-notifications"):
    options.add_argument(flag)
#options.add_argument("--disable-automation")
#options.add_experimental_option("excludeSwitches" , ["enable-automation"])
#options.add_experimental_option("useAutomationExtension", False)

# Start Chrome through the locally stored chromedriver binary.
driver = webdriver.Chrome(
    r'/Users/zwilkerson/Desktop/UF/ACG7849 - Web Crawling and Textual Analysis/selenium_files/chromedriver',
    options=options,
)

In [28]:
# FUNCTION: Injects jQuery
def jq_injector(driver):
    """Run the local copy of jQuery inside the page currently loaded in *driver*.

    The library source is read from a fixed path on disk and handed to
    ``driver.execute_script`` so that later scripts can use ``$``.
    """
    jquery_path = r'/Users/zwilkerson/Desktop/UF/ACG7849 - Web Crawling and Textual Analysis/selenium_files/jquery.js'
    with open(jquery_path, 'r') as source_file:
        library_source = source_file.read()
    driver.execute_script(library_source)

# FUNCTION: Loads a website using Selenium and injects jQuery
def website_loader(driver, site):
    """Open *site* in *driver*, then inject jQuery into the loaded page.

    Args:
        driver: Selenium WebDriver instance used for navigation.
        site: URL to load.
    """
    driver.get(site)
    # Injection has to follow navigation: it runs in whatever page is current.
    jq_injector(driver)

# FUNCTION: Populates a dictionary
def dict_adder(key, dic):
    """Increment the occurrence count stored for *key* in *dic*.

    A key that is not yet present starts at 1. The dictionary is modified
    in place and also returned for convenience.

    Args:
        key: Hashable item being counted.
        dic: Dictionary mapping items to their running counts.

    Returns:
        The same dictionary object that was passed in.
    """
    # dict.get replaces the former bare ``except:``, which swallowed every
    # error (not just a missing key) and silently reset the count to 1.
    dic[key] = dic.get(key, 0) + 1
    return dic

# FUNCTION: Finds the average of a list
def find_average(input_list):
    """Return the arithmetic mean of the numbers in *input_list*.

    Args:
        input_list: Sized collection of numbers.

    Returns:
        The mean, or ``float("nan")`` when the collection is empty, so that a
        symbol without any usable values does not abort the caller with a
        ZeroDivisionError.
    """
    if not input_list:
        return float("nan")
    return sum(input_list) / len(input_list)

In [29]:
# Navigate to: https://www.barchart.com/options/highest-implied-volatility
# website_loader also injects jQuery into the page once it has been loaded.
website_loader(driver,"https://www.barchart.com/options/highest-implied-volatility")

In [30]:
# Pull the ticker symbol from the first 100 table rows on the page
# (CVM, AMC, OCGN, etc.) and tally how often each symbol appears.

symbol_data = '''
els = $("div.bc-table-scrollable-inner tr:not(:first):not(:last)")

first_100 = []
els.each( function(i,el) { 
    if (i<=99) {
        first_100.push( $(el).attr("data-current-symbol") )
    }
}   )
return first_100;
'''

# Run the jQuery snippet in the browser; it returns the list of symbols.
symbol_list = driver.execute_script(symbol_data)

# Occurrence count per symbol.
symbol_dict = {}
for symbol in symbol_list:
    dict_adder(symbol, symbol_dict)

In [35]:
# symbol_dict

In [33]:
# Keep the symbols that occur at least 3 times.
# The keys to drop are collected first so the dictionary is not resized
# while it is being iterated.
rare_symbols = [symbol for symbol, count in symbol_dict.items() if count < 3]
for symbol in rare_symbols:
    del symbol_dict[symbol]

In [36]:
# For each symbol, visit the following page: https://finance.yahoo.com/quote/SYMBOL/options?p=SYMBOL
# For the options on that page, calculate the average % bid-ask spread relative to the mid-point.
# For example, if the bid is $1.00 and the ask is $1.20, then the bid-ask spread is $0.20 / $1.10 = 18.18%.
# Do this calculation for all calls and puts and calculate the average (no need to navigate to other expiry dates)

# jQuery snippet: collects the parsed contents of the bid and ask columns
# and returns them as {bids: [...], asks: [...]}.
options_data = '''
bids = $("td.data-col4")
asks = $("td.data-col5")
bid_list = []
ask_list = []
bids.each( function(i,el) {bid_list.push(parseFloat(el.innerHTML))})
asks.each( function(i,el) {ask_list.push(parseFloat(el.innerHTML))})
obj = {
    bids: bid_list,
    asks: ask_list,
};

return obj;
'''

options_dict = {}  # symbol -> formatted average % bid-ask spread

for k in symbol_dict:  # For each symbol
    website_loader(driver, "https://finance.yahoo.com/quote/"+str(k)+"/options?p="+str(k))  # Load website
    bid_ask_list = driver.execute_script(options_data)  # Run javascript to pull bids and asks
    if not bid_ask_list['asks'] or not bid_ask_list['bids']:
        # Nothing came back: reload once and query again. Re-running the
        # script is essential; without it the retry would keep the empty result.
        driver.refresh()
        jq_injector(driver)
        bid_ask_list = driver.execute_script(options_data)

    # Spread relative to the mid-point, in percent. Pairs with a missing
    # value or a zero mid-point are skipped to avoid None arithmetic and
    # division by zero.
    bid_ask_spread_list = [
        abs(bid - ask) / ((bid + ask) / 2) * 100
        for bid, ask in zip(bid_ask_list['bids'], bid_ask_list['asks'])
        if bid is not None and ask is not None and (bid + ask != 0)
    ]

    if bid_ask_spread_list:
        # Save the average % bid-ask spread relative to the mid-point for each symbol
        options_dict[k] = "{:.2f}%".format(round(find_average(bid_ask_spread_list), 2))
    else:
        # No usable quotes for this symbol: record a placeholder instead of
        # letting an empty average abort the remaining symbols.
        print('No usable bid/ask quotes found for {}.'.format(k))
        options_dict[k] = "N/A"

driver.close()

In [None]:
# Perform some error checking
#bid_ask_list['bids'] = [0 if v is None else v for v in bid_ask_list['bids']]
#bid_ask_list['asks'] = [0 if v is None else v for v in bid_ask_list['asks']]
#bid_ask_spread_list = [abs(x1 - x2)/((x1+x2)/2)*100 for (x1, x2) in zip(bid_ask_list['bids'], bid_ask_list['asks'])]
#bid_ask_spread_list = [0 if x1 is None or x2 is None or (x1+x2==0) else abs(x1 - x2)/((x1+x2)/2)*100 for (x1, x2) in zip(bid_ask_list['bids'], bid_ask_list['asks'])]
#bid_ask_spread_list = [abs(x1 - x2)/((x1+x2)/2)*100 if x1 is not None and x2 is not None and (x1+x2!=0) else pass for (x1, x2) in zip(bid_ask_list['bids'], bid_ask_list['asks'])]


In [None]:
#options_dict

In [41]:
# For each symbol, visit the following page: https://finance.yahoo.com/quote/SYMBOL/community?p=SYMBOL
# Retrieve the last 1,000 posts (write these to disk, one file for each symbol)

# Needed by the except clause below; without this import a Selenium timeout
# would surface as a NameError instead of being handled.
from selenium.common.exceptions import TimeoutException

# jQuery snippet: schedules a click on the "show next" button one second
# from now and returns the post elements currently present on the page.
post_getter = '''
setTimeout( function() { $('button.showNext').click() }, 1000);
els = $("div[class='C($c-fuji-grey-l) Mb(2px) Fz(14px) Lh(20px) Pend(8px)']")
return els;
'''

for k in symbol_dict:  # For each symbol
    driver = webdriver.Chrome(r'/Users/zwilkerson/Desktop/UF/ACG7849 - Web Crawling and Textual Analysis/selenium_files/chromedriver',options=options)
    try:
        website_loader(driver, "https://finance.yahoo.com/quote/"+str(k)+"/community?p="+str(k))  # Load website

        # Display, and then pull, 1000 posts
        els = driver.execute_script(post_getter)
        if len(els) == 0:
            driver.refresh()
            jq_injector(driver)
        start_time = time.time()
        while len(els) < 1000:
            # Give the click scheduled by the previous call time to fire and
            # the page time to load more posts. Without this pause the loop
            # spins and queues a flood of pending clicks in the browser.
            time.sleep(1)
            try:
                els = driver.execute_script(post_getter)
            except TimeoutException:
                print('[Timeout Error] Loaded {} posts. Unable to load additional posts...'.format(len(els)))
                break
            if (time.time() - start_time) > 120:  # If more than 2 minutes elapse, break the loop
                print('[Timeout Error] Loaded {} posts. Unable to load additional posts...'.format(len(els)))
                break

        # Save posts to disk, one file per symbol
        start_time = time.time()
        # Explicit UTF-8 so the write does not depend on the platform's
        # default encoding.
        with open('{}.txt'.format(k), 'wt', encoding='utf-8') as file:
            for el in els:
                file.write('##############################\n'+el.text+'\n##############################\n')
            if len(els) == 0:
                file.write('##############################\n'+'No posts loaded...'+'\n##############################\n')
                print('No posts loaded.')
        print('Writing {} file took {:.2f} seconds'.format(k, time.time() - start_time))
    finally:
        # Always end the browser session, even when scraping or writing
        # fails, so Chrome/chromedriver processes do not pile up.
        driver.quit()

Writing AMC file took 27.66 seconds
[Timeout Error] Loaded 874 posts. Unable to load additional posts...
Writing BBIG file took 9.29 seconds
[Timeout Error] Loaded 994 posts. Unable to load additional posts...
Writing TELL file took 10.26 seconds
[Timeout Error] Loaded 960 posts. Unable to load additional posts...
Writing RIOT file took 11.14 seconds
[Timeout Error] Loaded 313 posts. Unable to load additional posts...
Writing EDU file took 2.93 seconds
[Timeout Error] Loaded 957 posts. Unable to load additional posts...
Writing IPOE file took 111.79 seconds
Writing VLDR file took 10.15 seconds
[Timeout Error] Loaded 581 posts. Unable to load additional posts...
Writing TAL file took 5.15 seconds
Writing TIGR file took 10.97 seconds
[Timeout Error] Loaded 680 posts. Unable to load additional posts...
Writing CLOV file took 9.63 seconds
[Timeout Error] Loaded 980 posts. Unable to load additional posts...
Writing CLF file took 12.29 seconds
[Timeout Error] Loaded 919 posts. Unable to load