In [1]:
# Kerry Zhang
# 7/16/2023
# Objective: Scrape earnings call transcripts from roic.ai
# Notes: Earnings transcripts are divided by person with div class "p-3 rounded-lg false".

import os
import copy
import time
import pandas as pd
import numpy as np

from textblob import TextBlob
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

# Avoid throttling
chrome_options = Options()
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
chrome_options.add_argument("--headless")  # Enable headless mode

# Options
INPUT_FILE = "firms.xlsx"
OUTPUT_FOLDER = "transcripts"

YEARS = list(range(2006, 2023))
QUARTERS = [1, 2, 3, 4]

In [2]:
# Load tickers
TICKERS = pd.read_excel(INPUT_FILE, header = None)[0].values.tolist()
print("Number of tickers: {}".format(len(TICKERS)))

Number of tickers: 20


In [3]:
class text_to_disappear_and_return_new_text(object):
    def __init__(self, locator, initial_text):
        self.locator = locator
        self.initial_text = initial_text

    def __call__(self, driver):
        try:
            element = driver.find_element(*self.locator)
            element_text = element.text
            if self.initial_text not in element_text:
                return element_text
        except:
            # If element is not found or initial text still present, return False to continue waiting.
            return False

In [4]:
def scrape_transcript(ticker: list, year: list, quarter: list) -> tuple:
    url = 'https://roic.ai/transcripts/{}:US/{}/{}'.format(*[ticker, year, quarter])
    #url = "https://roic.ai/transcripts/{:}?y={:}&q={:}".format(*[ticker, year, quarter])
    
    # Set up Selenium WebDriver
    driver = webdriver.Chrome(chrome_options)  
    
    # Navigate to the website
    driver.get(url)
    
    # Wait until element is loaded
    wait = WebDriverWait(driver, 10)
    #locator = (By.ID, "__next")
    locator = (By.CLASS_NAME, "space-y-6")
    initial_text = "Please wait for a while ..."
    
    #element = wait.until_not(EC.invisibility_of_element_located(locator))
    #element = wait.until(EC.visibility_of_element_located((By.ID, "__next")))
    element = wait.until(text_to_disappear_and_return_new_text(locator, initial_text))
    
    # WIP: Throttling
    #wait.until(ExpectedConditions.not(ExpectedConditions.textToBePresentInElement(element, "Completed successfully")));
    
    # Extract Text
    text = copy.deepcopy(element)
    
    driver.quit()
    
    return text, url

In [5]:
def clean_text(earnings_transcript: str) -> list:

    # WIP: Remove non-ASCII characters
    earnings_transcript = earnings_transcript.replace("â\x80\x99", "'").replace("â\x80\x98", "'").replace("â\x80\x93", "'")
    
    lst = earnings_transcript.splitlines()
    
    # Remove speaker abbreviations
    lst = [element for element in lst if len(element) > 1]
    
    return lst

In [6]:
def split_text(lst: list, year: int, quarter: int):
    
    def is_name(segment: str) -> bool:
        words_in_segment = segment.split()
    
        if (
            segment == "Operator" or 
            (
             len(words_in_segment) == 2 and 
             words_in_segment[0].istitle() and 
             words_in_segment[1].istitle()
            )
           ):
            return True
        else:
            return False
        
    def get_sentiment(text):
        analysis = TextBlob(text)
        sentiment_score = analysis.sentiment.polarity
        
        return sentiment_score
    
    # Metadata
    filing_year_quarter = lst[0].replace(" · Earnings Call Transcript", "").split()
    date = lst[1]
    
#     filing_qtr = int(filing_year_quarter[0].replace("Q", ""))
#     filing_year = int(filing_year_quarter[1])
    
#     if year != filing_year or quarter != filing_qtr:
#         return None
        
    # Store [Position, Caller, Speech, Total Length, Average Segment Length] 
    position = 0
    caller = ""
    speech = ""
    paragraph_len_list = []
    
    # Process segments
    data = []
    for segment in lst[2:]:
        
        if is_name(segment):
            
            # Store current data
            data.append([position, 
                         caller, 
                         speech, 
                         np.sum(paragraph_len_list), 
                         np.mean(paragraph_len_list), 
                         get_sentiment(speech)])
            
            # Reset for next caller
            position += 1
            caller = segment
            speech = ""
            paragraph_len_list = []
            
        else:
            speech += segment + " "
            paragraph_len_list.append(len(segment.split()))
            
    df = pd.DataFrame(data[1:], columns=["position", "name", "speech", "tot_len", "avg_len", "sentiment"])
    
    return df

In [None]:
for ticker in TICKERS:
    for year in YEARS:
        for quarter in QUARTERS:
        
            print("{} {} Q{}".format(*[ticker, year, quarter]))
            
            filename = os.path.join(OUTPUT_FOLDER, 
                                    "{:}_{:}_Q{:}.csv".format(*[ticker, year, quarter])
                                   )
            
            if os.path.exists(filename):
                print(f"The file '{filename}' already exists.")
                continue
            
            # Get text from URL
            earnings_transcript, url = scrape_transcript(ticker, year, quarter)
            
            # Convert text into list of lists
            earnings_transcript = clean_text(earnings_transcript)
            
            if earnings_transcript == ["Nothing to show", "We apologize for the inconvenience, but there is no content to display at this time."]:
                print("No data...")
                continue
            
            # Convert to dataframe
            earnings_transcript = split_text(earnings_transcript, year, quarter)
            
            # Save
            earnings_transcript.to_csv(filename, encoding='utf-8-sig')
            
            # Temp. Force retry if contains string "Please wait for a while ..."
            time.sleep(5)

ABT 2006 Q1
No data...
ABT 2006 Q2
No data...
ABT 2006 Q3
No data...
ABT 2006 Q4
No data...
ABT 2007 Q1
No data...
ABT 2007 Q2
The file 'transcripts\ABT_2007_Q2.csv' already exists.
ABT 2007 Q3
The file 'transcripts\ABT_2007_Q3.csv' already exists.
ABT 2007 Q4
The file 'transcripts\ABT_2007_Q4.csv' already exists.
ABT 2008 Q1
The file 'transcripts\ABT_2008_Q1.csv' already exists.
ABT 2008 Q2
The file 'transcripts\ABT_2008_Q2.csv' already exists.
ABT 2008 Q3
The file 'transcripts\ABT_2008_Q3.csv' already exists.
ABT 2008 Q4
The file 'transcripts\ABT_2008_Q4.csv' already exists.
ABT 2009 Q1
The file 'transcripts\ABT_2009_Q1.csv' already exists.
ABT 2009 Q2
The file 'transcripts\ABT_2009_Q2.csv' already exists.
ABT 2009 Q3
The file 'transcripts\ABT_2009_Q3.csv' already exists.
ABT 2009 Q4
The file 'transcripts\ABT_2009_Q4.csv' already exists.
ABT 2010 Q1
The file 'transcripts\ABT_2010_Q1.csv' already exists.
ABT 2010 Q2
No data...
ABT 2010 Q3
The file 'transcripts\ABT_2010_Q3.csv' already

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2012 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2012 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2013 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2013 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2013 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2013 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2014 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2014 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2014 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2014 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2015 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2015 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2015 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2015 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2016 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2016 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2016 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2016 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2017 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2017 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2017 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2017 Q4
ABT 2018 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2018 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2018 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2018 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2019 Q1
ABT 2019 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2019 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2019 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2020 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2020 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2020 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2020 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2021 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2021 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2021 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2021 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2022 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2022 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2022 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ABT 2022 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2006 Q1
No data...
META 2006 Q2
No data...
META 2006 Q3
No data...
META 2006 Q4
No data...
META 2007 Q1
No data...
META 2007 Q2
No data...
META 2007 Q3
No data...
META 2007 Q4
No data...
META 2008 Q1
No data...
META 2008 Q2
No data...
META 2008 Q3
No data...
META 2008 Q4
No data...
META 2009 Q1
No data...
META 2009 Q2
No data...
META 2009 Q3
No data...
META 2009 Q4
No data...
META 2010 Q1
No data...
META 2010 Q2
No data...
META 2010 Q3
No data...
META 2010 Q4
No data...
META 2011 Q1
No data...
META 2011 Q2
No data...
META 2011 Q3
No data...
META 2011 Q4
No data...
META 2012 Q1
No data...
META 2012 Q2
META 2012 Q3
META 2012 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2013 Q1
META 2013 Q2
META 2013 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2013 Q4
META 2014 Q1
META 2014 Q2
META 2014 Q3
META 2014 Q4
META 2015 Q1
META 2015 Q2
META 2015 Q3
META 2015 Q4
META 2016 Q1
META 2016 Q2
META 2016 Q3
META 2016 Q4
META 2017 Q1
META 2017 Q2
META 2017 Q3
META 2017 Q4
META 2018 Q1
META 2018 Q2
META 2018 Q3
META 2018 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2019 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2019 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2019 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2019 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2020 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2020 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2020 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2020 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2021 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2021 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2021 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2021 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2022 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2022 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2022 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


META 2022 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2006 Q1
No data...
MSFT 2006 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2006 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2006 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2007 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2007 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2007 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2007 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2008 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2008 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2008 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2008 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2009 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2009 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2009 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2009 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2010 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2010 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2010 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2010 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2011 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2011 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2011 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2011 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2012 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2012 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2012 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2012 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2013 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2013 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2013 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2013 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2014 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2014 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2014 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2014 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2015 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2015 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2015 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2015 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2016 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2016 Q2
MSFT 2016 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2016 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2017 Q1
MSFT 2017 Q2
MSFT 2017 Q3
MSFT 2017 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2018 Q1
MSFT 2018 Q2
MSFT 2018 Q3
MSFT 2018 Q4
MSFT 2019 Q1
MSFT 2019 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2019 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2019 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2020 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2020 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2020 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2020 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2021 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2021 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2021 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2021 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2022 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2022 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2022 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MSFT 2022 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2006 Q1
No data...
CVX 2006 Q2
No data...
CVX 2006 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2006 Q4
No data...
CVX 2007 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2007 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2007 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2007 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2008 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2008 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2008 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2008 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2009 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2009 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2009 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2009 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2010 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2010 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2010 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2010 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2011 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2011 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2011 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2011 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2012 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2012 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2012 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2012 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2013 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2013 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2013 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2013 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2014 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2014 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2014 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2014 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2015 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2015 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2015 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2015 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2016 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2016 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2016 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2016 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2017 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2017 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2017 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2017 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2018 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2018 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2018 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2018 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2019 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2019 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2019 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2019 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2020 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2020 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2020 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2020 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2021 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2021 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2021 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2021 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2022 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2022 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2022 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


CVX 2022 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2006 Q1
No data...
MRK 2006 Q2
No data...
MRK 2006 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2006 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2007 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2007 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2007 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2007 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2008 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2008 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2008 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2008 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2009 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2009 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2009 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2009 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2010 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2010 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2010 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2010 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2011 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2011 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2011 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2011 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2012 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2012 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2012 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2012 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2013 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2013 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2013 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2013 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2014 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2014 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2014 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2014 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2015 Q1
MRK 2015 Q2
MRK 2015 Q3
MRK 2015 Q4
MRK 2016 Q1
MRK 2016 Q2
MRK 2016 Q3
MRK 2016 Q4
MRK 2017 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2017 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2017 Q3
MRK 2017 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2018 Q1
MRK 2018 Q2
MRK 2018 Q3
MRK 2018 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2019 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2019 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2019 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2019 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2020 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2020 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2020 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2020 Q4


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2021 Q1


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2021 Q2


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


MRK 2021 Q3


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


# Take advantage of div separation via xpath (WIP)

In [None]:
if 0:
    # Set up Selenium WebDriver
    driver = webdriver.Chrome()  

    # Navigate to the website
    driver.get("https://roic.ai/transcripts/ABT?y=2022&q=4")

    # Find all <div> elements with the class "p-3 rounded-lq false"
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CLASS_NAME, "p-3"))).click()
    div_elements = driver.find_elements("xpath", '//*[@id="__next"]/div/main/div[3]/div/div[2]/div/div[2]')

    # Scrape the text content of each matching <div> element
    for div_element in div_elements:
        text = div_element.text
        print(text)

    # Close the browser
    driver.quit()