In [1]:
import requests
import time
import re
import os
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [19]:
# Direction to saving folder
save_folder = './Earnings Call Transcripts/'

In [3]:
# Adjust the list of interested companies accordingly
companies = {
    'DaVita Inc.': 'DVA',
    'Equifax Inc.': 'EFX',
    'MEDNAX, Inc.': 'MD',
    'Oshkosh Corporation': 'OSK',
    'Palo Alto Networks, Inc.': 'PANW',
    'EchoStar Corporation': 'SATS',
    'IDT Corporation': 'IDT',
    "Kohl's Corporation": 'KSS',
    'Roper Technologies, Inc.': 'ROP',
    'Zoom Video Communications, Inc.': 'ZM',
    'Take-Two Interactive Software, Inc.': 'TTWO',
    'Synopsys, Inc.': 'SNPS',
    'Dover Corporation': 'DOV',
    'Allison Transmission Holdings, Inc.': 'ALSN',
    'Arrow Electronics, Inc.': 'ARW',
    'ServiceNow, Inc.': 'NOW',
    'Urban Outfitters, Inc.': 'URBN',
    'agilon health, inc.': 'AGL',
    'R1 RCM Inc.': 'RCM',
    'Akamai Technologies, Inc.': 'AKAM',
    'Amdocs Limited': 'DOX',
    'Dolby Laboratories, Inc.': 'DLB',
    'Autodesk, Inc.': 'ADSK',
    'Benchmark Electronics, Inc.': 'BHE',
    'Ciena Corporation': 'CIEN',
    'CommScope Holding Company, Inc.': 'COMM',
    'F5, Inc.': 'FFIV',
    'IPG Photonics Corporation': 'IPGP',
    'Itron, Inc.': 'ITRI',
    'Juniper Networks, Inc.': 'JNPR',
    'Keysight Technologies, Inc.': 'KEYS',
    'Sanmina Corporation': 'SANM',
    'Ubiquiti Inc.': 'UI',
    'AMN Healthcare Services, Inc.': 'AMN',
    'STERIS Corporation': 'STE',
    'Henry Schein, Inc.': 'HSIC',
    'ResMed Inc.': 'RMD',
    'IDEXX Laboratories, Inc.': 'IDXX',
    'DexCom, Inc.': 'DXCM',
    'Patterson Companies, Inc.': 'PDCO',
    'Becton, Dickinson and Company': 'BDX',
    'Dana Incorporated': 'DAN',
    'Lear Corporation': 'LEA',
    'Visteon Corporation': 'VC',
    'Terex Corporation': 'TEX',
    'BioMarin Pharmaceutical Inc.': 'BMRN',
    'Mondelez International, Inc.': 'MDLZ',
    'Kennametal Inc.': 'KMT',
    'McCormick & Company, Incorporated': 'MKC',
    'Campbell Soup Company': 'CPB',
    'Automatic Data Processing, Inc.': 'ADP',
    "DICK'S Sporting Goods, Inc.": 'DKS',
    'JOANN Inc.': 'JOANQ', # delisted
    'Ulta Beauty, Inc.': 'ULTA',
    'Chemed Corporation': 'CHE',
    'FTI Consulting, Inc.': 'FCN',
    'BorgWarner Inc.': 'BWA',
    'Winnebago Industries, Inc.': 'WGO',
    'GoDaddy Inc.': 'GDDY',
    'WNS (Holdings) Limited': 'WNS'
}

In [4]:
# helper function to pretty print long, single-line text to multi-line text
def pprint(text, line_length=100):
  words = text.split(' ')
  lines = []
  current_line = ''
  for word in words:
    if len(current_line + ' ' + word) <= line_length:
      current_line += ' ' + word
    else:
      lines.append(current_line.strip())
      current_line = word
  if current_line:
    lines.append(current_line.strip())
  print('\n'.join(lines))

### The Motley Fool's

In [13]:
# Function to find the company profile and click on the News link to find the Earnings Call Transcript
def find_transcript_motley(ticker):
    # Set up WebDriver with Chrome options
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("executable_path=./chromewd.exe")
    wd = webdriver.Chrome(options=chrome_options)
    
    try:
        # Step 1: Navigate to The Motley Fool
        wd.get(f'https://www.fool.com')
        
        # Step 2: Wait for the search input box to load and enter the ticker
        search_box = WebDriverWait(wd, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'input.searchbox.ticker-input-input'))
        )
        search_box.clear()  # Clear any pre-existing text in the search box
        # Slowly input the ticker symbol, one character at a time
        for char in ticker:
            search_box.send_keys(char)
            time.sleep(0.3)  # Adjust the speed of typing
        
        # Step 3: Wait for the dropdown list to appear and locate the exact ticker
        WebDriverWait(wd, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "ticker-input-results-symbol"))
        )
        # Find all dropdown options
        dropdown_options = wd.find_elements(By.CLASS_NAME, "ticker-input-results-result")
        us_ticker_element = None
        
        for option in dropdown_options:
            symbol = option.find_element(By.CLASS_NAME, "ticker-input-results-symbol").text
            exchange = option.find_element(By.CLASS_NAME, "ticker-input-results-exchange").text
            
            # Check if this is the correct ticker and it is listed on NYSE or NASDAQ
            if symbol.upper() == ticker.upper() and exchange in ["NYSE", "NASDAQ"]:
                us_ticker_element = option
                break

        # Click on the U.S. ticker element if found
        if us_ticker_element:
            us_ticker_element.click()

        # Step 4: Find and click the "Load More" button 3 times
        view_more_xpath = f"//button[span[contains(text(), 'View More {ticker.upper()} Earnings Transcripts')]]"
        
        # Click the button 3 times
        for i in range(3):
            try:
                # Wait for the button to be clickable
                earnings_transcripts_button = WebDriverWait(wd, 10).until(
                    EC.presence_of_element_located((By.XPATH, view_more_xpath))
                )
                
                # Scroll the button into view
                wd.execute_script("arguments[0].scrollIntoView(true);", earnings_transcripts_button)
                
                # Use JavaScript to click the button (bypassing normal click)
                wd.execute_script("arguments[0].click();", earnings_transcripts_button)
                
                # Wait for additional content to load before the next click
                time.sleep(3)
                
            except Exception as e:
                print(f"Error during click {i+1}: {e}")
                break  # If there's an error, stop further clicks
                
        # Step 5: Find all elements with 'data-track-category="quotepage_transcripts"'
        transcript_elements = wd.find_elements(By.XPATH, "//a[@data-track-category='quotepage_transcripts']")

        # Collect the href attribute (the actual URL) from each element
        links = []
        for elem in transcript_elements:
            href = elem.get_attribute('href')
            links.append(href)

        return links

    except Exception as e:
        print(f"Error during search: {e}")
        return []

    finally:
        # Close the browser after the task is complete
        wd.quit()

In [24]:
tickers = list(companies.values())

# Dictionary to store links for each company
all_company_links_motley = {}

# Loop through each ticker and collect the earnings call links
for ticker in tickers:
    print(f"Processing ticker: {ticker}")
    
    # Call the function to get earnings call links for this ticker
    transcript_links_motley = find_transcript_motley(ticker)
    
    # Store the links in the dictionary with the ticker as the key
    all_company_links_motley[ticker] = transcript_links_motley

all_company_links_motley

Processing ticker: TEX
Processing ticker: RCM
Error during click 1: Message: 
Stacktrace:
0   chromedriver                        0x0000000100a9f648 cxxbridge1$str$ptr + 3645404
1   chromedriver                        0x0000000100a97ea8 cxxbridge1$str$ptr + 3614780
2   chromedriver                        0x0000000100504104 cxxbridge1$string$len + 88416
3   chromedriver                        0x0000000100546364 cxxbridge1$string$len + 359360
4   chromedriver                        0x000000010057fbd0 cxxbridge1$string$len + 594988
5   chromedriver                        0x000000010053af54 cxxbridge1$string$len + 313264
6   chromedriver                        0x000000010053bba4 cxxbridge1$string$len + 316416
7   chromedriver                        0x0000000100a6a1e8 cxxbridge1$str$ptr + 3427196
8   chromedriver                        0x0000000100a6d52c cxxbridge1$str$ptr + 3440320
9   chromedriver                        0x0000000100a5160c cxxbridge1$str$ptr + 3325856
10  chromedriver     

{'TEX': ['https://www.fool.com/earnings/call-transcripts/2022/08/03/terex-tex-q2-2022-earnings-call-transcript/',
  'https://www.fool.com/earnings/call-transcripts/2022/04/29/terex-tex-q1-2022-earnings-call-transcript/',
  'https://www.fool.com/earnings/call-transcripts/2022/02/11/terex-tex-q4-2021-earnings-call-transcript/',
  'https://www.fool.com/earnings/call-transcripts/2021/10/29/terex-corp-tex-q3-2021-earnings-call-transcript/',
  'https://www.fool.com/earnings/call-transcripts/2021/07/30/terex-corp-tex-q2-2021-earnings-call-transcript/',
  'https://www.fool.com/earnings/call-transcripts/2021/04/30/terex-corp-tex-q1-2021-earnings-call-transcript/',
  'https://www.fool.com/earnings/call-transcripts/2021/02/12/terex-corp-tex-q4-2020-earnings-call-transcript/',
  'https://www.fool.com/earnings/call-transcripts/2020/05/02/terex-tex-q1-2020-earnings-call-transcript.aspx',
  'https://www.fool.com/earnings/call-transcripts/2020/02/14/terex-tex-q4-2019-earnings-call-transcript.aspx',
  

In [25]:
# Flatten the dictionary into a single list of links
all_links_motley = []
for links in all_company_links_motley.values():
    all_links_motley.extend(links)

all_links_motley

['https://www.fool.com/earnings/call-transcripts/2022/08/03/terex-tex-q2-2022-earnings-call-transcript/',
 'https://www.fool.com/earnings/call-transcripts/2022/04/29/terex-tex-q1-2022-earnings-call-transcript/',
 'https://www.fool.com/earnings/call-transcripts/2022/02/11/terex-tex-q4-2021-earnings-call-transcript/',
 'https://www.fool.com/earnings/call-transcripts/2021/10/29/terex-corp-tex-q3-2021-earnings-call-transcript/',
 'https://www.fool.com/earnings/call-transcripts/2021/07/30/terex-corp-tex-q2-2021-earnings-call-transcript/',
 'https://www.fool.com/earnings/call-transcripts/2021/04/30/terex-corp-tex-q1-2021-earnings-call-transcript/',
 'https://www.fool.com/earnings/call-transcripts/2021/02/12/terex-corp-tex-q4-2020-earnings-call-transcript/',
 'https://www.fool.com/earnings/call-transcripts/2020/05/02/terex-tex-q1-2020-earnings-call-transcript.aspx',
 'https://www.fool.com/earnings/call-transcripts/2020/02/14/terex-tex-q4-2019-earnings-call-transcript.aspx',
 'https://www.fool

In [22]:
# Function to scrape transcript text from a given URL 
def scrape_motley(url, folder):
    try:      
        # Send a GET request to the website
        response = requests.get(url)
        response.raise_for_status()  # Check if the request was successful

        # Create a BeautifulSoup object to parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Generate a filename based on the company name and quarter from the URL
        match = re.search(r'earnings/call-transcripts/\d{4}/\d{2}/\d{2}/([a-z0-9-]+)-([a-z]+)-([qQ]\d-\d{4})', url)
        if match:
            company_name = match.group(1)
            ticker = match.group(2).upper()
            quarter = match.group(3).lower()
            ticker_folder = os.path.join(folder, ticker)
            filename = f"{ticker}-{quarter}.txt"
        else:
            ticker_folder = folder
            filename = "transcript.txt"

        # Ensure the ticker folder exists
        os.makedirs(ticker_folder, exist_ok=True)

        # Extract the relevant text in paragraph tags
        paragraphs = soup.find_all('p')

        # Initialize a flag to start extracting after "earnings calls"
        start_extracting = False
        extracted_text = []

        # Loop through the paragraphs
        for i, para in enumerate(paragraphs):
            text = para.get_text()
            
            # Start extracting when "earnings calls" is found
            if "earnings call" in text and not start_extracting:
                start_extracting = True

            # Stop scraping when reaching the bottom of the transcript
            if para.find('strong') and "Duration:" in para.find('strong').get_text():
                break
            
            # Append text to extracted_text if start_extracting is True
            if start_extracting:
                extracted_text.append(text)

        # Combine the extracted paragraphs into a single string
        final_text = "\n".join(extracted_text)

        # Save the final text to a .txt file
        file_path = os.path.join(ticker_folder, filename)
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(final_text)
            
        return final_text
    
    except Exception as e:
        print(f"Error scraping {url}: {e}")
        return None

In [26]:
# Scrape and save Earnings Call Transcript
for url in all_links_motley:
    transcript_text = scrape_motley(url, save_folder)

# Print the extracted transcript
#if transcript_text:
#    pprint(transcript_text)

### Insider Monkey

In [27]:
def find_earnings_call_for_page(wd, url):
    wd.get(url)
    
    # Wait for the page to load
    WebDriverWait(wd, 10).until(EC.presence_of_element_located((By.TAG_NAME, "a")))
    links = wd.find_elements(By.TAG_NAME, "a")
    
    # Extract the links related to Earnings Call Transcript
    earnings_call_links = set()
    for link in links:
        href = link.get_attribute('href')
        title = link.get_attribute('title')
        if (title and "Earnings Call Transcript" in title) or (href and "earnings-call-transcript" in href):
            earnings_call_links.add(href)

    return list(earnings_call_links)

In [28]:
# Function to find the company profile and click on the News link to find the Earnings Call Transcript
def find_earnings_call(ticker):
    # Set up WebDriver with Chrome options
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("executable_path=./chromewd.exe")
    wd = webdriver.Chrome(options=chrome_options)
    
    try:
        # Step 1: Navigate to the search results page for the ticker
        wd.get(f'https://www.insidermonkey.com/search?q={ticker}')
        
        # Step 2: Wait for the page to load and find the link that contains the ticker
        WebDriverWait(wd, 10).until(EC.presence_of_element_located((By.TAG_NAME, "a")))
        
        # Locate and click the link that contains the ticker (case-insensitive)
        link = wd.find_element(By.PARTIAL_LINK_TEXT, ticker.upper())
        link.click()
        
        # Step 3: Wait for the company's profile page to load
        WebDriverWait(wd, 10).until(EC.title_contains(ticker.upper()))
        
        # Step 4: Get the current URL of the company profile
        current_url = wd.current_url
        print(f"Successfully navigated to the company profile for {ticker}: {current_url}")
        
        # Step 5: Scroll into view for the "News" link and wait for it to be clickable
        WebDriverWait(wd, 10).until(EC.presence_of_element_located((By.LINK_TEXT, "News")))
        news_link = wd.find_element(By.LINK_TEXT, "News")
        news_url = news_link.get_attribute('href')
        
        # Step 6: Paginate through the news pages and collect earnings call links
        all_earnings_call_links = set()
        page = 0
        while True:
            # Break the loop when reaching the first 100 news, can change to bigger numbers if the company is bigger (more news)
            if page >= 100:
                print(f"Stop after 100 latest news.")
                break
            
            # Construct the URL for each page
            if page == 0:
                url = news_url
            else:
                url = f"{news_url}{page}/"
            
            # Get earnings call links from this page
            earnings_call_links = find_earnings_call_for_page(wd, url)
            earnings_call_links = [link + "/?singlepage=1" for link in earnings_call_links]
            
            # Add found links to the master set
            all_earnings_call_links.update(earnings_call_links)
            
            # Move to the next page (increments of 10)
            page += 10

        # Return all found earnings call links as a list
        return list(all_earnings_call_links)
    
    except Exception as e:
        print(f"Error: {e}")
    
    finally:
        # Ensure the browser is closed after completion
        wd.quit()

In [29]:
tickers = list(companies.values())
#tickers =['UPWK']

# Dictionary to store links for each company
all_company_links = {}

# Loop through each ticker and collect the earnings call links
for ticker in tickers:
    print(f"Processing ticker: {ticker}")
    
    # Call the function to get earnings call links for this ticker
    transcript_links = find_earnings_call(ticker)
    
    # Store the links in the dictionary with the ticker as the key
    all_company_links[ticker] = transcript_links

all_company_links

Processing ticker: DVA
Successfully navigated to the company profile for DVA: https://www.insidermonkey.com/insider-trading/company/davita%20inc/927066/
Stop after 100 latest news.
Processing ticker: EFX
Error: Message: no such element: Unable to locate element: {"method":"partial link text","selector":"EFX"}
  (Session info: chrome=130.0.6723.92); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001033cb648 cxxbridge1$str$ptr + 3645404
1   chromedriver                        0x00000001033c3ea8 cxxbridge1$str$ptr + 3614780
2   chromedriver                        0x0000000102e30104 cxxbridge1$string$len + 88416
3   chromedriver                        0x0000000102e72364 cxxbridge1$string$len + 359360
4   chromedriver                        0x0000000102eabbd0 cxxbridge1$string$len + 594988
5   chromedriver                        0x00000

{'DVA': ['https://www.insidermonkey.com/blog/davita-inc-nysedva-q2-2024-earnings-call-transcript-1331185/?singlepage=1',
  'https://www.insidermonkey.com/blog/davita-inc-nysedva-q2-2023-earnings-call-transcript-1177070/?singlepage=1',
  'https://www.insidermonkey.com/blog/davita-inc-nysedva-q3-2024-earnings-call-transcript-1383232/?singlepage=1',
  'https://www.insidermonkey.com/blog/davita-inc-nysedva-q3-2023-earnings-call-transcript-1219468/?singlepage=1',
  'https://www.insidermonkey.com/blog/davita-inc-nysedva-q4-2022-earnings-call-transcript-1124544/?singlepage=1',
  'https://www.insidermonkey.com/blog/davita-inc-nysedva-q4-2023-earnings-call-transcript-1259127/?singlepage=1',
  'https://www.insidermonkey.com/blog/davita-inc-nysedva-q1-2024-earnings-call-transcript-1296488/?singlepage=1'],
 'EFX': None,
 'MD': ['https://www.insidermonkey.com/blog/pediatrix-medical-group-inc-nysemd-q2-2023-earnings-call-transcript-1178632/?singlepage=1',
  'https://www.insidermonkey.com/blog/pediat

In [32]:
# Remove entries with None values
all_company_links = {key: value for key, value in all_company_links.items() if value is not None}
all_company_links

{'DVA': ['https://www.insidermonkey.com/blog/davita-inc-nysedva-q2-2024-earnings-call-transcript-1331185/?singlepage=1',
  'https://www.insidermonkey.com/blog/davita-inc-nysedva-q2-2023-earnings-call-transcript-1177070/?singlepage=1',
  'https://www.insidermonkey.com/blog/davita-inc-nysedva-q3-2024-earnings-call-transcript-1383232/?singlepage=1',
  'https://www.insidermonkey.com/blog/davita-inc-nysedva-q3-2023-earnings-call-transcript-1219468/?singlepage=1',
  'https://www.insidermonkey.com/blog/davita-inc-nysedva-q4-2022-earnings-call-transcript-1124544/?singlepage=1',
  'https://www.insidermonkey.com/blog/davita-inc-nysedva-q4-2023-earnings-call-transcript-1259127/?singlepage=1',
  'https://www.insidermonkey.com/blog/davita-inc-nysedva-q1-2024-earnings-call-transcript-1296488/?singlepage=1'],
 'MD': ['https://www.insidermonkey.com/blog/pediatrix-medical-group-inc-nysemd-q2-2023-earnings-call-transcript-1178632/?singlepage=1',
  'https://www.insidermonkey.com/blog/pediatrix-medical-gr

In [33]:
# Flatten the dictionary into a single list of links
all_links = []
for links in all_company_links.values():
    all_links.extend(links)

all_links

['https://www.insidermonkey.com/blog/davita-inc-nysedva-q2-2024-earnings-call-transcript-1331185/?singlepage=1',
 'https://www.insidermonkey.com/blog/davita-inc-nysedva-q2-2023-earnings-call-transcript-1177070/?singlepage=1',
 'https://www.insidermonkey.com/blog/davita-inc-nysedva-q3-2024-earnings-call-transcript-1383232/?singlepage=1',
 'https://www.insidermonkey.com/blog/davita-inc-nysedva-q3-2023-earnings-call-transcript-1219468/?singlepage=1',
 'https://www.insidermonkey.com/blog/davita-inc-nysedva-q4-2022-earnings-call-transcript-1124544/?singlepage=1',
 'https://www.insidermonkey.com/blog/davita-inc-nysedva-q4-2023-earnings-call-transcript-1259127/?singlepage=1',
 'https://www.insidermonkey.com/blog/davita-inc-nysedva-q1-2024-earnings-call-transcript-1296488/?singlepage=1',
 'https://www.insidermonkey.com/blog/pediatrix-medical-group-inc-nysemd-q2-2023-earnings-call-transcript-1178632/?singlepage=1',
 'https://www.insidermonkey.com/blog/pediatrix-medical-group-inc-nysemd-q3-2023-

In [34]:
# Function to scrape transcript text from a given URL and stop at metadata tags
def scrape_transcript(url, folder):
    try:
        # Generate a filename based on the company name and quarter from the URL
        match = re.search(r'blog/([a-z0-9-]+)-(?:nasdaq|nyse)([a-z]+)-([qQ]\d-\d{4})-', url)
        if match:
            company_name = match.group(1)
            ticker = match.group(2).upper()
            quarter = match.group(3).lower()
            ticker_folder = os.path.join(folder, ticker)
            filename = f"{ticker}-{quarter}.txt"
        else:
            ticker_folder = folder
            filename = "transcript.txt" 

        # Ensure the ticker folder exists
        os.makedirs(ticker_folder, exist_ok=True)

        # Construct the full file path
        file_path = os.path.join(ticker_folder, filename)

        # Check if the file already exists
        if os.path.exists(file_path):
            print(f"File '{filename}' already exists. Skipping download.")
            return None

        # Send a GET request to the website
        response = requests.get(url)
        response.raise_for_status()  # Check if the request was successful

        # Create a BeautifulSoup object to parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract the relevant text in paragraph tags
        paragraphs = soup.find_all('p')

        # Initialize a flag to start extracting after "Earnings Call"
        start_extracting = False
        extracted_text = []

        # Loop through the paragraphs
        for para in paragraphs:
            text = para.get_text()

            # Start extracting when "Earnings Call" is found
            if "Earnings Call" in text and not start_extracting:
                start_extracting = True

            # Stop scraping when the paragraph with the class "metadata tags" is encountered
            if para.get('class') and 'metadata' in para.get('class'):
                break
            
            # Append text to extracted_text if start_extracting is True
            if start_extracting:
                extracted_text.append(text)

        # Combine the extracted paragraphs into a single string
        final_text = "\n".join(extracted_text)

        # Save final_text to a .txt file
        file_path = os.path.join(ticker_folder, filename)
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(final_text)
            
        return final_text
    
    except Exception as e:
        print(f"Error scraping {url}: {e}")
        return None

In [35]:
# Scrape and save Earnings Call Transcript
for url in all_links:
    transcript_text = scrape_transcript(url, save_folder)

# Print the extracted transcript
#if transcript_text:
    #pprint(transcript_text)

File 'DVA-q2-2024.txt' already exists. Skipping download.
File 'DVA-q2-2023.txt' already exists. Skipping download.
File 'DVA-q3-2024.txt' already exists. Skipping download.
File 'DVA-q3-2023.txt' already exists. Skipping download.
File 'DVA-q4-2022.txt' already exists. Skipping download.
File 'DVA-q4-2023.txt' already exists. Skipping download.
File 'DVA-q1-2024.txt' already exists. Skipping download.
File 'PANW-q4-2023.txt' already exists. Skipping download.
File 'PANW-q3-2023.txt' already exists. Skipping download.
File 'PANW-q4-2024.txt' already exists. Skipping download.
File 'PANW-q2-2024.txt' already exists. Skipping download.
File 'PANW-q1-2023.txt' already exists. Skipping download.
File 'PANW-q2-2023.txt' already exists. Skipping download.
File 'PANW-q3-2024.txt' already exists. Skipping download.
File 'PANW-q1-2024.txt' already exists. Skipping download.
File 'ZM-q1-2025.txt' already exists. Skipping download.
File 'ZM-q1-2024.txt' already exists. Skipping download.
File 'ZM

###### Codes for scraping given the url

In [None]:
# URL of the website to scrape
url = "https://www.insidermonkey.com/blog/davita-inc-nysedva-q4-2023-earnings-call-transcript-1259127/?singlepage=1"

# Send a GET request to the website
response = requests.get(url)

# Create a BeautifulSoup object to parse the HTML content
soup = BeautifulSoup(response.text, 'html.parser')

# Extract the relevant text in paragraph tags
paragraphs = soup.find_all('p')

# Initialize a flag to start extracting after "Earnings Call Transcript"
start_extracting = False
extracted_text = []

# Loop through the paragraphs
for para in paragraphs:
    text = para.get_text()

    # Start extracting when "Earnings Call" is found
    if "Earnings Call" in text:
        start_extracting = True

    # Stop scraping when the paragraph with the class "metadata tags" is encountered
    if para.get('class') and 'metadata' in para.get('class'):
        break
    
    if start_extracting:
        extracted_text.append(text)

# Combine the extracted paragraphs into a single string
final_text = "\n".join(extracted_text)
pprint(final_text)