# In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import requests
from bs4 import BeautifulSoup
import time

# Listing page that fetch_all_episodes() loads and scrolls to discover episodes.
base_url = "https://ogjre.com/transcripts"
# Text file that main() writes every scraped transcript into.
output_file = "joe_rogan_podcast_transcripts.txt"
# Text file that main() writes the episode titles into, one per line.
titles_file = "episode_titles.txt"

def setup_driver():
    """Create and return a Chrome WebDriver configured for scraping.

    The browser is started with the ``--disable-gpu`` and ``--no-sandbox``
    command-line switches.  Headless mode is intentionally left off; add
    ``"--headless"`` to the tuple below to run without a visible window.
    """
    chrome_options = webdriver.ChromeOptions()
    for switch in ("--disable-gpu", "--no-sandbox"):
        chrome_options.add_argument(switch)
    return webdriver.Chrome(options=chrome_options)

def wait_for_element(driver, by, value, timeout=10):
    """Block until an element matching the locator is present, then return it.

    Args:
        driver: The WebDriver instance to poll.
        by: Locator strategy (for example ``By.CLASS_NAME``).
        value: Locator value matched using ``by``.
        timeout: Upper bound handed to ``WebDriverWait`` (default 10).

    Returns:
        Whatever ``WebDriverWait.until`` returns for the presence condition,
        i.e. the located element.
    """
    waiter = WebDriverWait(driver, timeout)
    locator = (by, value)
    element_is_present = EC.presence_of_element_located(locator)
    return waiter.until(element_is_present)

def fetch_all_episodes(driver, *, initial_wait=5, scroll_pause_time=2,
                       max_no_new_content_attempts=10):
    """Scroll the transcripts listing page and collect every episode on it.

    The page at ``base_url`` is loaded, then repeatedly scrolled to the
    bottom.  On each pass all episode cards currently in the DOM are scanned
    and any ``(title, link)`` pair not seen before is appended to the result.

    Args:
        driver: WebDriver instance used to load and scroll the page.
        initial_wait: Seconds to sleep after the initial page load.
        scroll_pause_time: Seconds to sleep after each scroll so that newly
            loaded content has a chance to appear.
        max_no_new_content_attempts: Number of consecutive passes without a
            new episode after which scrolling stops.

    Returns:
        A list of ``(title, link)`` tuples in the order they were first seen.
        Errors during scrolling are printed and end the loop; whatever was
        collected up to that point is still returned.
    """
    driver.get(base_url)
    time.sleep(initial_wait)
    print("Webpage loaded. Starting to scroll...")

    episodes = []
    # Mirror of `episodes` for O(1) duplicate checks; every pass re-scans all
    # cards in the DOM, so a list membership test would be quadratic.
    seen = set()
    no_new_content_attempts = 0
    last_height = driver.execute_script("return document.body.scrollHeight")

    while no_new_content_attempts < max_no_new_content_attempts:
        try:
            episode_elements = driver.find_elements(
                By.CLASS_NAME, "VideoSingle__VideoSingleStyles-sc-dngnuh-0"
            )
            new_episodes_found = False

            for episode in episode_elements:
                try:
                    link = episode.find_element(By.TAG_NAME, "a").get_attribute("href")
                    title = episode.find_element(By.CLASS_NAME, "vs-video-title").text
                except Exception as e:
                    # Best effort: a single malformed card must not stop the crawl.
                    print(f"Error extracting episode: {e}")
                    continue

                entry = (title, link)
                if entry not in seen:
                    seen.add(entry)
                    episodes.append(entry)
                    new_episodes_found = True

            if new_episodes_found:
                no_new_content_attempts = 0
            else:
                no_new_content_attempts += 1

            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(scroll_pause_time)
            new_height = driver.execute_script("return document.body.scrollHeight")
            # NOTE(review): an unchanged height ends the crawl immediately,
            # without consulting the retry budget above.  If the page loads
            # content slower than `scroll_pause_time`, raise that value.
            if new_height == last_height:
                break
            last_height = new_height
        except Exception as e:
            print(f"Error during scrolling: {e}")
            break

    return episodes

def fetch_transcript(episode_url, timeout=30):
    """Download an episode page and return its transcript text.

    Args:
        episode_url: URL of the episode page to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection cannot hang the whole scrape.

    Returns:
        The stripped text of the transcript paragraph, or the string
        ``"Transcript not available."`` when the request fails, the response
        status is not 200, or the transcript paragraph is not in the page.
    """
    unavailable = "Transcript not available."

    print(f"Fetching transcript for {episode_url}...")
    try:
        response = requests.get(episode_url, timeout=timeout)
    except requests.RequestException as exc:
        # One unreachable episode should not abort the remaining downloads.
        print(f"Request failed for {episode_url}: {exc}")
        return unavailable

    if response.status_code != 200:
        return unavailable

    soup = BeautifulSoup(response.content, "html.parser")
    transcript_tag = soup.find("p", class_="chakra-text ssc-transcript css-0")
    if transcript_tag is None:
        return unavailable
    return transcript_tag.get_text(strip=True)

def main():
    """Scrape all episodes and write their titles and transcripts to disk.

    Starts a browser, collects the episode list, writes the titles to
    ``titles_file`` (one per line), then fetches each transcript and writes
    it to ``output_file`` followed by an 80-character ``=`` separator line.
    The browser is always shut down, even if scraping fails part-way.
    """
    browser = setup_driver()
    try:
        episodes = fetch_all_episodes(browser)
        print(f"Found {len(episodes)} episodes.")

        # Titles file: one title per line.
        with open(titles_file, "w", encoding="utf-8") as titles_out:
            titles_out.writelines(f"{title}\n" for title, _ in episodes)

        # Transcripts file: one delimited section per episode.
        separator = "=" * 80 + "\n"
        with open(output_file, "w", encoding="utf-8") as transcripts_out:
            for title, link in episodes:
                print(f"Scraping episode: {title}")
                transcript = fetch_transcript(link)
                section = (
                    f"Episode Title: {title}\n"
                    f"Transcript:\n{transcript}\n"
                    f"{separator}"
                )
                transcripts_out.write(section)

        print(f"Titles saved in {titles_file}")
        print(f"Transcripts saved in {output_file}")
    finally:
        browser.quit()

# Run the scraper only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
