In [1]:
import csv
import time
import pyperclip
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import WebDriverException

# ── CONFIG ──────────────────────────────────────────────────────────────────────
CHROMEDRIVER_PATH = "./chromedriver"
CSV_FILE = "quartr_transcripts.csv"
SCROLL_STEP = 400
PAUSE = 0.5
# ────────────────────────────────────────────────────────────────────────────────

def scroll_to_index(driver, container, index, wait, max_attempts=20):
    for _ in range(max_attempts):
        try:
            driver.execute_script("""
                const container = arguments[0];
                container.scrollTop = arguments[1] * 100;
            """, container, int(index))
            time.sleep(PAUSE)
            row = driver.find_element(By.CSS_SELECTOR, f"div[data-index='{index}']")
            return row
        except:
            time.sleep(0.3)
    return None

def extract_transcript(driver, wait):
    transcript_text = ""
    try:
        main_xpath = "//div[contains(@class,'document-content')]"
        wait.until(EC.presence_of_element_located((By.XPATH, main_xpath)))
        sections = driver.find_elements(By.XPATH, f"{main_xpath}//div[contains(@class, 'group') and contains(@class, 'relative')]")

        for section in sections:
            speaker_info = ""
            try:
                name = section.find_element(By.XPATH, ".//div[contains(@class, 'font-semibold')]").text.strip()
                position = section.find_element(By.XPATH, ".//div[contains(@class, 'font-medium')]//span").text.strip()
                speaker_info = f"[{name} ({position})]: "
            except:
                try:
                    name = section.find_element(By.XPATH, ".//div[contains(@class, 'font-semibold')]").text.strip()
                    speaker_info = f"[{name}]: "
                except:
                    pass

            body = " ".join([
                s.text.strip()
                for s in section.find_elements(By.XPATH, ".//span[contains(@class, 'select-text')]")
                if s.text.strip()
            ])
            if body:
                transcript_text += f"{speaker_info}{body}\n\n"

        if transcript_text.strip():
            return transcript_text.strip()
    except:
        pass

    # Try copy button
    try:
        copy_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@data-sentry-component='TranscriptCopyAction']")))
        copy_button.click()
        time.sleep(0.7)
        return pyperclip.paste().strip()
    except:
        pass

    # Raw fallback
    try:
        main_container = driver.find_element(By.XPATH, "//div[contains(@class,'document-content')]")
        return main_container.text.strip()
    except:
        return ""

def main():
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--start-maximized")
    service = Service(CHROMEDRIVER_PATH)
    driver = webdriver.Chrome(service=service, options=chrome_options)
    wait = WebDriverWait(driver, 20)
    scraped_data = []

    # ── LOGIN ────────────────────────────────────────────────────────────
    driver.get(
        "https://auth.quartr.com/realms/prod/protocol/openid-connect/auth"
        "?response_type=code&client_id=web&redirect_uri=https%3A%2F%2Fweb.quartr.com"
        "%2Fapi%2Fauth%2Fcallback%2Fkeycloak&code_challenge=iQ7FuQ_Xo7pFKL2wwzpLRRTbVqGLTpG-NmyU0oG7iKQ"
        "&code_challenge_method=S256&scope=openid+profile+email"
    )
    print("🔑 Please log in manually.")
    input("    Press Enter when logged in... ")
    wait.until(EC.url_contains("web.quartr.com"))
    print("✅ Logged in successfully.")

    # ── NAVIGATE TO CALENDAR ─────────────────────────────────────────────
    try:
        calendar_button = wait.until(EC.element_to_be_clickable((
            By.XPATH, "//a[@href='/calendar' and @data-menu-list-item='true']"
        )))
        calendar_button.click()
        print("📅 Navigated to Calendar page.")
    except Exception as e:
        print(f"❌ Failed to click the Calendar button: {e}")
        driver.quit()
        return

    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div[data-index]")))

    # ── FIND SCROLLABLE CONTAINER ────────────────────────────────────────
    row_sample = driver.find_element(By.CSS_SELECTOR, "div[data-index]")
    scrollable_container = driver.execute_script("""
        let el = arguments[0];
        while (el.parentNode) {
            el = el.parentNode;
            if (el.scrollHeight > el.clientHeight + 10) return el;
        }
        return null;
    """, row_sample)
    if not scrollable_container:
        print("❌ Could not find scrollable container.")
        driver.quit()
        return

    # ── COLLECT UNIQUE data-index ROWS ───────────────────────────────────
    print("🔄 Scanning for all row indices...")
    unique_indexes = set()
    last_count = 0
    no_new_scrolls = 0
    max_no_new = 7

    while no_new_scrolls < max_no_new:
        rows = driver.find_elements(By.CSS_SELECTOR, "div[data-index]")
        for r in rows:
            idx = r.get_attribute("data-index")
            if idx:
                unique_indexes.add(idx)

        driver.execute_script("arguments[0].scrollTop += arguments[1];", scrollable_container, SCROLL_STEP)
        time.sleep(PAUSE)

        if len(unique_indexes) == last_count:
            no_new_scrolls += 1
        else:
            no_new_scrolls = 0
            last_count = len(unique_indexes)

    print(f"✅ Found {len(unique_indexes)} unique rows.")

    # ── PROCESS EACH ROW ─────────────────────────────────────────────────
    for idx in sorted(unique_indexes, key=lambda x: int(x)):
        try:
            print(f"\n🔍 Processing row {idx}...")
            row = scroll_to_index(driver, scrollable_container, idx, wait)
            if not row:
                print(f"⚠️ Row {idx} not found after scrolling.")
                continue

            boxes = row.find_elements(By.CSS_SELECTOR, "div.px-3")
            for box_idx, box in enumerate(boxes):
                try:
                    button = box.find_element(By.CSS_SELECTOR, 'span[data-sentry-component="ShowTranscriptButton"]')

                    # Extract company and title
                    company = ""
                    title = ""
                    try:
                        company = box.find_element(By.CSS_SELECTOR, 'span.text-clip').text.strip()
                    except:
                        pass
                    try:
                        title = box.find_element(By.CSS_SELECTOR, 'span.text-ellipsis').text.strip()
                    except:
                        pass

                    print(f"▶️ Row {idx}, Box {box_idx} → {company} – {title}")

                    driver.execute_script("arguments[0].scrollIntoView({block:'center'});", button)
                    time.sleep(0.5)
                    button.click()
                    time.sleep(1.5)

                    transcript = extract_transcript(driver, wait)

                    if transcript:
                        scraped_data.append({
                            "Row": idx,
                            "Box": box_idx,
                            "Company": company,
                            "Title": title,
                            "Transcript": transcript
                        })
                        print(f"✅ Transcript extracted ({len(transcript)} characters)")
                    else:
                        print("⚠️ Empty transcript.")

                    driver.back()
                    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div[data-index]")))
                    time.sleep(PAUSE)
                except Exception:
                    continue
        except WebDriverException as wde:
            print(f"❌ WebDriver error on row {idx}: {wde}")
        except Exception as e:
            print(f"❌ Error on row {idx}: {e}")
            continue

    # ── SAVE TO CSV ──────────────────────────────────────────────────────
    if scraped_data:
        with open(CSV_FILE, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=["Row", "Box", "Company", "Title", "Transcript"])
            writer.writeheader()
            writer.writerows(scraped_data)
        print(f"\n💾 Saved {len(scraped_data)} transcripts to {CSV_FILE}")
    else:
        print("\n❌ No transcripts captured.")

    driver.quit()
    print("🏁 Done.")

if __name__ == "__main__":
    main()


🔑 Please log in manually.


    Press Enter when logged in...  


✅ Logged in successfully.
📅 Navigated to Calendar page.
🔄 Scanning for all row indices...
✅ Found 12 unique rows.

🔍 Processing row 0...

🔍 Processing row 1...

🔍 Processing row 2...

🔍 Processing row 3...

🔍 Processing row 4...

🔍 Processing row 5...

🔍 Processing row 6...

🔍 Processing row 7...

🔍 Processing row 8...

🔍 Processing row 9...

🔍 Processing row 10...

🔍 Processing row 11...

❌ No transcripts captured.
🏁 Done.
