In [1]:
import time
from pathlib import Path

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


In [2]:
def safe(elem, selector):
    """Return the stripped text of the first match of ``selector`` under ``elem``.

    Args:
        elem: Any object exposing ``find_element(by, value)``.
        selector: CSS selector of the descendant whose text is wanted.

    Returns:
        The matched element's ``.text`` with surrounding whitespace removed,
        or the placeholder string ``"N/A"`` when the lookup or the text read
        raises an ordinary exception.
    """
    try:
        return elem.find_element(By.CSS_SELECTOR, selector).text.strip()
    except Exception:
        # Best-effort read: any ordinary failure maps to the placeholder.
        # Catching ``Exception`` instead of using a bare ``except`` lets
        # KeyboardInterrupt / SystemExit propagate, so a running scrape can
        # still be interrupted.
        return "N/A"


In [3]:
def scrape_influencers(url, max_records=120, max_tabs=10):
    """Scrape influencer names and handles from a tabbed listing page.

    Opens ``url`` in Chrome, hides the ``.navbar-logo-left-container``
    element, then clicks through the tabs labelled ``"Tab 1"`` up to
    ``"Tab <max_tabs>"`` and reads every ``div[role="listitem"]`` card found
    in each tab's pane.

    Args:
        url: Address of the page to load.
        max_records: Stop collecting once this many records have been kept.
        max_tabs: Number of tabs to visit. Defaults to 10, the value that
            was previously hard-coded.

    Returns:
        A list of dicts with the keys ``"Rank"`` (1-based position in
        collection order), ``"Full Name"`` and ``"Handle"``.

    Notes:
        Records are de-duplicated on the handle. Cards whose handle cannot be
        read all receive the ``"N/A"`` placeholder from ``safe``, so at most
        one such record is kept.
    """
    options = webdriver.ChromeOptions()
    # options.add_argument("--headless")  # enable to run without a window
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    driver = webdriver.Chrome(options=options)

    influencers = []
    seen_handles = set()  # O(1) duplicate check instead of rescanning the list

    # Everything after the browser starts runs under try/finally so the
    # Chrome process is shut down even when a step raises.
    try:
        wait = WebDriverWait(driver, 10)

        driver.get(url)
        time.sleep(4)  # fixed pause after the initial page load

        # Hide the navbar container, which otherwise blocks clicks on the tabs.
        driver.execute_script("""
            let nav = document.querySelector('.navbar-logo-left-container');
            if (nav) nav.style.display = 'none';
        """)

        for page in range(1, max_tabs + 1):
            # Quota already met: opening further tabs would add nothing.
            if len(influencers) >= max_records:
                break

            print(f"\n--- Opening Tab {page} ---")

            tab_selector = f'a[data-w-tab="Tab {page}"]'
            pane_selector = f'div[data-w-tab="Tab {page}"]'

            # Click the tab (via JavaScript, after scrolling it into view).
            try:
                tab = wait.until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, tab_selector))
                )
                driver.execute_script(
                    "arguments[0].scrollIntoView({behavior:'instant'});", tab
                )
                time.sleep(0.5)
                driver.execute_script("arguments[0].click();", tab)
            except Exception as e:
                print(" Cannot click tab:", e)
                continue

            # Wait for the pane that belongs to this tab.
            try:
                time.sleep(1.5)
                pane = wait.until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, pane_selector))
                )
            except TimeoutException:
                print(" Pane did not load.")
                continue

            cards = pane.find_elements(By.CSS_SELECTOR, 'div[role="listitem"]')
            print(f"Found {len(cards)} influencers.")

            # Extract one record per card, skipping handles already collected.
            for card in cards:
                if len(influencers) >= max_records:
                    break

                full_name = safe(card, ".ig-user")
                handle = safe(card, ".influencer-name")

                if handle in seen_handles:
                    continue
                seen_handles.add(handle)

                influencers.append({
                    "Rank": len(influencers) + 1,
                    "Full Name": full_name,
                    "Handle": handle,
                })

                print(f" Added: @{handle}")
    finally:
        driver.quit()

    return influencers


In [5]:
# Target page and output location. The workbook is written to a folder
# relative to the notebook's working directory rather than to a user-specific
# absolute path, so this cell can run on any machine.
URL = "https://topnine.co/instagram-influencers"
OUTPUT_DIR = Path("Excel")

data = scrape_influencers(URL, 120)

print("\nCollected:", len(data))

if data:
    df = pd.DataFrame(data)
    # Make sure the target folder exists before writing the workbook.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    save_path = OUTPUT_DIR / "influencers.xlsx"
    df.to_excel(save_path, index=False)
    print("Saved →", save_path.resolve())
else:
    print("Nothing collected; no file written.")



--- Opening Tab 1 ---
Found 40 influencers.
 Added: @cristiano
 Added: @taylorswift
 Added: @selenagomez
 Added: @arianagrande
 Added: @kendalljenner
 Added: @neymarjr
 Added: @leomessi
 Added: @beyonce
 Added: @lalalalisa_m
 Added: @justinbieber
 Added: @thv
 Added: @jennierubyjane
 Added: @kimkardashian
 Added: @kyliejenner
 Added: @sooyaaa__
 Added: @j.m
 Added: @jin
 Added: @uarmyhope
 Added: @virat.kohli
 Added: @iamcardib
 Added: @billieeilish
 Added: @roses_are_rosie
 Added: @karolg
 Added: @k.mbappe
 Added: @mileycyrus
 Added: @champagnepapi
 Added: @rkive
 Added: @khloekardashian
 Added: @tomholland2013
 Added: @georginagio
 Added: @eunwo.o_c
 Added: @narendramodi
 Added: @instagram
 Added: @therock
 Added: @bellahadid
 Added: @zayn
 Added: @adele
 Added: @jlo
 Added: @kingjames
 Added: @dualipa

--- Opening Tab 2 ---
Found 40 influencers.
 Added: @harrystyles
 Added: @aliaabhatt
 Added: @shawnmendes
 Added: @vinijr
 Added: @bts.bighitofficial
 Added: @rohitsharma45
 Added: @