In [1]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Browser configuration: run Chrome without a visible window.
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
# Chrome's --window-size switch is documented as "width,height"; the
# comma form is accepted by every headless mode, whereas "1920x1080" is
# not guaranteed to be parsed and may leave the default window size.
options.add_argument("--window-size=1920,1080")
# NOTE(review): the two flags below are typically needed only when Chrome
# runs inside a container/CI environment -- confirm they are wanted here.
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# A single browser session is shared by every profile lookup.
driver = webdriver.Chrome(options=options)

# Input: CSV whose first column holds the profile URLs to visit.
input_file = "twitter_links.csv"
df = pd.read_csv(input_file)

# Output: one row per input URL, in the column order listed here.
output_file = "twitter_profiles.csv"
columns = ["Twitter Link", "Bio", "Following Count", "Followers Count", "Location", "Website"]
output_data = []

def scrape_twitter_profile(url):
    """Open a profile page and extract its headline fields.

    Navigates the module-level ``driver`` to *url* and looks up each field
    by absolute XPath, waiting up to 10 seconds per field. A field that
    never appears (or goes stale while being read) is reported as
    ``"N/A"`` instead of failing the whole profile.

    Args:
        url: Address of the profile page to load.

    Returns:
        A list ``[url, bio, following_count, followers_count, location,
        website]``; every scraped entry is a string.

    Raises:
        Any exception other than a lookup timeout or stale element (for
        example one raised by ``driver.get``) is not handled here and
        propagates to the caller.
    """
    # Imported locally so this function does not depend on an edit to the
    # file's import section.
    from selenium.common.exceptions import (
        StaleElementReferenceException,
        TimeoutException,
    )

    missing = "N/A"
    # Only these mean "field not present"; anything else (driver crash,
    # Ctrl-C) must not be disguised as a missing field.
    lookup_errors = (TimeoutException, StaleElementReferenceException)

    driver.get(url)
    wait = WebDriverWait(driver, 10)

    def locate(xpath):
        return wait.until(EC.presence_of_element_located((By.XPATH, xpath)))

    def get_text(xpath):
        try:
            return locate(xpath).text.strip()
        except lookup_errors:
            return missing

    def get_href(xpath):
        try:
            href = locate(xpath).get_attribute("href")
        except lookup_errors:
            return missing
        # get_attribute returns None when the element has no href.
        return href or missing

    # Every field lives under the same profile-header container.
    header = "/html/body/div[1]/div/div/div[2]/main/div/div/div/div/div/div[3]/div/div/div[1]/div"

    bio = get_text(header + "/div[3]/div/div/span")
    following_count = get_text(header + "/div[5]/div[1]/a/span[1]/span")
    followers_count = get_text(header + "/div[5]/div[2]/a/span[1]/span")
    location = get_text(header + "/div[4]/div/span[1]/span/span")
    # Target the <a> itself: the <span> inside it carries no href, so
    # reading href from the span always yielded None.
    website = get_href(header + "/div[4]/div/a")

    return [url, bio, following_count, followers_count, location, website]

# Visit every link in the first CSV column. A failure on one profile is
# logged and recorded as an "Error" row so the remaining links still run.
try:
    for twitter_link in df.iloc[:, 0]:
        try:
            profile_data = scrape_twitter_profile(twitter_link)
            output_data.append(profile_data)
        except Exception as e:
            print(f"Error scraping {twitter_link}: {e}")
            output_data.append([twitter_link, "Error", "Error", "Error", "Error", "Error"])

    output_df = pd.DataFrame(output_data, columns=columns)
    output_df.to_csv(output_file, index=False)
finally:
    # Always shut the browser down, even on Ctrl-C or a failed CSV write,
    # so no Chrome process is left running.
    driver.quit()

print(f"Scraped data saved to {output_file}")


Scraped data saved to twitter_profiles.csv
