In [1]:
import random
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Browser setup: install/locate a chromedriver binary via webdriver_manager,
# wrap it in a Service, and start a maximized Chrome session.
chromedriver_path = ChromeDriverManager().install()
chrome_service = Service(chromedriver_path)
driver = webdriver.Chrome(service=chrome_service)
driver.maximize_window()

# LinkedIn content-search results page for the keyword "depression".
linkedin_url = (
    "https://www.linkedin.com/search/results/content/"
    "?keywords=depression&origin=SWITCH_SEARCH_VERTICAL"
)

# Open the LinkedIn home page and block until the operator confirms, by
# pressing Enter in the console, that they have logged in by hand.
driver.get("https://www.linkedin.com/")
input("‚úÖ Please log in to LinkedIn manually and press Enter here once logged in...")

# Go to the search results, then pause a fixed 5 seconds before scraping.
driver.get(linkedin_url)
time.sleep(5)

# Data storage
conversations = []             # one dict per accepted post, in discovery order
unique_posts = set()           # commentary texts already stored (de-duplication key)
conversations_to_scrape = 100  # stop once this many posts have been collected

# Safety valve: stop after this many consecutive passes that add no new post
# (for example when the feed is exhausted or the page never renders any post
# container), instead of scrolling forever.
max_stalled_rounds = 10
stalled_rounds = 0


def _read_text(post, by, locator, default):
    """Return the stripped text of the first element matching *locator* under
    *post*, or *default* when the lookup fails.

    Only Selenium errors (element missing, stale reference, ...) are treated
    as "not available"; KeyboardInterrupt and programming errors propagate.
    """
    try:
        return post.find_element(by, locator).text.strip()
    except WebDriverException:
        return default


def _read_count(post, by, locator):
    """Return the digits in the text of the first element matching *locator*
    under *post* as an int, or 0 when the element is missing or has no digits.
    """
    try:
        label = post.find_element(by, locator).text.strip()
        # Keep digits only, so a label such as "13 comments" becomes 13.
        # int('') raises ValueError when the label contains no digit at all.
        return int(''.join(filter(str.isdigit, label)))
    except (WebDriverException, ValueError):
        return 0


# Scraping loop
while (len(conversations) < conversations_to_scrape
       and stalled_rounds < max_stalled_rounds):
    stored_before = len(conversations)

    # Scroll down in small steps with randomized pauses.
    for _ in range(5):
        driver.execute_script("window.scrollBy(0, 400);")
        time.sleep(random.uniform(2, 4))

    try:
        post_elements = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, '//div[contains(@class, "feed-shared-update-v2")]'))
        )
    except TimeoutException:
        # No post container appeared within 10 seconds; count the pass as
        # stalled so repeated timeouts eventually end the loop.
        print("Posts not found. Retrying...")
        stalled_rounds += 1
        continue

    print(f"üîç Found {len(post_elements)} post containers.")

    # Every pass re-reads all containers currently in the DOM; posts that were
    # already stored are filtered out below by their commentary text.
    for post in post_elements:
        if len(conversations) >= conversations_to_scrape:
            break

        # First aria-hidden span inside the post is taken as the username.
        username = _read_text(
            post, By.XPATH, './/span[@aria-hidden="true"]', 'Unknown'
        )
        content = _read_text(
            post,
            By.XPATH,
            './/div[contains(@class, "update-components-text relative update-components-update-v2__commentary")]',
            'No content',
        )
        reactions = _read_count(
            post, By.CLASS_NAME, 'social-details-social-counts__reactions-count'
        )
        comments = _read_count(
            post,
            By.XPATH,
            './/li[2]//li[1]//button[contains(@class, "social-details-social-counts__count-value")]',
        )
        reposts = _read_count(
            post,
            By.XPATH,
            './/span[contains(@class, "social-details-social-counts__item--truncate-text")]',
        )

        # Filter: keep a post only if its commentary has not been stored yet
        # and it has at least 1 reaction, comment or repost.
        # NOTE: posts whose commentary could not be read all share the
        # 'No content' placeholder, so at most one of them is ever stored.
        if content not in unique_posts and (reactions >= 1 or comments >= 1 or reposts >= 1):
            unique_posts.add(content)
            conversations.append({
                'username': username,
                'content': content,
                'reactions': reactions,
                'comments': comments,
                'reposts': reposts
            })
            print(f"‚úÖ {len(conversations)}/{conversations_to_scrape}: {username} | üëç {reactions} üí¨ {comments} üîÅ {reposts}")
        else:
            print("‚ö†Ô∏è Skipped (Duplicate or no interaction)...")

    # Track progress: any newly stored post resets the stall counter.
    if len(conversations) > stored_before:
        stalled_rounds = 0
    else:
        stalled_rounds += 1

    # Jump to the bottom of the page so more results get a chance to load.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(random.uniform(3, 5))

if len(conversations) < conversations_to_scrape:
    print(
        f"Stopping early: no new posts in {max_stalled_rounds} consecutive passes "
        f"({len(conversations)}/{conversations_to_scrape} collected)."
    )

# Save final dataset: one CSV row per collected post, no index column.
output_path = "linkedin_depression_100full.csv"
try:
    df = pd.DataFrame(conversations)
    df.to_csv(output_path, index=False, encoding="utf-8")
    print(f"‚úÖ Done! Saved {len(df)} posts to {output_path}")
finally:
    # Always shut the browser down, even if building or writing the CSV
    # fails, so no Chrome/chromedriver process is left running.
    driver.quit()


üîç Found 32 post containers.
‚úÖ 1/100: Amit Rastogii | üëç 1 üí¨ 0 üîÅ 0
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚úÖ 2/100: Yashveer Bhardwaj | üëç 1 üí¨ 0 üîÅ 0
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚úÖ 3/100: Kesh Alagasi | üëç 29 üí¨ 13 üîÅ 0
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚úÖ 4/100: Elvis Warutumo | üëç 328 üí¨ 40 üîÅ 0
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
‚ö†Ô∏è Skipped (Duplicate or no interaction)...
