# Twitter Profile Scraper

## How to use

### Setup:
--> Ensure you have Python installed.

--> Install the required libraries using pip: `pip install selenium pandas webdriver-manager openpyxl` (openpyxl is what pandas uses to write the .xlsx output files).

### Run the Script:
--> Execute the provided script in your terminal or IDE.

### Input Usernames:
--> When prompted, enter the number of Twitter profiles you wish to scrape.

--> Provide the usernames for the entered number of profiles.

### Login to Twitter:
--> A browser window (Chrome) will open, taking you to the Twitter login page.

--> Manually log in to your Twitter account within the allotted 25 seconds.

### Wait:
--> The script will automatically visit each profile, scrape the desired information, and store it in memory.

### Check the Output:
--> Once the script completes, find two Excel files in the script's directory: twitter_data.xlsx (contains profile information) and twitter_hashtags.xlsx (contains hashtags from the tweets).

### Done!:
--> Review the scraped data in the two Excel files.

--> If a piece of data is not available for a profile, the corresponding cell in that column is left blank.
                                                            
######                                                           HaPPy ScraPing


In [None]:
# Importing Necessary library 
import re
import time
import random
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import WebDriverException

# Function to introduce a random delay time
def delay():
    """Block for a randomly chosen duration between 3 and 7 seconds."""
    pause_seconds = random.uniform(3, 7)
    time.sleep(pause_seconds)

# Helper to read one element's text, returning None when it cannot be read
def _element_text(root, by, selector):
    """Return the text of the first element under ``root`` matching a locator.

    Args:
        root: Object exposing ``find_element`` (the driver or a page element).
        by: Locator strategy, e.g. ``By.CSS_SELECTOR`` or ``By.XPATH``.
        selector: Locator expression evaluated with ``by``.

    Returns:
        The element's ``.text``, or ``None`` when Selenium raises a
        ``WebDriverException`` while locating or reading the element.
    """
    try:
        return root.find_element(by, selector).text
    except WebDriverException:
        # Only Selenium errors are treated as "value not available"; a bare
        # ``except`` here would also swallow KeyboardInterrupt/SystemExit.
        return None


# Function to scrape a given Twitter profile
def scrape_twitter_profile(username, num_tweets):
    """Scrape profile details and the first tweets of one Twitter profile.

    Uses the module-level ``driver`` (expected to be logged in already) and
    the module-level ``delay()`` helper to pause between steps.

    Args:
        username: Profile handle appended to ``https://twitter.com/``.
        num_tweets: Maximum number of tweets to read from the loaded page.

    Returns:
        A ``(data, hashtags)`` tuple of dicts.
        ``data`` holds "Name", "Bio", "Location", "Website", "Joined on",
        "Following", "Followers" and one "Tweet N" entry per tweet read;
        any value that could not be read is ``None``.
        ``hashtags`` holds "Name" (the ``username`` argument) and, per
        "Tweet N", a comma-separated string of the hashtags found in that
        tweet's text, or ``None`` when the tweet text could not be read.
    """
    delay()  # Introducing a delay before scraping
    URL = "https://twitter.com/" + username + "?lang=en"
    driver.get(URL)
    delay()

    # Waiting for the tweets to load on the page
    try:
        WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, '[data-testid="tweet"]')))
    except WebDriverException:
        print(f"Tweets did not appear for {username}! Proceeding after timeout")

    data = {}

    # The "UserName" block's text spans several lines; only the first line is
    # kept as the name.
    name_block = _element_text(driver, By.CSS_SELECTOR, 'div[data-testid="UserName"]')
    data["Name"] = name_block.split('\n')[0] if name_block is not None else None

    # Remaining profile fields are stored verbatim, in output-column order.
    profile_fields = (
        ("Bio", By.CSS_SELECTOR, 'div[data-testid="UserDescription"]'),
        ("Location", By.CSS_SELECTOR, 'span[data-testid="UserLocation"]'),
        ("Website", By.CSS_SELECTOR, 'a[data-testid="UserUrl"]'),
        ("Joined on", By.CSS_SELECTOR, 'span[data-testid="UserJoinDate"]'),
        ("Following", By.XPATH, "//span[contains(text(), 'Following')]/ancestor::a/span"),
        ("Followers", By.XPATH, "//span[contains(text(), 'Followers')]/ancestor::a/span"),
    )
    for field, by, selector in profile_fields:
        data[field] = _element_text(driver, by, selector)

    delay()

    # Extracting the required number of tweets and their hashtags
    tweets = driver.find_elements(By.CSS_SELECTOR, '[data-testid="tweet"]')[:num_tweets]
    hashtags = {"Name": username}
    for i, tweet in enumerate(tweets):
        key = f"Tweet {i+1}"
        delay()
        tweet_text = _element_text(tweet, By.CSS_SELECTOR, 'div[data-testid="tweetText"]')
        data[key] = tweet_text
        if tweet_text is None:
            hashtags[key] = None
        else:
            hashtags[key] = ', '.join(re.findall(r"(#\w+)", tweet_text))
        delay()

    return data, hashtags

# Getting user input for number of profiles and their usernames
num_profiles = int(input("Enter the number of Twitter profiles you want to scrape: "))
usernames = [input(f"Enter username {i+1}: ") for i in range(num_profiles)]
num_tweets_to_scrape = 3  # Tweets read per profile

# Initializing Chrome web driver.
# Kept at module level because scrape_twitter_profile() reads it as a global.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# Everything that uses the browser runs inside try/finally so the browser
# session is shut down even if login, scraping, or saving raises.
try:
    # Opening Twitter login page and wait for manual user login
    driver.get("https://twitter.com/login?lang=en")
    print("Please login manually within the next 25 seconds...")
    time.sleep(25)

    # Lists to store scraped data (one dict per profile)
    data_list = []
    hashtags_list = []

    # Scraping each profile
    for username in usernames:
        data, hashtags = scrape_twitter_profile(username, num_tweets_to_scrape)
        data_list.append(data)
        hashtags_list.append(hashtags)

        print(f"Scraping completed for {username}")
        delay()

    print("Scraping completed for all profiles. Now processing data...")

    # Saving data into Excel files
    df = pd.DataFrame(data_list)
    df.to_excel("twitter_data.xlsx", index=False)

    df_hashtags = pd.DataFrame(hashtags_list)
    df_hashtags.to_excel("twitter_hashtags.xlsx", index=False)
finally:
    driver.quit()

print("Data processing and saving completed.")
print("Thank you!")
