In [6]:
import pandas as pd
import time
import re
from pytrends.request import TrendReq
from tqdm import tqdm

# 1. Load S&P 500 tickers from Wikipedia
sp500_df = pd.read_html("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies")[0]
tickers = sp500_df["Symbol"].tolist()

# Some tickers like BRK.B need to be cleaned for pytrends
tickers = [t.replace(".", "-") for t in tickers]

# 2. Set up pytrends
pytrends = TrendReq(hl="en-US", tz=360)

# 3. Define the timeframe and storage
timeframe = "2020-01-01 2020-12-31"
trend_data = []

# 4. Loop through tickers
for ticker in tqdm(tickers, desc="Fetching Google Trends"):
    keyword = f"{ticker} stock"
    try:
        pytrends.build_payload([keyword], timeframe=timeframe, geo="US")
        df = pytrends.interest_over_time()

        if not df.empty:
            df = df.reset_index()[["date", keyword]]
            df = df.rename(columns={keyword: "trend_score"})
            df["ticker"] = ticker
            trend_data.append(df)

        # Sleep to avoid being rate-limited
        time.sleep(3)

    except Exception as e:
        print(f"⚠️ Error fetching {ticker}: {e}")
        time.sleep(90)  # wait longer if rate limited

# 5. Combine and save
if trend_data:
    trend_df = pd.concat(trend_data, ignore_index=True)
    trend_df.to_csv("/home/rupam/DataAlpha/data/raw/Google_trends/google_trends_2020.csv", index=False)
    print("✅ Google Trends data saved to: data/processed/google_trends_2020.csv")
else:
    print("❌ No trend data fetched.")


Fetching Google Trends:  55%|█████▌    | 279/503 [19:37<15:19,  4.10s/it]

⚠️ Error fetching KR: The request failed: Google returned a response with code 429


Fetching Google Trends:  56%|█████▋    | 284/503 [21:29<39:24, 10.80s/it]  

⚠️ Error fetching LVS: HTTPSConnectionPool(host='trends.google.com', port=443): Max retries exceeded with url: /trends/api/explore?hl=en-US&tz=360&req=%7B%22comparisonItem%22%3A+%5B%7B%22keyword%22%3A+%22LVS+stock%22%2C+%22time%22%3A+%222020-01-01+2020-12-31%22%2C+%22geo%22%3A+%22US%22%7D%5D%2C+%22category%22%3A+0%2C+%22property%22%3A+%22%22%7D (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7635e508fed0>: Failed to resolve 'trends.google.com' ([Errno -3] Temporary failure in name resolution)"))


Fetching Google Trends: 100%|██████████| 503/503 [38:21<00:00,  4.58s/it]  

✅ Google Trends data saved to: data/processed/google_trends_2020.csv



