# In [None]:
import json
import os
import ssl
from typing import Optional
from urllib.request import urlopen

import certifi
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Wikipedia page whose "constituents" table lists the S&P 500 companies.
WIKI_URL = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
# Directory that receives one CSV file per company.
COMPANY_DIR = "company_revenue_results"

# API keys to rotate between.
# SECURITY NOTE(review): credentials should not live in source control.
# Keys can be supplied through the FMP_API_KEYS environment variable as a
# comma-separated list; the literals below are kept only as a fallback so
# existing runs keep working, and should be revoked and removed.
API_KEYS = [
    key.strip()
    for key in os.environ.get("FMP_API_KEYS", "").split(",")
    if key.strip()
] or [
    "iq6DT26XPhKSubtLN7RUaavctUknriHy",
    "U2liplE4h7atJ1E9iAirE2cdCtxMi8Ve",
    "6wGoNRskPwA23aw0EMgWEN1JDRRVcY8M",
]

# Number of calls charged to one key before rotating to the next.
# NOTE(review): the original comment cited a daily limit of 250 calls per key
# while the value here is 200 - presumably deliberate headroom; confirm.
CALL_LIMIT_PER_KEY = 200

# Per-key call counters (same order as API_KEYS) and the index of the key
# currently in use; both are updated by get_current_api_key().
api_call_counters = [0] * len(API_KEYS)
current_key_index = 0

def get_current_api_key():
    """Return the API key to use for the next request and count the call.

    Starting from the key currently in use, the first key whose counter is
    still below ``CALL_LIMIT_PER_KEY`` is selected. The selected key's
    counter in ``api_call_counters`` is incremented and ``current_key_index``
    is updated as a side effect.

    When every key has already reached its budget the function does not
    raise: it prints a warning, moves on to the next key in round-robin
    order (as the original code did) and returns it, so callers keep their
    best-effort behaviour.

    Returns:
        str: the selected entry of ``API_KEYS``.

    Raises:
        IndexError: if ``API_KEYS`` is empty.
    """
    global current_key_index

    key_count = len(API_KEYS)

    # Look at each key at most once, beginning with the one in use.
    for offset in range(key_count):
        candidate = (current_key_index + offset) % key_count
        if api_call_counters[candidate] < CALL_LIMIT_PER_KEY:
            current_key_index = candidate
            break
    else:
        # No key is under budget. Make that visible instead of silently
        # reusing an exhausted key.
        print(
            f"Warning: all {key_count} API keys have reached the limit of "
            f"{CALL_LIMIT_PER_KEY} calls; further requests may be rejected."
        )
        if key_count:
            current_key_index = (current_key_index + 1) % key_count

    # Charge this call to the selected key.
    api_call_counters[current_key_index] += 1

    return API_KEYS[current_key_index]

def scrape_wikipedia_sp500():
    """Scrape S&P 500 companies and their sectors from Wikipedia.

    Downloads ``WIKI_URL``, finds the table with id ``constituents`` and
    reads one company per data row.

    Returns:
        list[tuple[str, str, str]]: ``(ticker, company_name, sector)`` tuples
        in page order.

    Raises:
        requests.RequestException: if the request fails, times out or comes
            back with an HTTP error status.
        ValueError: if the page contains no ``constituents`` table.
    """
    # A timeout keeps a stalled connection from hanging the whole run, and
    # raise_for_status() stops an error page from being parsed as the list.
    response = requests.get(WIKI_URL, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    table = soup.find('table', {'id': 'constituents'})
    if table is None:
        raise ValueError(f"No table with id 'constituents' found at {WIKI_URL}")

    all_rows = table.find_all('tr')
    if not all_rows:
        return []

    # Column positions on the page can change, so the sector column is
    # located by its header text when possible; index 3 (the position the
    # original code used) is the fallback.
    # NOTE(review): this assumes header cells and data cells line up
    # one-to-one - confirm against the live page.
    header_labels = [
        cell.get_text(strip=True).lower()
        for cell in all_rows[0].find_all(['th', 'td'])
    ]
    if 'gics sector' in header_labels:
        sector_index = header_labels.index('gics sector')
    else:
        sector_index = 3

    companies_data = []
    for row in all_rows[1:]:  # Skip header row
        cols = row.find_all('td')
        # Skip rows that do not carry the cells we need (e.g. spacer rows).
        if len(cols) <= max(1, sector_index):
            continue
        ticker = cols[0].text.strip()
        company_name = cols[1].text.strip()
        sector = cols[sector_index].text.strip()
        companies_data.append((ticker, company_name, sector))

    return companies_data

def get_jsonparsed_data(url):
    """Fetch *url* and return its response body parsed as JSON.

    Args:
        url: the address to request.

    Returns:
        The decoded JSON value (whatever ``json.loads`` produces for the
        response body).

    Raises:
        urllib.error.URLError: if the request fails (includes HTTP errors).
        TimeoutError: if the server does not answer within the timeout.
        ValueError: if the body is not valid UTF-8 JSON.
    """
    # urlopen()'s ``cafile`` argument was deprecated in Python 3.6 and removed
    # in 3.13; an explicit SSL context built from certifi's CA bundle is the
    # supported way to verify against the same certificates.
    context = ssl.create_default_context(cafile=certifi.where())

    # The ``with`` block closes the connection even if reading fails.
    with urlopen(url, timeout=30, context=context) as response:
        payload = response.read().decode("utf-8")

    return json.loads(payload)

def get_quarterly_revenue(
    symbol: str,
    start_date: str,
    end_date: str,
    period: str = "annual",
) -> Optional[pd.DataFrame]:
    """
    Fetch and process revenue data from Financial Modeling Prep API.

    NOTE(review): despite the function name, the default requests *annual*
    income statements, exactly as the original code did. Pass
    ``period="quarter"`` for quarterly statements - whether the API key in
    use is allowed to request them is not visible here; confirm.

    Args:
        symbol: ticker symbol placed in the request URL.
        start_date: inclusive lower bound for the statement date.
        end_date: inclusive upper bound for the statement date.
        period: value of the API's ``period`` query parameter.

    Returns:
        A DataFrame sorted by date with the columns ``date``, ``quarter``,
        ``year``, ``revenue`` and ``revenue_yoy_growth`` (percent, rounded to
        two decimals), or ``None`` when the API returns nothing usable or any
        error occurs. The frame may be empty if no statement falls inside the
        date range.
    """
    api_key = get_current_api_key()  # Get the current API key
    base_url = f"https://financialmodelingprep.com/api/v3/income-statement/{symbol}"
    url = f"{base_url}?period={period}&limit=400&apikey={api_key}"

    try:
        # Fetch data
        data = get_jsonparsed_data(url)
        if not data:
            return None

        # Statement records are expected as a JSON list. Anything else
        # (presumably an API error object - confirm against the API docs)
        # is reported directly instead of failing inside pandas.
        if not isinstance(data, list):
            print(f"Error processing data for {symbol}: unexpected API response: {data}")
            return None

        # Convert to DataFrame
        df = pd.DataFrame(data)

        # Parse the statement date so it can be compared and sorted
        df['date'] = pd.to_datetime(df['date'])

        # Filter date range (both bounds inclusive)
        mask = (df['date'] >= start_date) & (df['date'] <= end_date)
        df = df[mask].copy()

        # Sort by date
        df = df.sort_values('date')

        # Extract calendar quarter and year of the statement date
        df['quarter'] = df['date'].dt.quarter
        df['year'] = df['date'].dt.year

        # Calculate YoY growth. Rows are in date order, so the same period one
        # year earlier is 1 row back for annual statements and 4 rows back for
        # quarterly ones (assuming no missing periods). The original grouped
        # by quarter and *also* lagged by 4, which compared each row with the
        # one four years earlier.
        rows_per_year = 4 if period == "quarter" else 1
        df['revenue_yoy_growth'] = df['revenue'].pct_change(rows_per_year) * 100

        # Format results
        result_df = df[['date', 'quarter', 'year', 'revenue', 'revenue_yoy_growth']].copy()
        result_df['revenue'] = result_df['revenue'].round(2)
        result_df['revenue_yoy_growth'] = result_df['revenue_yoy_growth'].round(2)

        return result_df

    except Exception as e:
        # Best-effort per company: report the problem and let the caller
        # carry on with the next ticker.
        print(f"Error processing data for {symbol}: {e}")
        return None

def process_company_revenue(ticker, company_name, start_date, end_date):
    """Process revenue data for a company and save it.

    Fetches the revenue frame for *ticker*, tags every row with the ticker
    and company name, and writes it to ``<COMPANY_DIR>/<ticker>_revenue.csv``
    (overwriting any existing file). Progress is reported with ``print``.

    Args:
        ticker: ticker symbol to fetch and to use in the file name.
        company_name: value stored in the ``company_name`` column.
        start_date: inclusive lower bound passed to the fetch.
        end_date: inclusive upper bound passed to the fetch.

    Returns:
        None.
    """
    company_revenue_df = get_quarterly_revenue(ticker, start_date, end_date)

    # An empty frame means no statement fell inside the date range; treat it
    # like a missing result rather than writing a header-only CSV and
    # reporting it as saved.
    if company_revenue_df is None or company_revenue_df.empty:
        print(f"No data found for {ticker}.")
        return

    company_revenue_df['ticker'] = ticker
    company_revenue_df['company_name'] = company_name

    # Do not rely on the caller having created the output directory.
    os.makedirs(COMPANY_DIR, exist_ok=True)

    # Save the company data as soon as processed
    csv_filename = os.path.join(COMPANY_DIR, f"{ticker}_revenue.csv")
    company_revenue_df.to_csv(csv_filename, index=False, mode='w', header=True)  # Write data to CSV
    print(f"Saved revenue data for {ticker} to {csv_filename}")

def main():
    """Scrape the S&P 500 list and export revenue data for every company.

    Creates ``COMPANY_DIR`` if needed, then processes each scraped company in
    turn for the fixed window 2015-01-01 through 2024-06-30.
    """
    os.makedirs(COMPANY_DIR, exist_ok=True)

    # Reporting window applied to every company
    window_start, window_end = "2015-01-01", "2024-06-30"

    # The scraped sector is not needed for the revenue export
    for symbol, name, _sector in scrape_wikipedia_sp500():
        print(f"\nProcessing company: {name} ({symbol})")
        process_company_revenue(symbol, name, window_start, window_end)

# Run the scrape-and-export pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
