In [None]:
from urllib.request import urlopen
import certifi
import json
import pandas as pd
from typing import List, Dict

def get_jsonparsed_data(url, timeout=30):
    """
    Download a URL over HTTPS and return its body parsed as JSON.

    Args:
        url (str): Address to request.
        timeout (float): Seconds to wait on the connection before giving up,
            so a stalled server cannot hang the caller forever.

    Returns:
        The decoded JSON document (whatever `json.loads` produces).

    Raises:
        urllib.error.URLError: If the request fails or times out.
        ValueError: If the body is not valid JSON.
    """
    # Local import keeps this function self-contained.
    import ssl

    # `urlopen(..., cafile=...)` was deprecated in Python 3.6 and removed in
    # 3.13; the supported way to pin the CA bundle is an explicit SSL context.
    tls_context = ssl.create_default_context(cafile=certifi.where())

    # The context manager closes the connection even if reading fails.
    with urlopen(url, context=tls_context, timeout=timeout) as response:
        payload = response.read().decode("utf-8")
    return json.loads(payload)

def get_quarterly_revenue(api_key: str, symbol: str, start_date: str, end_date: str,
                          period: str = "annual") -> pd.DataFrame:
    """
    Fetch revenue from the Financial Modeling Prep income-statement endpoint
    and compute year-over-year (YoY) revenue growth.

    Args:
        api_key (str): Your FMP API key
        symbol (str): Stock symbol
        start_date (str): Start date in YYYY-MM-DD format (inclusive)
        end_date (str): End date in YYYY-MM-DD format (inclusive)
        period (str): Statement period to request, "annual" or "quarter".
            Defaults to "annual", which is what the request has always asked
            for despite the function's name; pass "quarter" for quarterly
            statements.

    Returns:
        pd.DataFrame: Columns date, quarter, year, revenue and
        revenue_yoy_growth (percent, rounded to 2 decimals), sorted by date.
        Returns None when the API returns no usable data or when fetching or
        processing fails (the error is printed).

    Raises:
        ValueError: If `period` is not "annual" or "quarter".
    """
    # How many rows back is "the same period one year earlier".
    rows_per_year = {"annual": 1, "quarter": 4}
    if period not in rows_per_year:
        raise ValueError(f"period must be 'annual' or 'quarter', got {period!r}")

    # Construct URL for FMP API. It embeds the API key, so it is never printed.
    base_url = f"https://financialmodelingprep.com/api/v3/income-statement/{symbol}"
    url = f"{base_url}?period={period}&limit=400&apikey={api_key}"

    try:
        # Fetch data (once, and inside the try so network errors are handled)
        data = get_jsonparsed_data(url)
        if not data:
            return None
        if isinstance(data, dict):
            # A list of statements is expected; a lone object is presumably
            # an error payload from the API.
            print(f"Error processing data: unexpected API response: {data}")
            return None

        # Convert to DataFrame
        df = pd.DataFrame(data)
        df['date'] = pd.to_datetime(df['date'])

        # Keep the requested window, oldest statement first
        in_range = (df['date'] >= pd.Timestamp(start_date)) & (df['date'] <= pd.Timestamp(end_date))
        df = df[in_range].sort_values('date').copy()

        # Extract quarter and year
        df['quarter'] = df['date'].dt.quarter
        df['year'] = df['date'].dt.year

        # YoY growth: compare each row with the one exactly one year earlier
        # in the date-sorted series. fill_method=None keeps missing revenue
        # as NaN instead of forward-filling it into a fake 0% change.
        df['revenue_yoy_growth'] = df['revenue'].pct_change(
            periods=rows_per_year[period], fill_method=None
        ) * 100

        # Format results
        result_df = df[[
            'date', 'quarter', 'year', 'revenue', 'revenue_yoy_growth'
        ]].copy()

        # Round numbers for better readability
        result_df['revenue'] = result_df['revenue'].round(2)
        result_df['revenue_yoy_growth'] = result_df['revenue_yoy_growth'].round(2)

        return result_df

    except Exception as e:
        # Deliberately broad: report the failure and return None rather than
        # crash the caller.
        print(f"Error processing data: {e}")
        return None

# Example usage: fetch one symbol, print its revenue growth and save it to CSV.
if __name__ == "__main__":
    import os

    # The key can be supplied through the FMP_API_KEY environment variable.
    # NOTE(review): the literal fallback keeps the script runnable as before,
    # but credentials should not live in source; remove it once the
    # environment variable is in use.
    API_KEY = os.environ.get("FMP_API_KEY", "lhjT6XFmqVkPV78AlKyuNtip6aymeVgT")
    SYMBOL = "AAPL"
    START_DATE = "2015-03-31"
    END_DATE = "2024-06-30"

    results = get_quarterly_revenue(
        api_key=API_KEY,
        symbol=SYMBOL,
        start_date=START_DATE,
        end_date=END_DATE
    )

    # get_quarterly_revenue returns None when nothing could be fetched.
    if results is not None:
        # Display results
        pd.set_option('display.float_format', lambda x: '%.2f' % x)
        print("\nQuarterly Revenue YoY Growth:")
        print(results.to_string(index=False))

        # Export to CSV
        csv_filename = f"{SYMBOL}_quarterly_revenue_growth.csv"
        results.to_csv(csv_filename, index=False)
        print(f"\nResults saved to {csv_filename}")

        # Display some summary statistics (NaN growth rows are skipped by pandas)
        print("\nSummary Statistics:")
        print(f"Average YoY Growth: {results['revenue_yoy_growth'].mean():.2f}%")
        print(f"Max YoY Growth: {results['revenue_yoy_growth'].max():.2f}%")
        print(f"Min YoY Growth: {results['revenue_yoy_growth'].min():.2f}%")

In [None]:
import os
import requests
import certifi
import pandas as pd
from urllib.request import urlopen
from bs4 import BeautifulSoup
import json

# Wikipedia page whose "constituents" table lists the S&P 500 companies.
WIKI_URL = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
# Directory that receives one revenue CSV per sector.
SECTOR_DIR = "sector_revenue_results"

# List of API keys to rotate between. Set the FMP_API_KEYS environment
# variable to a comma-separated list to supply your own keys.
# NOTE(review): the literal fallback preserves the previous behaviour, but
# credentials should not live in source; remove it once the environment
# variable is in use.
API_KEYS = [
    key.strip()
    for key in os.environ.get("FMP_API_KEYS", "").split(",")
    if key.strip()
] or ["iq6DT26XPhKSubtLN7RUaavctUknriHy", "U2liplE4h7atJ1E9iAirE2cdCtxMi8Ve", "6wGoNRskPwA23aw0EMgWEN1JDRRVcY8M"]

# Calls handed out per key before rotating to the next one.
# NOTE(review): the previous comment cited a limit of 250 calls per day per
# key; 200 presumably leaves headroom below that limit. Confirm.
CALL_LIMIT_PER_KEY = 200

# Initialize counters for API key usage (one counter per key, same order)
api_call_counters = [0] * len(API_KEYS)
# Index into API_KEYS of the key currently being handed out
current_key_index = 0

def get_current_api_key():
    """
    Return an API key that still has call budget and count one call against it.

    Keys are used in order: the current key is handed out until its counter
    reaches CALL_LIMIT_PER_KEY, then the next key takes over.

    Returns:
        str: The API key to use for the next request.

    Raises:
        RuntimeError: If no keys are configured, or if every key has already
            reached CALL_LIMIT_PER_KEY. Previously the rotation wrapped around
            and kept handing out keys that were already over their limit.
    """
    global current_key_index

    if not API_KEYS:
        raise RuntimeError("No API keys are configured.")

    # Look at each key at most once, starting with the one currently in use.
    for _ in range(len(API_KEYS)):
        if api_call_counters[current_key_index] < CALL_LIMIT_PER_KEY:
            # Count this call against the key that is being handed out.
            api_call_counters[current_key_index] += 1
            return API_KEYS[current_key_index]
        # Current key is used up: rotate to the next one.
        current_key_index = (current_key_index + 1) % len(API_KEYS)

    raise RuntimeError(
        f"All {len(API_KEYS)} API keys have reached the limit of "
        f"{CALL_LIMIT_PER_KEY} calls."
    )

def scrape_wikipedia_sp500():
    """Scrape S&P 500 companies and their sectors from Wikipedia.

    Returns:
        list: One (ticker, company_name, sector) tuple per table row, in page
        order.

    Raises:
        requests.RequestException: If the page cannot be downloaded
            (connection error, timeout or HTTP error status).
        ValueError: If the page has no table with id "constituents".
    """
    # A descriptive User-Agent identifies the script to Wikimedia's servers;
    # the timeout keeps a stalled connection from hanging the run forever.
    response = requests.get(
        WIKI_URL,
        headers={"User-Agent": "sp500-sector-revenue-script/1.0 (python-requests)"},
        timeout=30,
    )
    # Fail loudly on an error page instead of parsing it as if it were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    table = soup.find('table', {'id': 'constituents'})
    if table is None:
        raise ValueError(f"No table with id 'constituents' found at {WIKI_URL}")

    rows = table.find_all('tr')
    if not rows:
        return []

    # Column positions are resolved from the header row rather than
    # hard-coded, so a change in the page layout does not silently return a
    # different column. The original positions remain the fallback.
    header_cells = [
        cell.text.strip().lower() for cell in rows[0].find_all(['th', 'td'])
    ]

    def column_index(header_prefix, fallback):
        for position, header in enumerate(header_cells):
            if header.startswith(header_prefix):
                return position
        return fallback

    ticker_col = column_index("symbol", 0)
    name_col = column_index("security", 1)
    sector_col = column_index("gics sector", 3)
    widest_col = max(ticker_col, name_col, sector_col)

    companies_data = []
    for row in rows[1:]:  # Skip header row
        cols = row.find_all('td')
        if len(cols) <= widest_col:
            # Not a full data row (e.g. an extra header or spacer row).
            continue
        companies_data.append((
            cols[ticker_col].text.strip(),
            cols[name_col].text.strip(),
            cols[sector_col].text.strip(),
        ))

    return companies_data

def get_jsonparsed_data(url, timeout=30):
    """
    Download a URL over HTTPS and return its body parsed as JSON.

    Args:
        url (str): Address to request.
        timeout (float): Seconds to wait on the connection before giving up,
            so a stalled server cannot hang the caller forever.

    Returns:
        The decoded JSON document (whatever `json.loads` produces).

    Raises:
        urllib.error.URLError: If the request fails or times out.
        ValueError: If the body is not valid JSON.
    """
    # Local import keeps this function self-contained.
    import ssl

    # `urlopen(..., cafile=...)` was deprecated in Python 3.6 and removed in
    # 3.13; the supported way to pin the CA bundle is an explicit SSL context.
    tls_context = ssl.create_default_context(cafile=certifi.where())

    # The context manager closes the connection even if reading fails.
    with urlopen(url, context=tls_context, timeout=timeout) as response:
        payload = response.read().decode("utf-8")
    return json.loads(payload)

def get_quarterly_revenue(symbol: str, start_date: str, end_date: str,
                          period: str = "annual") -> pd.DataFrame:
    """
    Fetch revenue from the Financial Modeling Prep income-statement endpoint
    and compute year-over-year (YoY) revenue growth.

    Args:
        symbol (str): Stock symbol
        start_date (str): Start date in YYYY-MM-DD format (inclusive)
        end_date (str): End date in YYYY-MM-DD format (inclusive)
        period (str): Statement period to request, "annual" or "quarter".
            Defaults to "annual", which is what the request has always asked
            for despite the function's name; pass "quarter" for quarterly
            statements.

    Returns:
        pd.DataFrame: Columns date, quarter, year, revenue and
        revenue_yoy_growth (percent, rounded to 2 decimals), sorted by date.
        Returns None when the API returns no usable data or when fetching or
        processing fails (the error is printed).

    Raises:
        ValueError: If `period` is not "annual" or "quarter".
    """
    # How many rows back is "the same period one year earlier".
    rows_per_year = {"annual": 1, "quarter": 4}
    if period not in rows_per_year:
        raise ValueError(f"period must be 'annual' or 'quarter', got {period!r}")

    api_key = get_current_api_key()  # Get the current API key
    base_url = f"https://financialmodelingprep.com/api/v3/income-statement/{symbol}"
    url = f"{base_url}?period={period}&limit=400&apikey={api_key}"

    try:
        # Fetch data
        data = get_jsonparsed_data(url)
        if not data:
            return None
        if isinstance(data, dict):
            # A list of statements is expected; a lone object is presumably
            # an error payload from the API.
            print(f"Error processing data for {symbol}: unexpected API response: {data}")
            return None

        # Convert to DataFrame
        df = pd.DataFrame(data)
        df['date'] = pd.to_datetime(df['date'])

        # Keep the requested window, oldest statement first
        in_range = (df['date'] >= pd.Timestamp(start_date)) & (df['date'] <= pd.Timestamp(end_date))
        df = df[in_range].sort_values('date').copy()

        # Extract quarter and year
        df['quarter'] = df['date'].dt.quarter
        df['year'] = df['date'].dt.year

        # YoY growth: compare each row with the one exactly one year earlier
        # in the date-sorted series. fill_method=None keeps missing revenue
        # as NaN instead of forward-filling it into a fake 0% change.
        df['revenue_yoy_growth'] = df['revenue'].pct_change(
            periods=rows_per_year[period], fill_method=None
        ) * 100

        # Format results
        result_df = df[['date', 'quarter', 'year', 'revenue', 'revenue_yoy_growth']].copy()
        result_df['revenue'] = result_df['revenue'].round(2)
        result_df['revenue_yoy_growth'] = result_df['revenue_yoy_growth'].round(2)

        return result_df

    except Exception as e:
        # Deliberately broad: one bad symbol must not abort a batch run.
        print(f"Error processing data for {symbol}: {e}")
        return None

def process_sector_revenue(sector, companies, start_date, end_date):
    """Fetch revenue for each company of one sector and append it to the sector's CSV.

    Args:
        sector: Sector name; also used to build the CSV file name.
        companies: Iterable of (ticker, company_name) pairs.
        start_date: Start of the date window passed to get_quarterly_revenue.
        end_date: End of the date window passed to get_quarterly_revenue.

    Each company's rows are written as soon as they are fetched. The file is
    created with a header if it does not exist yet; otherwise rows are
    appended without one.
    """
    sector_file = os.path.join(SECTOR_DIR, f"{sector}_revenue.csv")

    for ticker, company_name in companies:
        print(f"Processing {ticker} ({company_name}) in sector {sector}...")
        revenue = get_quarterly_revenue(ticker, start_date, end_date)

        # Nothing came back for this company: report it and move on.
        if revenue is None:
            print(f"No data found for {ticker}.")
            continue

        # Tag every row so companies can be told apart inside the shared file.
        revenue['ticker'] = ticker
        revenue['company_name'] = company_name

        # First write creates the file with a header; later writes append rows only.
        first_write = not os.path.exists(sector_file)
        revenue.to_csv(
            sector_file,
            index=False,
            mode='w' if first_write else 'a',
            header=first_write,
        )

        print(f"Saved revenue data for {ticker} to {sector_file}")
def main():
    """Download revenue data for every S&P 500 company, one sector at a time.

    Creates the output directory, scrapes the company list, groups the
    companies by sector and hands each group to process_sector_revenue.
    """
    os.makedirs(SECTOR_DIR, exist_ok=True)

    # Bucket (ticker, company_name) pairs under their sector, keeping page order.
    by_sector = {}
    for symbol, name, sector_name in scrape_wikipedia_sp500():
        by_sector.setdefault(sector_name, []).append((symbol, name))

    # Date window requested for every company.
    window_start, window_end = "2015-01-01", "2024-06-30"

    # Sectors are handled in the order they were first encountered.
    for sector_name in by_sector:
        print(f"\nProcessing sector: {sector_name}")
        process_sector_revenue(sector_name, by_sector[sector_name], window_start, window_end)

# Run the scrape-and-download pipeline only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()
