# Fixed Multi-Ticker Data Collection

This notebook contains the fixed version of your code to pull financial data for multiple tickers.

**Key fixes:**
1. Proper error handling to prevent empty concatenation
2. Better API response handling for different data formats
3. Data validation and cleaning
4. Progress tracking and summary reporting
5. Rate limiting to avoid API issues

**Target columns:** `quarter`, `ticker`, `industry`, `sector`, `debt_to_assets`, `mkt_cap`, `stock_price`

In [1]:
import os
import time
from typing import Optional, List, Dict, Any

import pandas as pd
import requests

# The API key is read from the environment so the credential is never stored
# in the notebook file. Set it before starting Jupyter, e.g.
#   export FMP_API_KEY=<your key>
# NOTE: any key that was previously hardcoded in this cell should be revoked.
def load_api_key(env_var: str = "FMP_API_KEY") -> str:
    """Return the API key stored in the environment variable ``env_var``.

    Surrounding whitespace is stripped. When the variable is missing or blank
    a warning is printed and an empty string is returned, so the notebook
    still loads and the request helpers report the failure per call.
    """
    key = os.environ.get(env_var, "").strip()
    if not key:
        print(f"Warning: environment variable {env_var} is not set; API requests will fail.")
    return key


API = load_api_key()

## Helper Functions

These functions handle API calls and data processing with proper error handling.

In [2]:
def get_json(url: str, params: Optional[Dict[str, Any]] = None) -> Optional[List[Dict]]:
    """Fetch JSON from the API and normalise it to a list of records.

    Args:
        url: Endpoint to request.
        params: Optional query parameters. The mapping is copied before the
            API key is added, so neither the caller's dict nor a shared
            default is ever modified.

    Returns:
        The list stored under ``"historical"`` when the response is a dict
        containing that key, the response itself when it is already a list,
        and ``None`` for any other shape or when the request fails. Errors
        are printed, never raised.
    """
    api_key = None
    try:
        api_key = API
        # Fresh dict per call: a mutable default (or the caller's own dict)
        # must not accumulate the "apikey" entry across calls.
        query = dict(params or {})
        query["apikey"] = api_key

        response = requests.get(url, params=query, timeout=10)
        response.raise_for_status()  # Raise an exception for bad status codes
        js = response.json()

        # Handle different response formats
        if isinstance(js, dict) and "historical" in js:
            return js["historical"]
        if isinstance(js, list):
            return js

        print(f"Unexpected response format: {type(js)}")
        return None
    except Exception as e:
        # HTTP error messages include the full request URL with its query
        # string, so mask the key before it reaches the notebook output.
        message = str(e)
        if isinstance(api_key, str) and api_key:
            message = message.replace(api_key, "***")
        print(f"Error fetching data from {url}: {message}")
        return None

In [3]:
def _debt_to_assets(bs: pd.DataFrame) -> pd.Series:
    """Compute (short-term debt + long-term debt) / total assets as float64."""
    short_debt = pd.to_numeric(bs["shortTermDebt"], errors="coerce").fillna(0)
    long_debt = pd.to_numeric(bs["longTermDebt"], errors="coerce").fillna(0)
    total_assets = pd.to_numeric(bs["totalAssets"], errors="coerce")
    # Zero assets become NaN (not pd.NA): pd.NA would turn the column into
    # object dtype, which then propagates into the combined dataset.
    total_assets = total_assets.where(total_assets != 0)
    return ((short_debt + long_debt) / total_assets).astype("float64")


def _last_value_per_quarter(records: List[Dict], value_col: str, out_col: str) -> pd.DataFrame:
    """Reduce dated records to one row per calendar quarter, keeping the latest date."""
    return (
        pd.DataFrame(records)
        .assign(
            date=lambda d: pd.to_datetime(d["date"]),
            quarter=lambda d: d["date"].dt.to_period("Q"),
        )
        .sort_values("date")
        .drop_duplicates("quarter", keep="last")
        .rename(columns={value_col: out_col})
        [["quarter", out_col]]
    )


def process_ticker_data(ticker: str) -> Optional[pd.DataFrame]:
    """Build the quarterly dataset for one ticker.

    Fetches the quarterly balance sheet, historical market capitalisation,
    historical prices and the company profile, then joins them on calendar
    quarter.

    Args:
        ticker: Symbol to request from the API.

    Returns:
        A DataFrame with columns ``quarter``, ``ticker``, ``industry``,
        ``sector``, ``debt_to_assets``, ``mkt_cap`` and ``stock_price`` with
        incomplete rows removed, or ``None`` when any source is missing, no
        rows survive, or processing raises. Failures are printed, not raised.
    """
    print(f"Processing {ticker}...")

    try:
        base_url = "https://financialmodelingprep.com/api/v3"

        # Get balance sheet data
        bs = get_json(
            f"{base_url}/balance-sheet-statement/{ticker}",
            {"period": "quarter", "limit": 20}
        )

        # Get market cap data
        mc = get_json(
            f"{base_url}/historical-market-capitalization/{ticker}",
            {"limit": 1000}
        )

        # Get price data
        px = get_json(
            f"{base_url}/historical-price-full/{ticker}",
            {"serietype": "line", "timeseries": 1000}
        )

        # Get company profile
        profile = get_json(f"{base_url}/profile/{ticker}")

        # Check if all data is available
        if not all([bs, mc, px, profile]):
            print(f"Missing data for {ticker}, skipping.")
            return None

        # `or "Unknown"` also covers a key that is present but null/empty;
        # a None here would make the final dropna() discard every row.
        industry = profile[0].get("industry") or "Unknown"
        sector = profile[0].get("sector") or "Unknown"

        # Process balance sheet data
        bs_df = (
            pd.DataFrame(bs)
            .loc[:, ["date", "shortTermDebt", "longTermDebt", "totalAssets"]]
            .assign(
                date=lambda d: pd.to_datetime(d["date"]),
                quarter=lambda d: d["date"].dt.to_period("Q"),
                debt_to_assets=_debt_to_assets,
            )
            .dropna(subset=["debt_to_assets"])
        )

        # Market cap and price use the last observation in each quarter
        mc_df = _last_value_per_quarter(mc, "marketCap", "mkt_cap")
        px_df = _last_value_per_quarter(px, "close", "stock_price")

        # Merge all data
        merged = (
            bs_df.merge(mc_df, on="quarter", how="left")
                 .merge(px_df, on="quarter", how="left")
                 .assign(ticker=ticker, industry=industry, sector=sector)
                 [["quarter", "ticker", "industry", "sector", "debt_to_assets", "mkt_cap", "stock_price"]]
                 .dropna()  # Remove rows with missing data
        )

        if len(merged) == 0:
            print(f"No valid data after merging for {ticker}")
            return None

        return merged

    except Exception as e:
        print(f"Error processing {ticker}: {e}")
        return None

In [4]:
def get_ticker_data(tickers: List[str], max_tickers: int = 50, delay: float = 0.5) -> pd.DataFrame:
    """Collect quarterly data for several tickers into one DataFrame.

    Args:
        tickers: Symbols to process, in order.
        max_tickers: Only the first ``max_tickers`` symbols are processed.
        delay: Seconds to pause between consecutive tickers (rate limiting).
            No pause is made after the last ticker or when ``delay`` is not
            positive.

    Returns:
        All per-ticker frames concatenated, sorted by ``ticker`` then
        ``quarter`` with a fresh index. When nothing was collected, an empty
        DataFrame that still carries the expected columns.
    """
    columns = ["quarter", "ticker", "industry", "sector", "debt_to_assets", "mkt_cap", "stock_price"]
    selected = tickers[:max_tickers]
    total = len(selected)

    all_data = []
    successful_tickers = []
    failed_tickers = []

    for position, ticker in enumerate(selected, start=1):
        print(f"\nProgress: {position}/{total}")

        ticker_data = process_ticker_data(ticker)

        if ticker_data is not None and len(ticker_data) > 0:
            all_data.append(ticker_data)
            successful_tickers.append(ticker)
            print(f"✓ Successfully processed {ticker} - {len(ticker_data)} quarters")
        else:
            failed_tickers.append(ticker)
            print(f"✗ Failed to process {ticker}")

        # Rate limiting: only needed when another request round follows
        if delay > 0 and position < total:
            time.sleep(delay)

    print(f"\n=== SUMMARY ===")
    print(f"Successful: {len(successful_tickers)} tickers")
    print(f"Failed: {len(failed_tickers)} tickers")

    if len(all_data) == 0:
        print("No data collected. Returning empty DataFrame.")
        return pd.DataFrame(columns=columns)

    # Combine all data
    final_df = pd.concat(all_data, ignore_index=True)
    final_df = final_df.sort_values(["ticker", "quarter"]).reset_index(drop=True)

    print(f"Final dataset: {len(final_df)} rows, {final_df['ticker'].nunique()} unique tickers")
    return final_df

## Step 1: Get List of US Tickers

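Fetch the full symbol list from the API, then keep only NYSE/NASDAQ symbols with a price above 5 (to drop penny stocks and entries without a price) that are at most five characters long and contain no `.` (to drop complex symbols).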

In [5]:
# Download the symbol list and reduce it to the tickers we want to process
print("Fetching ticker list...")
tickers_data = get_json("https://financialmodelingprep.com/api/v3/stock/list")

if not tickers_data:
    # No list available: fall back to a small hand-picked sample
    print("Failed to fetch ticker list. Using sample tickers.")
    us_tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "META", "NVDA", "JPM", "JNJ", "V"]
else:
    us_tickers = []
    for listing in tickers_data:
        # Only the two US exchanges
        if listing["exchangeShortName"] not in ["NYSE", "NASDAQ"]:
            continue

        # Skip penny stocks and entries without a price
        price = listing.get("price")
        if price is None or not price > 5:
            continue

        # Skip complex symbols (too long, or containing a dot)
        symbol = listing["symbol"]
        if len(symbol) > 5 or "." in symbol:
            continue

        us_tickers.append(symbol)

    print(f"Found {len(us_tickers)} US tickers")
    print(f"Sample tickers: {us_tickers[:10]}")

Fetching ticker list...
Found 12283 US tickers
Sample tickers: ['STXD', 'DXYZ', 'ACVA', 'STXG', 'PWRD', 'TPZ', 'THIR', 'NVR', 'SMR', 'BRK-A']


## Step 2: Test with Single Ticker

Before processing multiple tickers, let's test with AAPL to ensure everything works correctly.

In [6]:
# Smoke-test the pipeline on one ticker before starting the batch run
print("Testing with a single ticker (AAPL)...")
test_data = process_ticker_data("AAPL")

if test_data is None:
    print("Test failed. Please check your API key.")
else:
    # Show a preview, the column dtypes and the frame dimensions
    print("\nTest successful! Sample data:")
    print(test_data.head())
    print("\nData types:")
    print(test_data.dtypes)
    print(f"\nShape: {test_data.shape}")

Testing with a single ticker (AAPL)...
Processing AAPL...

Test successful! Sample data:
  quarter ticker              industry      sector  debt_to_assets  \
0  2025Q1   AAPL  Consumer Electronics  Technology        0.296426   
1  2024Q4   AAPL  Consumer Electronics  Technology        0.281323   
2  2024Q3   AAPL  Consumer Electronics  Technology        0.326207   
3  2024Q2   AAPL  Consumer Electronics  Technology        0.305490   
4  2024Q1   AAPL  Consumer Electronics  Technology        0.309978   

        mkt_cap  stock_price  
0  3.330635e+12       222.13  
1  3.754818e+12       250.42  
2  3.514042e+12       233.00  
3  3.219858e+12       210.62  
4  2.641796e+12       171.48  

Data types:
quarter           period[Q-DEC]
ticker                   object
industry                 object
sector                   object
debt_to_assets          float64
mkt_cap                 float64
stock_price             float64
dtype: object

Shape: (16, 7)


## Step 3: Collect Data for Multiple Tickers

Now let's process multiple tickers. You can adjust `max_tickers` to control how many tickers to process.

**Note:** Start with a small number (20-50) for testing, then increase as needed.

In [7]:
# Number of tickers to pull in this run; keep it small (e.g. 20) while testing
MAX_TICKERS = 20

print(f"Starting data collection for {MAX_TICKERS} tickers...")
final_dataset = get_ticker_data(tickers=us_tickers, max_tickers=MAX_TICKERS)

print("\nData collection complete!")

Starting data collection for 20 tickers...

Progress: 1/20
Processing STXD...
Missing data for STXD, skipping.
✗ Failed to process STXD

Progress: 2/20
Processing DXYZ...
✓ Successfully processed DXYZ - 2 quarters

Progress: 3/20
Processing ACVA...
✓ Successfully processed ACVA - 16 quarters

Progress: 4/20
Processing STXG...
Missing data for STXG, skipping.
✗ Failed to process STXG

Progress: 5/20
Processing PWRD...
Missing data for PWRD, skipping.
✗ Failed to process PWRD

Progress: 6/20
Processing TPZ...
✓ Successfully processed TPZ - 7 quarters

Progress: 7/20
Processing THIR...
Missing data for THIR, skipping.
✗ Failed to process THIR

Progress: 8/20
Processing NVR...
✓ Successfully processed NVR - 16 quarters

Progress: 9/20
Processing SMR...
✓ Successfully processed SMR - 16 quarters

Progress: 10/20
Processing BRK-A...
✓ Successfully processed BRK-A - 16 quarters

Progress: 11/20
Processing NMAX...
Missing data for NMAX, skipping.
✗ Failed to process NMAX

Progress: 12/20
Proce

## Step 4: Display Results and Save Data

Let's examine the collected data and save it to a CSV file.

In [8]:
# Preview the collected dataset, summarise it, and write it to disk
if len(final_dataset) == 0:
    print("❌ No data was collected. Please check your API key and network connection.")
else:
    print("=== SAMPLE DATA ===")
    print(final_dataset.head(10))

    # One line per headline figure, printed together
    quarters = final_dataset['quarter']
    summary_lines = [
        "\n=== DATA SUMMARY ===",
        f"Total rows: {len(final_dataset):,}",
        f"Unique tickers: {final_dataset['ticker'].nunique()}",
        f"Date range: {quarters.min()} to {quarters.max()}",
        f"Industries: {final_dataset['industry'].nunique()}",
        f"Sectors: {final_dataset['sector'].nunique()}",
    ]
    print("\n".join(summary_lines))

    print("\n=== COLUMN STATISTICS ===")
    print(final_dataset.describe())

    # Persist the dataset as CSV (without the index column)
    filename = "stock_data_notebook.csv"
    final_dataset.to_csv(filename, index=False)
    print(f"\n✅ Data saved to '{filename}'")

=== SAMPLE DATA ===
  quarter ticker            industry             sector debt_to_assets  \
0  2021Q2   ACVA  Auto - Dealerships  Consumer Cyclical       0.003023   
1  2021Q3   ACVA  Auto - Dealerships  Consumer Cyclical       0.002881   
2  2021Q4   ACVA  Auto - Dealerships  Consumer Cyclical       0.003922   
3  2022Q1   ACVA  Auto - Dealerships  Consumer Cyclical       0.063755   
4  2022Q2   ACVA  Auto - Dealerships  Consumer Cyclical       0.079275   
5  2022Q3   ACVA  Auto - Dealerships  Consumer Cyclical       0.085442   
6  2022Q4   ACVA  Auto - Dealerships  Consumer Cyclical       0.083799   
7  2023Q1   ACVA  Auto - Dealerships  Consumer Cyclical       0.095966   
8  2023Q2   ACVA  Auto - Dealerships  Consumer Cyclical        0.11213   
9  2023Q3   ACVA  Auto - Dealerships  Consumer Cyclical       0.109479   

        mkt_cap  stock_price  
0  3.973622e+09        25.63  
1  2.786754e+09        17.89  
2  2.941018e+09        18.84  
3  2.320782e+09        14.81  
4  1.02850

## Step 5: Basic Data Analysis (Optional)

Let's do some quick analysis of the collected data.

In [9]:
# Quick descriptive statistics. This cell only reads final_dataset: derived
# values live in local variables so that re-running the save cell afterwards
# still writes exactly the collected columns.
if len(final_dataset) > 0:
    print("=== INDUSTRY BREAKDOWN ===")
    industry_counts = final_dataset['industry'].value_counts()
    print(industry_counts.head(10))

    print("\n=== SECTOR BREAKDOWN ===")
    sector_counts = final_dataset['sector'].value_counts()
    print(sector_counts)

    print("\n=== DEBT-TO-ASSETS STATISTICS ===")
    debt_ratio = final_dataset['debt_to_assets']
    print(f"Mean: {debt_ratio.mean():.4f}")
    print(f"Median: {debt_ratio.median():.4f}")
    print(f"Min: {debt_ratio.min():.4f}")
    print(f"Max: {debt_ratio.max():.4f}")

    print("\n=== MARKET CAP RANGES ===")
    # Scaled copy for display only; final_dataset itself is left unchanged
    mkt_cap_billions = final_dataset['mkt_cap'] / 1e9
    print(f"Mean Market Cap: ${mkt_cap_billions.mean():.2f}B")
    print(f"Median Market Cap: ${mkt_cap_billions.median():.2f}B")
    print(f"Largest Market Cap: ${mkt_cap_billions.max():.2f}B")
    print(f"Smallest Market Cap: ${mkt_cap_billions.min():.2f}B")

=== INDUSTRY BREAKDOWN ===
industry
Oil & Gas Integrated           32
Gold                           23
Entertainment                  17
Auto - Dealerships             16
Insurance - Diversified        16
Communication Equipment        16
Residential Construction       16
Computer Hardware              16
Renewable Utilities            16
Telecommunications Services    16
Name: count, dtype: int64

=== SECTOR BREAKDOWN ===
sector
Basic Materials           38
Communication Services    33
Consumer Cyclical         32
Energy                    32
Technology                32
Financial Services        25
Utilities                 16
Name: count, dtype: int64

=== DEBT-TO-ASSETS STATISTICS ===
Mean: 0.2285
Median: 0.1639
Min: 0.0000
Max: 1.5692

=== MARKET CAP RANGES ===
Mean Market Cap: $144.33B
Median Market Cap: $18.44B
Largest Market Cap: $1148.34B
Smallest Market Cap: $0.08B


## Step 6: Scale Up (Optional)

If you want to collect data for more tickers, you can run this cell with a larger number.

In [10]:
# Optional scale-up run: uncomment the block below to collect a larger batch.
# WARNING: This will take longer and use more API calls
# (process_ticker_data issues four API requests per ticker).

# LARGE_BATCH = 100  # Adjust as needed
# print(f"Collecting data for {LARGE_BATCH} tickers...")
# large_dataset = get_ticker_data(us_tickers, max_tickers=LARGE_BATCH)

# if len(large_dataset) > 0:
#     large_dataset.to_csv("large_stock_data.csv", index=False)
#     print(f"Large dataset saved: {len(large_dataset)} rows, {large_dataset['ticker'].nunique()} tickers")

# With the block above commented out, this cell only prints a reminder.
print("Scale-up cell ready. Uncomment the code above to run with more tickers.") 

Scale-up cell ready. Uncomment the code above to run with more tickers.
