## 1. Consolidate Symbols for Price Fetching

First, we need to create a master list of all unique symbols from the financial statement files we generated in the previous notebook. This will be our target list for fetching historical price data.


In [5]:
import pandas as pd
import os
import glob

def consolidate_symbols_from_financials():
    """
    Build the master symbol list from the financial statement CSV files.

    Reads every 'financials_*.csv' file in '../Statements/Statement Data',
    collects the distinct non-missing values of the 'symbol' column and
    writes them to 'symbols_from_statements.csv' (one 'symbol' column,
    no index).

    Prints a message and returns early when the directory does not exist or
    contains no matching files. Always returns None.
    """
    financials_dir = '../Statements/Statement Data'
    output_path = 'symbols_from_statements.csv'

    # Check if the directory exists
    if not os.path.isdir(financials_dir):
        print(f"Error: Directory not found at '{financials_dir}'. Please ensure financial statements have been generated.")
        return

    # Find all financial statement CSV files
    csv_files = glob.glob(os.path.join(financials_dir, 'financials_*.csv'))

    if not csv_files:
        print(f"No financial statement CSVs found in '{financials_dir}'.")
        return

    print(f"Found {len(csv_files)} financial statement files to process.")

    # Read and concatenate all files.
    # pandas treats strings such as "NA", "NULL" or "nan" as missing values by
    # default, which would silently erase tickers spelled that way. Restrict
    # the missing-value marker to the empty string and keep 'symbol' as text.
    df_list = [
        pd.read_csv(
            f,
            dtype={'symbol': str},
            keep_default_na=False,
            na_values=[''],
        )
        for f in csv_files
    ]
    full_df = pd.concat(df_list, ignore_index=True)

    # Get unique symbols; rows without a symbol cannot be priced, so skip them
    unique_symbols = full_df['symbol'].dropna().unique()

    # Create a new DataFrame and save to CSV
    symbols_df = pd.DataFrame(unique_symbols, columns=['symbol'])
    symbols_df.to_csv(output_path, index=False)

    print(f"Found {len(full_df)} total financial records.")
    print(f"Extracted {len(symbols_df)} unique symbols.")
    print(f"Master symbol list saved to '{output_path}'.")

# Run the function
consolidate_symbols_from_financials()



Found 4 financial statement files to process.
Found 298487 total financial records.
Extracted 10995 unique symbols.
Master symbol list saved to 'symbols_from_statements.csv'.


## 2. Phase 1: Process Data from Local 9.5GB CSV

The first part of our hybrid approach is to leverage your large local data file. This script will use DuckDB to query the `daily_price_data.csv` without loading the whole file into memory.

For every symbol found, it will extract the daily adjusted-close rows (`date`, `symbol`, `adjClose`) from the past 10 years and save them as a separate per-symbol file in the `partials_price` directory. Any symbols **not** found in the local file will be saved to a separate list to be handled in the next phase.


In [None]:
import pandas as pd
import duckdb
import os
from tqdm.notebook import tqdm
from datetime import datetime, timedelta

def process_local_csv_for_prices():
    """
    Extract the last 10 years of daily prices from the large local CSV.

    Steps:
      1. Load the symbol list and skip symbols that already have a file in
         the partials directory (this makes the cell resumable).
      2. If the large CSV exists, query it with DuckDB for the remaining
         symbols and write one '<symbol>.csv' per symbol found.
      3. Write the symbols that are still missing to
         'symbols_not_found_locally.csv' for the API phase (an empty file
         with just the header is written when nothing is missing).

    A failing DuckDB query is reported with a message and treated as
    "nothing found locally". Always returns None.
    """
    # --- Configuration ---
    SYMBOLS_FILE_PATH = 'symbols_from_statements.csv'
    BIG_CSV_PATH = 'Price Data/daily_price_data.csv'
    PARTIALS_DIR = 'Price Data/partials_price'
    API_FETCH_LIST_PATH = 'symbols_not_found_locally.csv'

    os.makedirs(PARTIALS_DIR, exist_ok=True)

    # 1. Load all symbols and determine which ones still need to be processed.
    # Only the empty string counts as missing, so tickers such as "NA" are
    # not converted to NaN by pandas' default missing-value list.
    all_symbols_df = pd.read_csv(
        SYMBOLS_FILE_PATH,
        dtype={'symbol': str},
        keep_default_na=False,
        na_values=[''],
    )
    all_symbols = all_symbols_df['symbol'].dropna().unique().tolist()
    # splitext strips only the final extension, so a dotted ticker such as
    # 'BRK.B.csv' maps back to 'BRK.B' instead of being truncated to 'BRK'.
    completed_symbols = {os.path.splitext(f)[0] for f in os.listdir(PARTIALS_DIR)}
    symbols_to_process = [s for s in all_symbols if s not in completed_symbols]

    print(f"{len(completed_symbols)} symbols already processed. Checking for {len(symbols_to_process)} remaining symbols in local CSV.")

    symbols_found_in_csv = []

    # 2. Query the large CSV if it exists
    if not os.path.exists(BIG_CSV_PATH):
        print(f"Large CSV file not found at '{BIG_CSV_PATH}'. All symbols will be fetched from the API.")
    elif not symbols_to_process:
        print("All required symbols have already been processed.")
    else:
        con = None
        try:
            # Connect to DuckDB
            con = duckdb.connect()
            con.execute("PRAGMA disable_progress_bar;")

            # Define date range for the query
            ten_years_ago_str = (datetime.now() - timedelta(days=365*10)).strftime('%Y-%m-%d')

            # Expose the wanted symbols as a view instead of pasting a Python
            # tuple repr into the SQL text: the repr is invalid SQL for a
            # single symbol (trailing comma) and for symbols containing quotes.
            con.register('symbols_wanted', pd.DataFrame({'symbol': symbols_to_process}))

            # Daily data within the last 10 years for the wanted symbols
            query = f"""
            SELECT
                "date",
                "symbol",
                "adjClose"
            FROM read_csv_auto('{BIG_CSV_PATH}')
            WHERE "symbol" IN (SELECT "symbol" FROM symbols_wanted)
              AND "date" >= '{ten_years_ago_str}';
            """
            results_df = con.execute(query).df()

            if not results_df.empty:
                grouped = results_df.groupby('symbol')
                for symbol, group_df in tqdm(grouped, desc="Saving daily data from local CSV"):
                    output_path = os.path.join(PARTIALS_DIR, f"{symbol}.csv")
                    group_df.to_csv(output_path, index=False)
                    symbols_found_in_csv.append(symbol)
                print(f"\nSuccessfully processed and saved data for {len(symbols_found_in_csv)} symbols from the local file.")
            else:
                print("No new symbols found in the local CSV for the specified date range.")

        except Exception as e:
            # Best effort: anything not saved locally falls through to the API list.
            print(f"DuckDB query failed: {e}")
        finally:
            if con is not None:
                con.close()

    # 3. Create the list of symbols that still need to be fetched from the API
    found_locally = set(symbols_found_in_csv)
    symbols_for_api = [s for s in symbols_to_process if s not in found_locally]

    if symbols_for_api:
        api_df = pd.DataFrame(symbols_for_api, columns=['symbol'])
        api_df.to_csv(API_FETCH_LIST_PATH, index=False)
        print(f"\nSaved a list of {len(symbols_for_api)} symbols to be fetched from the API to '{API_FETCH_LIST_PATH}'.")
    else:
        print("\nNo symbols need to be fetched from the API. All required data was found locally.")
        # Create an empty file to prevent errors in the next step
        pd.DataFrame(columns=['symbol']).to_csv(API_FETCH_LIST_PATH, index=False)

# Run the local processing function
process_local_csv_for_prices()



0 symbols already processed. Checking for 10995 remaining symbols in local CSV.


Saving daily data from local CSV:   0%|          | 0/8818 [00:00<?, ?it/s]


Successfully processed and saved data for 8818 symbols from the local file.

Saved a list of 2177 symbols to be fetched from the API to '../output/symbols_to_fetch_from_api.csv'.


## 2. Phase 2: Fetch Remaining Data from API

Now, run this cell to fetch the price data for all the symbols that were **not** found in the local CSV file.

It reads the list of missing symbols from `symbols_not_found_locally.csv` and uses the same robust, resumable, and concurrent fetching logic as our previous tasks. It will save each successfully fetched symbol to the same `partials_price` directory, and you can stop and restart it without losing progress.


In [None]:
import pandas as pd
import os
from dotenv import load_dotenv
import asyncio
import aiohttp
from tqdm.notebook import tqdm
import nest_asyncio
from datetime import datetime, timedelta

# Apply nest_asyncio to allow running asyncio event loop in Jupyter
nest_asyncio.apply()

async def fetch_api_data_and_save(session, symbol, pbar, partials_dir, api_key):
    """
    Fetch the last 10 years of daily prices for one symbol and save them.

    Args:
        session: HTTP client session; its ``get`` must return an async
            context manager yielding the response.
        symbol: Ticker to request; also used as the output file name.
        pbar: Progress bar whose ``write`` method is used for log messages.
        partials_dir: Directory receiving '<symbol>.csv'.
        api_key: API key sent as the 'apikey' query parameter.

    On success the 'date', 'symbol' and 'adjClose' columns are written to
    '<partials_dir>/<symbol>.csv'. Nothing is written when the response is
    empty or when every attempt fails. Errors are logged through ``pbar``
    rather than raised. Always returns None.
    """
    ten_years_ago = (datetime.now() - timedelta(days=365*10)).strftime('%Y-%m-%d')
    today = datetime.now().strftime('%Y-%m-%d')

    url = "https://financialmodelingprep.com/stable/historical-price-eod/dividend-adjusted"
    params = {"symbol": symbol, "from": ten_years_ago, "to": today, "apikey": api_key}

    max_retries = 3
    # Rate-limit responses do not consume a retry attempt, so they need their
    # own cap; otherwise an exhausted quota would keep this loop alive forever.
    max_rate_limit_waits = 20
    default_rate_limit_wait = 15  # seconds, used when Retry-After is absent or unusable

    attempts = 0
    rate_limit_waits = 0
    while attempts < max_retries:
        try:
            async with session.get(url, params=params, timeout=60) as response:
                if response.status == 429:
                    rate_limit_waits += 1
                    if rate_limit_waits > max_rate_limit_waits:
                        pbar.write(f"Giving up on {symbol}: still rate limited after {max_rate_limit_waits} waits.")
                        return
                    try:
                        retry_after = int(response.headers.get("Retry-After", default_rate_limit_wait))
                    except (TypeError, ValueError):
                        # Retry-After may also be an HTTP date; fall back to the default wait.
                        retry_after = default_rate_limit_wait
                    pbar.write(f"Rate limit hit for {symbol}. Waiting {retry_after}s...")
                    await asyncio.sleep(retry_after)
                    continue  # Retry without incrementing attempts

                response.raise_for_status()
                data = await response.json()
                # The response is a list of rows directly, not nested under 'historical'
                if not data:
                    # No data found, but request was successful. Count as done.
                    return

                df = pd.DataFrame(data)
                if not df.empty:
                    # Ensure 'symbol' column exists for consistency, as it might be missing in some API responses
                    if 'symbol' not in df.columns:
                        df['symbol'] = symbol
                    output_path = os.path.join(partials_dir, f"{symbol}.csv")
                    df[['date', 'symbol', 'adjClose']].to_csv(output_path, index=False)
                return  # Success

        except Exception as e:
            attempts += 1
            if attempts < max_retries:
                wait_time = 2 ** attempts  # Exponential backoff: 2s, then 4s
                pbar.write(f"Request for {symbol} failed with {e}. Retrying in {wait_time}s... (Attempt {attempts}/{max_retries})")
                await asyncio.sleep(wait_time)
            else:
                pbar.write(f"API fetch failed for {symbol} after {max_retries} retries. Final error: {e}")
    return

async def fetch_api_data_main():
    """
    Fetch prices from the API for every symbol not found in the local CSV.

    Reads the symbols listed in 'symbols_not_found_locally.csv', skips those
    that already have a file in the partials directory, and fetches the rest
    concurrently (at most 15 requests in flight). Afterwards, any listed
    symbol that still has no partial file is written to
    'symbols_with_no_price.csv'.

    Returns early with a message when the API key or the symbol list is
    missing, or when nothing is left to fetch. Always returns None.
    """
    # --- Configuration ---
    API_FETCH_LIST_PATH = 'symbols_not_found_locally.csv'
    PARTIALS_DIR = 'Price Data/partials_price'
    FAILED_SYMBOLS_PATH = 'symbols_with_no_price.csv'

    load_dotenv()
    API_KEY = os.getenv('API_KEY')
    if not API_KEY:
        print("API_KEY not found in .env file.")
        return

    # 1. Load symbols to fetch and check against already completed ones.
    # Only the empty string counts as missing, so tickers such as "NA" are
    # not converted to NaN by pandas' default missing-value list.
    try:
        symbols_to_fetch_df = pd.read_csv(
            API_FETCH_LIST_PATH,
            dtype={'symbol': str},
            keep_default_na=False,
            na_values=[''],
        )
    except FileNotFoundError:
        print(f"'{API_FETCH_LIST_PATH}' not found. Please run the local CSV processing cell first.")
        return

    all_api_symbols = symbols_to_fetch_df['symbol'].dropna().unique().tolist()
    # The directory is listed below and written to by the fetch tasks.
    os.makedirs(PARTIALS_DIR, exist_ok=True)
    # splitext strips only the final extension, so a dotted ticker such as
    # 'BRK.B.csv' maps back to 'BRK.B' instead of being truncated to 'BRK'.
    completed_symbols = {os.path.splitext(f)[0] for f in os.listdir(PARTIALS_DIR)}
    symbols_to_process = [s for s in all_api_symbols if s not in completed_symbols]

    if not symbols_to_process:
        print("All required symbols have already been processed. No API calls needed.")
        return

    print(f"Starting API fetch for {len(symbols_to_process)} symbols.")

    # 2. Asynchronous fetching with reduced concurrency
    sem = asyncio.Semaphore(15)
    async with aiohttp.ClientSession() as session:
        with tqdm(total=len(symbols_to_process), desc="Fetching from API") as pbar:

            async def fetch_task(s):
                # The semaphore bounds in-flight requests; the bar advances
                # once per symbol whether or not data was saved.
                async with sem:
                    await fetch_api_data_and_save(session, s, pbar, PARTIALS_DIR, API_KEY)
                pbar.update(1)

            await asyncio.gather(*(fetch_task(symbol) for symbol in symbols_to_process))

    print("\n--- API data fetching is complete. ---")

    # 3. Check for any symbols that failed during this run and output them
    final_completed_symbols = {os.path.splitext(f)[0] for f in os.listdir(PARTIALS_DIR)}
    failed_symbols = [s for s in all_api_symbols if s not in final_completed_symbols]

    if failed_symbols:
        failed_df = pd.DataFrame(failed_symbols, columns=['symbol'])
        failed_df.to_csv(FAILED_SYMBOLS_PATH, index=False)
        print(f"Identified {len(failed_symbols)} symbols that could not be fetched. Their list has been saved to '{FAILED_SYMBOLS_PATH}'.")
    else:
        print("All symbols were fetched successfully.")

# Run the API fetching process
await fetch_api_data_main()



Starting API fetch for 2177 symbols.


Fetching from API:   0%|          | 0/2177 [00:00<?, ?it/s]


--- API data fetching is complete. ---
Identified 196 symbols that could not be fetched. Their list has been saved to '../output/symbols_with_no_price.csv'.


## 3. Assemble Final Price Data Reports

Once the fetching script above is complete, run this cell. It will gather all the individual symbol files from the `partials_price` directory and assemble them into four CSV files, split by the first character of the symbol (A-F, G-L, M-R, S-Z), which are easier to work with.


In [6]:
import pandas as pd
import os
import glob

def assemble_price_reports():
    """
    Combine the per-symbol price files into four batch files.

    Every '*.csv' in 'Price Data/partials_price' is read and concatenated,
    then the rows are split by the upper-cased first character of 'symbol'
    into the ranges A-F, G-L, M-R and S-Z. Each non-empty range is written to
    'Price Data/price_data_<range>.csv'. Messages are printed for missing
    input, for a missing 'symbol' column and for empty ranges.
    """
    partials_dir = 'Price Data/partials_price'
    output_dir = 'Price Data'
    os.makedirs(output_dir, exist_ok=True)

    if not os.path.exists(partials_dir):
        print(f"Directory with partial files not found: {partials_dir}")
        return

    partial_paths = glob.glob(os.path.join(partials_dir, '*.csv'))
    if not partial_paths:
        print("No partial symbol files were found to assemble.")
        return

    print(f"Assembling data from {len(partial_paths)} individual symbol files...")

    frames = [pd.read_csv(path) for path in partial_paths]
    full_df = pd.concat(frames, ignore_index=True)

    if 'symbol' not in full_df.columns:
        print("Error: 'symbol' column not found in the assembled data. Cannot create batches.")
        return

    # Upper-cased first character of each symbol, computed once and shared
    # by all four range tests.
    initial = full_df['symbol'].str[0].str.upper()
    range_masks = [
        ("A-F", initial <= 'F'),
        ("G-L", (initial >= 'G') & (initial <= 'L')),
        ("M-R", (initial >= 'M') & (initial <= 'R')),
        ("S-Z", initial >= 'S'),
    ]

    for chunk_name, mask in range_masks:
        chunk_df = full_df[mask]
        if chunk_df.empty:
            print(f"No data for chunk '{chunk_name}'.")
            continue
        output_path = os.path.join(output_dir, f"price_data_{chunk_name}.csv")
        chunk_df.to_csv(output_path, index=False)
        print(f"Saved chunk '{chunk_name}' with {len(chunk_df)} records to {output_path}")

    print("--- Assembly of final price reports is complete. ---")

# Run the assembly function
assemble_price_reports()



Assembling data from 10801 individual symbol files...
Saved chunk 'A-F' with 5243452 records to Price Data\price_data_A-F.csv
Saved chunk 'G-L' with 2502525 records to Price Data\price_data_G-L.csv
Saved chunk 'M-R' with 3249138 records to Price Data\price_data_M-R.csv
Saved chunk 'S-Z' with 3052851 records to Price Data\price_data_S-Z.csv
--- Assembly of final price reports is complete. ---


## 4. Fetch Historical Price Data (10-40 Years Ago)

This section fetches historical price data for the period starting from January 1, 1985, up to 10 years before the current date. This complements the 10-year data already fetched.

The process is designed to be efficient and resumable:
- It uses an asynchronous approach with `aiohttp` to handle many requests concurrently.
- A semaphore limits concurrency to avoid overwhelming the API server.
- A delay is introduced between creating fetch tasks to respect the API rate limit (3000 calls/minute).
- Progress is saved to a `partials_price_30yr` directory, so if the script is interrupted, it can be resumed without losing work.
- Any symbols that fail to fetch after multiple retries are logged to `symbols_with_no_price_30yr.csv`.



In [2]:
import pandas as pd
import os
from dotenv import load_dotenv
import asyncio
import aiohttp
from tqdm.notebook import tqdm
import nest_asyncio
from datetime import datetime, timedelta

# Apply nest_asyncio to allow running asyncio event loop in Jupyter
nest_asyncio.apply()

async def fetch_api_data_and_save_30yr(session, symbol, pbar, partials_dir, api_key, from_date, to_date):
    """
    Fetch daily prices for one symbol over an explicit date range and save them.

    Args:
        session: HTTP client session; its ``get`` must return an async
            context manager yielding the response.
        symbol: Ticker to request; also used as the output file name.
        pbar: Progress bar whose ``write`` method is used for log messages.
        partials_dir: Directory receiving '<symbol>.csv'.
        api_key: API key sent as the 'apikey' query parameter.
        from_date: Start of the range, sent as the 'from' query parameter.
        to_date: End of the range, sent as the 'to' query parameter.

    On success the 'date', 'symbol' and 'adjClose' columns are written to
    '<partials_dir>/<symbol>.csv'. Nothing is written when the response is
    empty or when every attempt fails. Errors are logged through ``pbar``
    rather than raised. Always returns None.
    """
    url = "https://financialmodelingprep.com/stable/historical-price-eod/dividend-adjusted"
    params = {"symbol": symbol, "from": from_date, "to": to_date, "apikey": api_key}

    max_retries = 3
    # Rate-limit responses do not consume a retry attempt, so they need their
    # own cap; otherwise an exhausted quota would keep this loop alive forever.
    max_rate_limit_waits = 20
    default_rate_limit_wait = 15  # seconds, used when Retry-After is absent or unusable

    attempts = 0
    rate_limit_waits = 0
    while attempts < max_retries:
        try:
            async with session.get(url, params=params, timeout=60) as response:
                if response.status == 429:
                    rate_limit_waits += 1
                    if rate_limit_waits > max_rate_limit_waits:
                        pbar.write(f"Giving up on {symbol}: still rate limited after {max_rate_limit_waits} waits.")
                        return
                    try:
                        retry_after = int(response.headers.get("Retry-After", default_rate_limit_wait))
                    except (TypeError, ValueError):
                        # Retry-After may also be an HTTP date; fall back to the default wait.
                        retry_after = default_rate_limit_wait
                    pbar.write(f"Rate limit hit for {symbol}. Waiting {retry_after}s...")
                    await asyncio.sleep(retry_after)
                    continue  # Retry without incrementing attempts

                response.raise_for_status()
                data = await response.json()

                if not data:
                    return  # No data found, but request was successful

                df = pd.DataFrame(data)
                if not df.empty:
                    if 'symbol' not in df.columns:
                        df['symbol'] = symbol
                    output_path = os.path.join(partials_dir, f"{symbol}.csv")
                    df[['date', 'symbol', 'adjClose']].to_csv(output_path, index=False)
                return  # Success

        except Exception as e:
            attempts += 1
            if attempts < max_retries:
                wait_time = 2 ** attempts  # Exponential backoff: 2s, then 4s
                pbar.write(f"Request for {symbol} failed with {e}. Retrying in {wait_time}s... (Attempt {attempts}/{max_retries})")
                await asyncio.sleep(wait_time)
            else:
                pbar.write(f"API fetch failed for {symbol} after {max_retries} retries. Final error: {e}")
    return

async def fetch_api_data_main_30yr():
    """
    Fetch older price history (1985-01-01 up to 10 years ago) for all symbols.

    Reads the symbols in 'symbols_from_statements.csv', skips those that
    already have a file in 'Price Data/partials_price_30yr', and fetches the
    rest concurrently: at most CONCURRENCY_LIMIT requests in flight, with a
    short pause between task launches to pace the request rate. Afterwards,
    any symbol that still has no partial file is written to
    'symbols_with_no_price_30yr.csv'.

    Returns early with a message when the API key or the symbol list is
    missing, or when nothing is left to fetch. Always returns None.
    """
    # --- Configuration ---
    SYMBOLS_FILE_PATH = 'symbols_from_statements.csv'
    PARTIALS_DIR = 'Price Data/partials_price_30yr'
    FAILED_SYMBOLS_PATH = 'symbols_with_no_price_30yr.csv'

    os.makedirs(PARTIALS_DIR, exist_ok=True)

    load_dotenv()
    API_KEY = os.getenv('API_KEY')
    if not API_KEY:
        print("API_KEY not found in .env file.")
        return

    # 1. Define date range
    from_date = '1985-01-01'
    to_date = (datetime.now() - timedelta(days=365*10)).strftime('%Y-%m-%d')

    # 2. Load symbols and check against already completed ones.
    # Only the empty string counts as missing, so tickers such as "NA" are
    # not converted to NaN by pandas' default missing-value list.
    try:
        all_symbols_df = pd.read_csv(
            SYMBOLS_FILE_PATH,
            dtype={'symbol': str},
            keep_default_na=False,
            na_values=[''],
        )
    except FileNotFoundError:
        print(f"'{SYMBOLS_FILE_PATH}' not found. Please run the first cell to generate it.")
        return

    all_symbols = all_symbols_df['symbol'].dropna().unique().tolist()
    # splitext strips only the final extension, so a dotted ticker such as
    # 'BRK.B.csv' maps back to 'BRK.B' instead of being truncated to 'BRK'.
    completed_symbols = {os.path.splitext(f)[0] for f in os.listdir(PARTIALS_DIR)}
    symbols_to_process = [s for s in all_symbols if s not in completed_symbols]

    if not symbols_to_process:
        print("All required symbols for the 10-30 year range have already been processed.")
        return

    print(f"Starting 10-30 year API fetch for {len(symbols_to_process)} symbols (from {from_date} to {to_date}).")

    # 3. Asynchronous fetching with rate limiting
    CONCURRENCY_LIMIT = 20
    API_CALL_INTERVAL = 0.077 # To stay safely under 3000 calls/min
    sem = asyncio.Semaphore(CONCURRENCY_LIMIT)

    async with aiohttp.ClientSession() as session:
        with tqdm(total=len(symbols_to_process), desc="Fetching 10-30yr Prices") as pbar:
            tasks = []

            async def fetch_with_sem(symbol):
                # The semaphore bounds in-flight requests; the bar advances
                # once per symbol whether or not data was saved.
                async with sem:
                    await fetch_api_data_and_save_30yr(session, symbol, pbar, PARTIALS_DIR, API_KEY, from_date, to_date)
                pbar.update(1)

            for symbol in symbols_to_process:
                # create_task starts the request right away; the sleep spaces
                # out the launches.
                tasks.append(asyncio.create_task(fetch_with_sem(symbol)))
                await asyncio.sleep(API_CALL_INTERVAL)

            await asyncio.gather(*tasks)

    print("\n--- API data fetching for 10-30 year range is complete. ---")

    # 4. Check for any symbols that failed and save them
    final_completed_symbols = {os.path.splitext(f)[0] for f in os.listdir(PARTIALS_DIR)}
    failed_symbols = [s for s in all_symbols if s not in final_completed_symbols]

    if failed_symbols:
        failed_df = pd.DataFrame(failed_symbols, columns=['symbol'])
        failed_df.to_csv(FAILED_SYMBOLS_PATH, index=False)
        print(f"Identified {len(failed_symbols)} symbols that could not be fetched. List saved to '{FAILED_SYMBOLS_PATH}'.")
    else:
        print("All symbols were fetched successfully.")

# Run the API fetching process
await fetch_api_data_main_30yr()



Starting 10-30 year API fetch for 10995 symbols (from 1985-01-01 to 2015-08-28).


Fetching 10-30yr Prices:   0%|          | 0/10995 [00:00<?, ?it/s]


--- API data fetching for 10-30 year range is complete. ---
Identified 6895 symbols that could not be fetched. List saved to 'symbols_with_no_price_30yr.csv'.


## 5. Assemble 30-Year Price Data Reports

This cell assembles the individual symbol files from the `partials_price_30yr` directory into four CSV files, split by the first character of the symbol (A-F, G-L, M-R, S-Z). The resulting files will be stored in the `Price Data` directory with a `30yr_` prefix.


In [3]:
import pandas as pd
import os
import glob

def assemble_price_reports_30yr():
    """
    Combine the per-symbol 30-year price files into four batch files.

    Every '*.csv' in 'Price Data/partials_price_30yr' is read and
    concatenated, then the rows are split by the upper-cased first character
    of 'symbol' into the ranges A-F, G-L, M-R and S-Z. Each non-empty range
    is written to 'Price Data/30yr_price_data_<range>.csv'. Messages are
    printed for missing input, for a missing 'symbol' column and for empty
    ranges.
    """
    partials_dir = 'Price Data/partials_price_30yr'
    output_dir = 'Price Data'
    os.makedirs(output_dir, exist_ok=True)

    if not os.path.exists(partials_dir):
        print(f"Directory with partial files not found: {partials_dir}")
        return

    partial_paths = glob.glob(os.path.join(partials_dir, '*.csv'))
    if not partial_paths:
        print("No partial 30-year symbol files were found to assemble.")
        return

    print(f"Assembling 30-year data from {len(partial_paths)} individual symbol files...")

    frames = [pd.read_csv(path) for path in partial_paths]
    full_df = pd.concat(frames, ignore_index=True)

    if 'symbol' not in full_df.columns:
        print("Error: 'symbol' column not found in the assembled data. Cannot create batches.")
        return

    # Upper-cased first character of each symbol, computed once and shared
    # by all four range tests.
    initial = full_df['symbol'].str[0].str.upper()
    range_masks = [
        ("A-F", initial <= 'F'),
        ("G-L", (initial >= 'G') & (initial <= 'L')),
        ("M-R", (initial >= 'M') & (initial <= 'R')),
        ("S-Z", initial >= 'S'),
    ]

    for chunk_name, mask in range_masks:
        chunk_df = full_df[mask]
        if chunk_df.empty:
            print(f"No 30-year data for chunk '{chunk_name}'.")
            continue
        output_path = os.path.join(output_dir, f"30yr_price_data_{chunk_name}.csv")
        chunk_df.to_csv(output_path, index=False)
        print(f"Saved 30-year chunk '{chunk_name}' with {len(chunk_df)} records to {output_path}")

    print("--- Assembly of final 30-year price reports is complete. ---")

# Run the assembly function
assemble_price_reports_30yr()



Assembling 30-year data from 4100 individual symbol files...
Saved 30-year chunk 'A-F' with 5563601 records to Price Data\30yr_price_data_A-F.csv
Saved 30-year chunk 'G-L' with 2594242 records to Price Data\30yr_price_data_G-L.csv
Saved 30-year chunk 'M-R' with 3411769 records to Price Data\30yr_price_data_M-R.csv
Saved 30-year chunk 'S-Z' with 3248351 records to Price Data\30yr_price_data_S-Z.csv
--- Assembly of final 30-year price reports is complete. ---


## 6. Consolidate All Price Data

This final step merges the 10-year and 30-year price data files. It reads each corresponding pair of chunked files (e.g., `price_data_A-F.csv` and `30yr_price_data_A-F.csv`), combines them, removes any duplicate rows, and overwrites the original 10-year file with the consolidated data. This leaves you with a single, complete set of price data spanning the last 30+ years.


In [5]:
import pandas as pd
import os

def consolidate_all_price_data():
    """
    Fold each 30-year batch file into its matching 10-year batch file.

    For every range (A-F, G-L, M-R, S-Z) both files are read from
    'Price Data', stacked with the 10-year rows first, de-duplicated on
    ('symbol', 'date') keeping the first occurrence, and written back over
    the 10-year file. A range is skipped with a warning when either of its
    two files is missing.
    """
    data_dir = 'Price Data'

    print("--- Starting consolidation of all price data. ---")

    for chunk in ("A-F", "G-L", "M-R", "S-Z"):
        base_path = os.path.join(data_dir, f"price_data_{chunk}.csv")
        history_path = os.path.join(data_dir, f"30yr_price_data_{chunk}.csv")

        if not os.path.exists(base_path):
            print(f"Warning: Base file not found for chunk '{chunk}'. Skipping.")
            continue
        if not os.path.exists(history_path):
            print(f"Warning: 30-year file not found for chunk '{chunk}'. Skipping.")
            continue

        print(f"Processing chunk '{chunk}'...")

        # 10-year rows come first so that keep='first' prefers them whenever
        # both files contain the same (symbol, date) pair.
        stacked = pd.concat(
            [pd.read_csv(base_path), pd.read_csv(history_path)],
            ignore_index=True,
        )
        rows_before = len(stacked)
        deduped = stacked.drop_duplicates(subset=['symbol', 'date'], keep='first')
        rows_after = len(deduped)

        # The consolidated result replaces the original 10-year file
        deduped.to_csv(base_path, index=False)

        print(f"  - Combined '{chunk}': {rows_before} rows -> {rows_after} rows after deduplication.")
        print(f"  - Saved consolidated data to '{base_path}'.")

    print("--- Consolidation of all price reports is complete. ---")

# Run the consolidation function
consolidate_all_price_data()



--- Starting consolidation of all price data. ---
Processing chunk 'A-F'...
  - Combined 'A-F': 10807053 rows -> 10802555 rows after deduplication.
  - Saved consolidated data to 'Price Data\price_data_A-F.csv'.
Processing chunk 'G-L'...
  - Combined 'G-L': 5096767 rows -> 5094623 rows after deduplication.
  - Saved consolidated data to 'Price Data\price_data_G-L.csv'.
Processing chunk 'M-R'...
  - Combined 'M-R': 6660907 rows -> 6658106 rows after deduplication.
  - Saved consolidated data to 'Price Data\price_data_M-R.csv'.
Processing chunk 'S-Z'...
  - Combined 'S-Z': 6301202 rows -> 6298545 rows after deduplication.
  - Saved consolidated data to 'Price Data\price_data_S-Z.csv'.
--- Consolidation of all price reports is complete. ---
