# Import libraries
from load_data_enhanced import load_data
from bls_package import get_available_categories
import pandas as pd

# Ask the BLS package for the available categories (the 200 argument is the
# requested upper bound) and report how many came back.
tickers = get_available_categories(200)
print("Found {} tickers".format(len(tickers)))

# Load the "2025-06" data for every ticker in one call.
data = load_data(tickers, "2025-06")

# Wrap the loaded data in a DataFrame and dump it to stdout.
df = pd.DataFrame(data)
print(df)

In [2]:
# Import libraries
import requests
import pandas as pd
from datetime import datetime
import warnings
# Silence every warning category for the rest of the session so that the
# notebook output stays free of library warnings.
warnings.simplefilter("ignore")

print("Libraries imported successfully!")

📊 Libraries imported successfully!


## Method 1: Using the BLS Client Module (Recommended)

In [5]:
# Create the BLS API client that the following cells use to fetch categories
# and load data.
# NOTE(review): BLSClient and API_URL are not imported/defined in the visible
# cells — presumably an earlier cell provides them; confirm before running.
client = BLSClient(API_URL)

# The client will automatically test the connection when created
# (the recorded cell output reports the connection status).

✅ Connected to BLS API at http://localhost:8000
   Data available: True


In [None]:
# Fetch the complete category list from the API (requesting up to 200) and
# preview it. Any failure inside the block — the request itself or the
# preview printing — falls through to a small hard-coded list so that the
# later cells still have something to work with.
try:
    all_categories = client.get_categories(200)
    total_found = len(all_categories)

    print(f"\nFound {total_found} total categories:")
    print("First 20 categories:")
    for position, name in enumerate(all_categories[:20], start=1):
        print(f"   {position:2d}. {name}")

    # Mention how many entries were left out of the 20-item preview.
    not_shown = total_found - 20
    if not_shown > 0:
        print(f"\n... and {not_shown} more categories")

    # Every retrieved category is used for testing.
    test_categories = all_categories
except Exception as exc:
    print(f"Could not retrieve all categories: {exc}")
    # Fallback subset used when the API call (or the preview) fails.
    test_categories = [
        "All items",
        "Food",
        "Energy",
        "Shelter",
        "Transportation",
        "Medical care",
    ]

## 📊 Basic Data Loading

In [None]:
# Target date string passed to the loader in the following cell.
target_date = "2025-06"

# Announce the size of the run and the date being requested.
print(f"Loading data for ALL {len(test_categories)} categories from Excel file")
print(f"Target Date: {target_date}")

# Preview at most ten category names, then report how many were not listed.
print("\nSample categories to be tested:")
preview_limit = 10
for position, name in enumerate(test_categories[:preview_limit], start=1):
    print(f"   {position}. {name}")
not_listed = len(test_categories) - preview_limit
if not_listed > 0:
    print(f"   ... and {not_listed} more")

In [None]:
# Load the data through the client and keep the result in `df`: the next cell
# reads `df`, so discarding the return value would leave it processing a stale
# frame from an earlier cell (or failing with NameError).
# NOTE(review): assumes client.load_data returns the DataFrame (or None when
# nothing could be loaded, which the next cell checks for) — confirm.
df = client.load_data(test_categories, target_date)

# Keep the frame as the cell's last expression so the notebook still renders it.
df

In [None]:
# Build a frame with the category label plus every nsa_* and sa_* column of
# `df`, then print the first rows and a few per-column summary statistics.
if df is None:
    print("no data available to process")
else:
    print("creating separate nsa/sa columns with actual index values:")
    print("=" * 60)

    # Start from the category labels only; the series columns are appended below.
    result_df = df[['category']].copy()

    # Collect the source columns per prefix. All nsa_* columns are copied
    # before any sa_* column, so the NSA group precedes the SA group.
    nsa_cols = [name for name in df.columns if name.startswith('nsa_')]
    sa_cols = [name for name in df.columns if name.startswith('sa_')]
    for prefix, source_cols in (('nsa_', nsa_cols), ('sa_', sa_cols)):
        for source_col in source_cols:
            # Strip the prefix and put it back to form the target column name
            # (the remainder is presumably the date part of the name).
            suffix = source_col.replace(prefix, '')
            result_df[prefix + suffix] = df[source_col]

    row_count = len(result_df)
    print(f"processed {row_count} categories")
    print(f"columns: {list(result_df.columns)}")

    # Show the leading 15 rows (display is the notebook's rich renderer).
    print("\nactual index values (first 15 categories):")
    display(result_df.head(15))

    # Everything except the label column is treated as a value column;
    # only the first four of them are summarised.
    numeric_cols = [name for name in result_df.columns if name != 'category']
    if numeric_cols:
        print("\nsummary statistics for actual index values:")
        print("=" * 50)
        for stat_col in numeric_cols[:4]:
            values = result_df[stat_col].dropna()
            if len(values) == 0:
                # Nothing but missing values in this column: skip it.
                continue
            print(f"{stat_col}:")
            print(f"   mean: {values.mean():.3f}")
            lowest = values.min()
            print(f"   min:  {lowest:.3f}")
            highest = values.max()
            print(f"   max:  {highest:.3f}")
            print(f"   range: {highest - lowest:.3f}")
            print()

    # Closing recap (nothing is written to disk here).
    print(f"total categories processed: {row_count}")
    print("data includes all categories from excel file")