**Data Sources (as per Part 3 requirements):**
- BLS Employment Data: `s3://rearc-deepa-demo/raw/pr/pr.data.0.Current`
- DataUSA Population: `s3://rearc-deepa-demo/raw/datausa/population/`

**Analyses Performed:**
1. Population statistics for years 2013-2018 (mean and standard deviation)
2. Best year per BLS series (year with maximum sum of quarterly values)
3. Series PRS30006032 Period Q01 joined with population data

In [1]:
!pip install boto3 awscrt



In [2]:
import pandas as pd
import boto3
import json
from io import StringIO, BytesIO
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

print("✓ Libraries imported successfully")
print(f"Pandas version: {pd.__version__}")
print(f"Boto3 version: {boto3.__version__}")

✓ Libraries imported successfully
Pandas version: 2.2.3
Boto3 version: 1.42.14


## 2. Configure S3 Client

In [3]:
import subprocess
import os

# Run AWS SSO login
aws_path = r"C:\Program Files\Amazon\AWSCLIV2\aws.exe"
if os.path.exists(aws_path):
    result = subprocess.run([aws_path, 'sso', 'login'], capture_output=True, text=True)
    print(result.stdout)
    if result.stderr:
        print(result.stderr)
    print("\n✓ AWS login completed. Browser should have opened for authentication.")
else:
    print("⚠️ AWS CLI not found. Please run 'aws sso login' in a terminal window.")



Missing the following required SSO configuration values: sso_start_url, sso_region. To make sure this profile is properly configured to use SSO, please run: aws configure sso


✓ AWS login completed. Browser should have opened for authentication.


In [4]:

s3_client = boto3.client('s3', region_name='eu-north-1')
bucket_name = 'rearc-deepa-demo'

print(f"✓ S3 client configured for bucket: {bucket_name}")


✓ S3 client configured for bucket: rearc-deepa-demo


## 3. Load BLS Employment Data

Load the BLS pr.data.0.Current file (tab-delimited CSV with employment metrics)

In [5]:
# Load BLS data file
bls_data_key = 'raw/pr/pr.data.0.Current'
obj = s3_client.get_object(Bucket=bucket_name, Key=bls_data_key)

In [6]:
obj

{'ResponseMetadata': {'RequestId': 'YW930BYY0YP33VZB',
  'HostId': 'Dk9mAX2qh5b5Vn9msNeY1FW6AaicInMoPIwMEdLWKh6tG60uu09gtf9PeIFKODYNZp7Aix2a0itdVFXzYcdPl05G+3NZXax4',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'Dk9mAX2qh5b5Vn9msNeY1FW6AaicInMoPIwMEdLWKh6tG60uu09gtf9PeIFKODYNZp7Aix2a0itdVFXzYcdPl05G+3NZXax4',
   'x-amz-request-id': 'YW930BYY0YP33VZB',
   'date': 'Sat, 20 Dec 2025 13:55:31 GMT',
   'last-modified': 'Fri, 19 Dec 2025 14:06:45 GMT',
   'etag': '"1d7786e2769dae2411cffb668e1c0466"',
   'x-amz-checksum-crc32': 'RAtGZQ==',
   'x-amz-checksum-type': 'FULL_OBJECT',
   'x-amz-server-side-encryption': 'AES256',
   'x-amz-meta-source-last-modified': '2025-09-04T07:30:00+00:00',
   'x-amz-meta-source-url': 'https://download.bls.gov/pub/time.series/pr/pr.data.0.Current',
   'accept-ranges': 'bytes',
   'content-type': 'binary/octet-stream',
   'content-length': '1564284',
   'server': 'AmazonS3'},
  'ChecksumAlgorithm': 'crc32',
  'RetryAttempts': 0},
 'AcceptRanges': '

In [7]:

bls_data = pd.read_csv(StringIO(obj['Body'].read().decode('utf-8')), sep='\t')

# Clean column names (remove extra whitespace)
bls_data.columns = bls_data.columns.str.strip()

print(f"✓ Loaded BLS data: {len(bls_data):,} rows")
print(f"✓ Columns: {list(bls_data.columns)}")
print("\nFirst few rows:")
bls_data.head()

✓ Loaded BLS data: 37,239 rows
✓ Columns: ['series_id', 'year', 'period', 'value', 'footnote_codes']

First few rows:


Unnamed: 0,series_id,year,period,value,footnote_codes
0,PRS30006011,1995,Q01,2.6,
1,PRS30006011,1995,Q02,2.1,
2,PRS30006011,1995,Q03,0.9,
3,PRS30006011,1995,Q04,0.1,
4,PRS30006011,1995,Q05,1.4,


In [8]:
print(f"\n1. BASIC INFO:")
print(f"   Shape: {bls_data.shape}")
print(f"   Columns: {list(bls_data.columns)}")


1. BASIC INFO:
   Shape: (37239, 5)
   Columns: ['series_id', 'year', 'period', 'value', 'footnote_codes']


In [9]:
# 2. Missing Values
print(f"\n2. MISSING VALUES:")
missing = bls_data.isnull().sum()
if missing.sum() > 0:
    print(missing[missing > 0])
else:
    print("   ✓ No missing values")



2. MISSING VALUES:
footnote_codes    37052
dtype: int64


In [10]:
# Check and drop footnote_codes if not useful
if 'footnote_codes' in bls_data.columns:
    null_count = bls_data['footnote_codes'].isnull().sum()
    empty_count = (bls_data['footnote_codes'] == '').sum()
    unique_count = bls_data['footnote_codes'].nunique()
    
    print(f"footnote_codes analysis:")
    print(f"  Null values: {null_count}/{len(bls_data)} ({null_count/len(bls_data)*100:.1f}%)")
    print(f"  Empty strings: {empty_count}")
    print(f"  Unique values: {unique_count}")
    
    # Drop if mostly null/empty or low information value
    if null_count + empty_count > len(bls_data) * 0.8:
        bls_data = bls_data.drop(columns=['footnote_codes'])
        print(f"  ✓ Dropped footnote_codes (mostly null/empty)")
    else:
        print(f"  Sample values: {bls_data['footnote_codes'].value_counts().head()}")
else:
    print("footnote_codes column not found")

footnote_codes analysis:
  Null values: 37052/37239 (99.5%)
  Empty strings: 0
  Unique values: 1
  ✓ Dropped footnote_codes (mostly null/empty)


In [11]:
# 3. Duplicates
print(f"\n3. DUPLICATES:")
dup_count = bls_data.duplicated().sum()
print(f"   Total duplicate rows: {dup_count}")
if dup_count > 0:
    print(f"   ⚠️ Found {dup_count} duplicate rows - consider removing")



3. DUPLICATES:
   Total duplicate rows: 0


In [12]:
# 4. Data Types
print(f"\n4. DATA TYPES:")
print(bls_data.dtypes)


4. DATA TYPES:
series_id     object
year           int64
period        object
value        float64
dtype: object


In [13]:
# 5. Whitespace Issues
print(f"\n5. WHITESPACE ISSUES:")
for col in bls_data.select_dtypes(include='object').columns:
    ws_count = (bls_data[col].str.strip() != bls_data[col]).sum()
    if ws_count > 0:
        print(f"   ⚠️ {col}: {ws_count} rows with leading/trailing whitespace")
 


5. WHITESPACE ISSUES:
   ⚠️ series_id: 37239 rows with leading/trailing whitespace


In [29]:
bls_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37239 entries, 0 to 37238
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   series_id  37239 non-null  object 
 1   year       37239 non-null  int64  
 2   period     37239 non-null  object 
 3   value      37239 non-null  float64
dtypes: float64(1), int64(1), object(2)
memory usage: 1.1+ MB


In [30]:
bls_data.describe(include='all')

Unnamed: 0,series_id,year,period,value
count,37239,37239.0,37239,37239.0
unique,282,,5,
top,PRS88003203,,Q05,
freq,152,,8325,
mean,,2009.648353,,32.277663
std,,8.752694,,44.670886
min,,1995.0,,-54.3
25%,,2002.0,,0.7
50%,,2010.0,,3.9
75%,,2017.0,,83.3015


In [31]:
bls_data.describe()

Unnamed: 0,year,value
count,37239.0,37239.0
mean,2009.648353,32.277663
std,8.752694,44.670886
min,1995.0,-54.3
25%,2002.0,0.7
50%,2010.0,3.9
75%,2017.0,83.3015
max,2025.0,384.6


In [14]:
# 6. Value Range Analysis
print(f"\n6. VALUE COLUMN ANALYSIS:")
if 'value' in bls_data.columns:
    print(f"   Data type: {bls_data['value'].dtype}")
    print(f"   Min: {bls_data['value'].min()}")
    print(f"   Max: {bls_data['value'].max()}")
    print(f"   Mean: {bls_data['value'].mean():.2f}")
    # Check for non-numeric values if stored as object
    if bls_data['value'].dtype == 'object':
        non_numeric = pd.to_numeric(bls_data['value'], errors='coerce').isnull().sum()
        if non_numeric > 0:
            print(f"   ⚠️ {non_numeric} non-numeric values found")


6. VALUE COLUMN ANALYSIS:
   Data type: float64
   Min: -54.3
   Max: 384.6
   Mean: 32.28


In [15]:
# 7. Unique Value Counts
print(f"\n7. UNIQUE VALUES:")
for col in ['series_id', 'year', 'period']:
    if col in bls_data.columns:
        print(f"   {col}: {bls_data[col].nunique()} unique values")


7. UNIQUE VALUES:
   series_id: 282 unique values
   year: 31 unique values
   period: 5 unique values
   series_id: 282 unique values
   year: 31 unique values
   period: 5 unique values


In [16]:

recommendations = []
if dup_count > 0:
    recommendations.append("- Remove duplicate rows")
if bls_data.isnull().sum().sum() > 0:
    recommendations.append("- Handle missing values")
for col in bls_data.select_dtypes(include='object').columns:
    if (bls_data[col].str.strip() != bls_data[col]).sum() > 0:
        recommendations.append(f"- Trim whitespace in '{col}' column")
        break
if 'value' in bls_data.columns and bls_data['value'].dtype == 'object':
    recommendations.append("- Convert 'value' column to numeric")

if recommendations:
    for rec in recommendations:
        print(rec)
else:
    print("✓ Data appears clean - no major cleanup needed")
    
print("="*70)

- Trim whitespace in 'series_id' column


In [17]:
# Trim whitespace in series_id and other string columns
for col in bls_data.select_dtypes(include='object').columns:
    bls_data[col] = bls_data[col].str.strip()

print("✓ Trimmed whitespace from all string columns")
print(f"✓ BLS data shape after cleanup: {bls_data.shape}")

✓ Trimmed whitespace from all string columns
✓ BLS data shape after cleanup: (37239, 4)


In [None]:
print(f"\n7. UNIQUE VALUES:")
for col in ['series_id', 'year', 'period']:
    if col in bls_data.columns:
        print(f"   {col}: {bls_data[col].nunique()} unique values")


7. UNIQUE VALUES:
   series_id: 282 unique values
   year: 31 unique values
   period: 5 unique values


## Load DataUSA Population Data

Load the latest population data from DataUSA API (JSON format with nested structure)

In [35]:
# List all population data files and get the latest
response = s3_client.list_objects_v2(
    Bucket=bucket_name,
    Prefix='raw/datausa/population/'
)
response

{'ResponseMetadata': {'RequestId': '89AFFBT9YDFSTRV8',
  'HostId': '6hU7DdQqLeLmLlzFdd2f5xqTiIPCeWgY9lOuOem9Pyzz/vdkHJFQiZOJJaAiUIIsPPBCTnxKgiMslMekjJXTK5uP4md8WtiW',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': '6hU7DdQqLeLmLlzFdd2f5xqTiIPCeWgY9lOuOem9Pyzz/vdkHJFQiZOJJaAiUIIsPPBCTnxKgiMslMekjJXTK5uP4md8WtiW',
   'x-amz-request-id': '89AFFBT9YDFSTRV8',
   'date': 'Sat, 20 Dec 2025 14:04:01 GMT',
   'x-amz-bucket-region': 'eu-north-1',
   'content-type': 'application/xml',
   'transfer-encoding': 'chunked',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'IsTruncated': False,
 'Contents': [{'Key': 'raw/datausa/population/data.json',
   'LastModified': datetime.datetime(2025, 12, 20, 5, 49, 38, tzinfo=tzutc()),
   'ETag': '"21da76ddc3780caf07f1ba2335f83212"',
   'ChecksumAlgorithm': ['CRC32'],
   'ChecksumType': 'FULL_OBJECT',
   'Size': 1844,
   'StorageClass': 'STANDARD'}],
 'Name': 'rearc-deepa-demo',
 'Prefix': 'raw/datausa/population/',
 'MaxKeys': 1000,
 'EncodingType

In [None]:
obj = s3_client.get_object(Bucket=bucket_name, Key=pop_key)
json.loads(obj['Body'].read())

{'annotations': {'subtopic': 'Demographics',
  'topic': 'Diversity',
  'source_name': 'Census Bureau',
  'dataset_link': 'http://www.census.gov/programs-surveys/acs/',
  'source_description': 'The American Community Survey (ACS) is conducted by the US Census and sent to a portion of the population every year.',
  'table_id': 'B01003',
  'dataset_name': 'ACS 1-year Estimate'},
 'page': {'limit': 0, 'offset': 0, 'total': 10},
 'columns': ['Nation ID', 'Nation', 'Year', 'Population'],
 'data': [{'Nation ID': '01000US',
   'Nation': 'United States',
   'Year': 2013,
   'Population': 316128839.0},
  {'Nation ID': '01000US',
   'Nation': 'United States',
   'Year': 2014,
   'Population': 318857056.0},
  {'Nation ID': '01000US',
   'Nation': 'United States',
   'Year': 2015,
   'Population': 321418821.0},
  {'Nation ID': '01000US',
   'Nation': 'United States',
   'Year': 2016,
   'Population': 323127515.0},
  {'Nation ID': '01000US',
   'Nation': 'United States',
   'Year': 2017,
   'Populat

In [None]:
if 'Contents' in response:
    # Get the most recent file
    latest_file = sorted(response['Contents'], key=lambda x: x['LastModified'])[-1]
    pop_key = latest_file['Key']
    
    # Load JSON data
    obj = s3_client.get_object(Bucket=bucket_name, Key=pop_key)
    pop_json = json.loads(obj['Body'].read().decode('utf-8'))
    
    # Handle different JSON structures
    if 'data' in pop_json:
        # If JSON has a 'data' key, extract it
        pop_data = pd.DataFrame(pop_json['data'])
    elif isinstance(pop_json, list):
        # If JSON is a list, use it directly
        pop_data = pd.DataFrame(pop_json)
    else:
        # Otherwise try to use the JSON as-is
        pop_data = pd.DataFrame([pop_json])
    
    print(f"✓ Loaded population data from: {pop_key}")
    print(f"✓ Records: {len(pop_data):,}")
    print(f"✓ Columns: {list(pop_data.columns)}")
    print("\nPopulation data:")
    display(pop_data)
else:
    print("No population data found")
    pop_data = pd.DataFrame()

✓ Loaded population data from: raw/datausa/population/data.json
✓ Records: 10
✓ Columns: ['Nation ID', 'Nation', 'Year', 'Population']

Population data:


Unnamed: 0,Nation ID,Nation,Year,Population
0,01000US,United States,2013,316128839.0
1,01000US,United States,2014,318857056.0
2,01000US,United States,2015,321418821.0
3,01000US,United States,2016,323127515.0
4,01000US,United States,2017,325719178.0
5,01000US,United States,2018,327167439.0
6,01000US,United States,2019,328239523.0
7,01000US,United States,2021,331893745.0
8,01000US,United States,2022,333287562.0
9,01000US,United States,2023,334914896.0


## Q1

Calculate mean and standard deviation of population for years 2013-2018

In [21]:
# Filter for years 2013-2018
pop_filtered = pop_data[(pop_data['Year'] >= 2013) & (pop_data['Year'] <= 2018)].copy()
# Calculate statistics
pop_stats = pd.DataFrame({
    'Metric': ['Mean Population', 'Std Dev Population'],
    'Value': [pop_filtered['Population'].mean(), pop_filtered['Population'].std()]
})

print("POPULATION STATISTICS (2013-2018)")
print(pop_stats)
print(f"Population Analysis (2013-2018): Mean={pop_filtered['Population'].mean():.0f}, StdDev={pop_filtered['Population'].std():.0f}")
# Log results
import logging
logging.basicConfig(level=logging.INFO)
logging.info(f"Population Analysis (2013-2018): Mean={pop_filtered['Population'].mean():.0f}, StdDev={pop_filtered['Population'].std():.0f}")

INFO:root:Population Analysis (2013-2018): Mean=322069808, StdDev=4158441


POPULATION STATISTICS (2013-2018)
               Metric         Value
0     Mean Population  3.220698e+08
1  Std Dev Population  4.158441e+06
Population Analysis (2013-2018): Mean=322069808, StdDev=4158441


In [53]:
 bls_data.groupby(['series_id', 'year'])['value'].sum()#.reset_index()

series_id    year
PRS30006011  1995      7.100
             1996     -0.500
             1997      4.400
             1998      4.200
             1999     -7.700
                      ...   
PRS88003203  2021    523.634
             2022    562.520
             2023    577.546
             2024    583.441
             2025    236.753
Name: value, Length: 8562, dtype: float64

##Q2

In [54]:
# Find best year per series (year with max sum of quarterly values)
yearly_sums = bls_data.groupby(['series_id', 'year'])['value'].sum().reset_index()
best_years_result = yearly_sums.loc[yearly_sums.groupby('series_id')['value'].idxmax()].sort_values('series_id').reset_index(drop=True)

print(f"Best years for {len(best_years_result):,} series:")
display(best_years_result)

# Log results - series_id as key, year and value as value
best_years_dict = best_years_result.set_index('series_id')[['year', 'value']].to_dict('index')
logging.info(f"Best Year Analysis: Found {len(best_years_result)} series, Top result: {best_years_dict}")
print(f"Best Year Analysis: Found {len(best_years_result)} series, Top result: {best_years_dict}")


Best years for 282 series:


Unnamed: 0,series_id,year,value
0,PRS30006011,2022,20.500
1,PRS30006012,2022,17.100
2,PRS30006013,1998,705.895
3,PRS30006021,2010,17.700
4,PRS30006022,2010,12.400
...,...,...,...
277,PRS88003192,2002,282.800
278,PRS88003193,2024,860.838
279,PRS88003201,2022,37.200
280,PRS88003202,2022,28.700


INFO:root:Best Year Analysis: Found 282 series, Top result: {'PRS30006011': {'year': 2022, 'value': 20.5}, 'PRS30006012': {'year': 2022, 'value': 17.1}, 'PRS30006013': {'year': 1998, 'value': 705.895}, 'PRS30006021': {'year': 2010, 'value': 17.7}, 'PRS30006022': {'year': 2010, 'value': 12.4}, 'PRS30006023': {'year': 2014, 'value': 503.216}, 'PRS30006031': {'year': 2022, 'value': 20.5}, 'PRS30006032': {'year': 2021, 'value': 17.1}, 'PRS30006033': {'year': 1998, 'value': 702.672}, 'PRS30006061': {'year': 2022, 'value': 37.0}, 'PRS30006062': {'year': 2021, 'value': 31.6}, 'PRS30006063': {'year': 2024, 'value': 646.748}, 'PRS30006081': {'year': 2021, 'value': 24.4}, 'PRS30006082': {'year': 2021, 'value': 24.4}, 'PRS30006083': {'year': 2021, 'value': 110.742}, 'PRS30006091': {'year': 2002, 'value': 43.3}, 'PRS30006092': {'year': 2002, 'value': 44.4}, 'PRS30006093': {'year': 2013, 'value': 514.156}, 'PRS30006101': {'year': 2020, 'value': 33.5}, 'PRS30006102': {'year': 2020, 'value': 36.2}, '

Best Year Analysis: Found 282 series, Top result: {'PRS30006011': {'year': 2022, 'value': 20.5}, 'PRS30006012': {'year': 2022, 'value': 17.1}, 'PRS30006013': {'year': 1998, 'value': 705.895}, 'PRS30006021': {'year': 2010, 'value': 17.7}, 'PRS30006022': {'year': 2010, 'value': 12.4}, 'PRS30006023': {'year': 2014, 'value': 503.216}, 'PRS30006031': {'year': 2022, 'value': 20.5}, 'PRS30006032': {'year': 2021, 'value': 17.1}, 'PRS30006033': {'year': 1998, 'value': 702.672}, 'PRS30006061': {'year': 2022, 'value': 37.0}, 'PRS30006062': {'year': 2021, 'value': 31.6}, 'PRS30006063': {'year': 2024, 'value': 646.748}, 'PRS30006081': {'year': 2021, 'value': 24.4}, 'PRS30006082': {'year': 2021, 'value': 24.4}, 'PRS30006083': {'year': 2021, 'value': 110.742}, 'PRS30006091': {'year': 2002, 'value': 43.3}, 'PRS30006092': {'year': 2002, 'value': 44.4}, 'PRS30006093': {'year': 2013, 'value': 514.156}, 'PRS30006101': {'year': 2020, 'value': 33.5}, 'PRS30006102': {'year': 2020, 'value': 36.2}, 'PRS3000610

## Q3

In [None]:
# Filter for series PRS30006032, period Q01 and join with population
target_series, target_period = 'PRS30006032', 'Q01'
series_q01 = bls_data[(bls_data['series_id'].str.contains(target_series)) & (bls_data['period'] == target_period)].copy()

if len(pop_data) > 0:
    series_q01['year'] = series_q01['year'].astype(int)
    pop_data['Year'] = pop_data['Year'].astype(int)
    
    final_report = series_q01.merge(
        pop_data[['Year', 'Population']], 
        left_on='year', 
        right_on='Year', 
        how='left'
    )[['series_id', 'year', 'period', 'value', 'Population']].sort_values('year')
    
    print(f"Series {target_series} {target_period} + Population ({len(final_report)} rows):")
    display(final_report)
    
    # Log final report
    if len(final_report) > 0:
        year_range = f"{final_report['year'].min()}-{final_report['year'].max()}"
        final_report_dict = final_report.to_dict('records')
        logging.info(f"Analysis 3 - {target_series} {target_period}: Generated {len(final_report)} rows spanning {year_range}, Sample: {final_report_dict[:]}")
        print(f"Analysis 3 - {target_series} {target_period}: {len(final_report)} rows spanning {year_range}")
    else:
        logging.warning(f"Analysis 3 - {target_series} {target_period}: No matching data after merge")
else:
    final_report = series_q01
    logging.warning(f"Analysis 3 - {target_series} {target_period}: No population data available, returning {len(final_report)} BLS records only")
    print("⚠️ No population data available")

Series PRS30006032 Q01 + Population (10 rows):


Unnamed: 0,series_id,year,period,value,Population
0,PRS30006032,2013,Q01,0.5,316128839.0
1,PRS30006032,2014,Q01,-0.1,318857056.0
2,PRS30006032,2015,Q01,-1.7,321418821.0
3,PRS30006032,2016,Q01,-1.4,323127515.0
4,PRS30006032,2017,Q01,0.9,325719178.0
5,PRS30006032,2018,Q01,0.5,327167439.0
6,PRS30006032,2019,Q01,-1.6,328239523.0
7,PRS30006032,2021,Q01,0.7,331893745.0
8,PRS30006032,2022,Q01,5.3,333287562.0
9,PRS30006032,2023,Q01,0.3,334914896.0


INFO:root:Analysis 3 - PRS30006032 Q01: Generated 10 rows spanning 2013-2023, Sample: [{'series_id': 'PRS30006032', 'year': 2013, 'period': 'Q01', 'value': 0.5, 'Population': 316128839.0}, {'series_id': 'PRS30006032', 'year': 2014, 'period': 'Q01', 'value': -0.1, 'Population': 318857056.0}, {'series_id': 'PRS30006032', 'year': 2015, 'period': 'Q01', 'value': -1.7, 'Population': 321418821.0}, {'series_id': 'PRS30006032', 'year': 2016, 'period': 'Q01', 'value': -1.4, 'Population': 323127515.0}, {'series_id': 'PRS30006032', 'year': 2017, 'period': 'Q01', 'value': 0.9, 'Population': 325719178.0}, {'series_id': 'PRS30006032', 'year': 2018, 'period': 'Q01', 'value': 0.5, 'Population': 327167439.0}, {'series_id': 'PRS30006032', 'year': 2019, 'period': 'Q01', 'value': -1.6, 'Population': 328239523.0}, {'series_id': 'PRS30006032', 'year': 2021, 'period': 'Q01', 'value': 0.7, 'Population': 331893745.0}, {'series_id': 'PRS30006032', 'year': 2022, 'period': 'Q01', 'value': 5.3, 'Population': 33328

Analysis 3 - PRS30006032 Q01: 10 rows spanning 2013-2023


In [65]:
# Diagnostic: Check all records for PRS30006032 Q01
print("Checking BLS data for series containing PRS30006032, period Q01:")
diagnostic = bls_data[(bls_data['series_id'].str.contains('PRS30006032')) & 
                      (bls_data['period'].str.contains('Q01'))]
print(f"Found {len(diagnostic)} matching rows:")
display(diagnostic)

# Log diagnostic results
if len(diagnostic) > 0:
    year_range = f"{diagnostic['year'].min()}-{diagnostic['year'].max()}"
    diagnostic_dict = diagnostic[['series_id', 'year', 'period', 'value']].to_dict('records')
    logging.info(f"Diagnostic - PRS30006032 Q01: Found {len(diagnostic)} rows spanning {year_range}, Data: {diagnostic_dict[:5]}")  # Log first 5 records
    print(f"Diagnostic - PRS30006032 Q01: Found {len(diagnostic)} rows spanning {year_range}")
else:
    logging.warning("Diagnostic - PRS30006032 Q01: No matching records found")
    print("⚠️ No matching records found")

Checking BLS data for series containing PRS30006032, period Q01:
Found 31 matching rows:


Unnamed: 0,series_id,year,period,value
1064,PRS30006032,1995,Q01,0.0
1069,PRS30006032,1996,Q01,-4.2
1074,PRS30006032,1997,Q01,2.8
1079,PRS30006032,1998,Q01,0.9
1084,PRS30006032,1999,Q01,-4.1
1089,PRS30006032,2000,Q01,0.5
1094,PRS30006032,2001,Q01,-6.3
1099,PRS30006032,2002,Q01,-6.6
1104,PRS30006032,2003,Q01,-5.7
1109,PRS30006032,2004,Q01,2.0


INFO:root:Diagnostic - PRS30006032 Q01: Found 31 rows spanning 1995-2025, Data: [{'series_id': 'PRS30006032', 'year': 1995, 'period': 'Q01', 'value': 0.0}, {'series_id': 'PRS30006032', 'year': 1996, 'period': 'Q01', 'value': -4.2}, {'series_id': 'PRS30006032', 'year': 1997, 'period': 'Q01', 'value': 2.8}, {'series_id': 'PRS30006032', 'year': 1998, 'period': 'Q01', 'value': 0.9}, {'series_id': 'PRS30006032', 'year': 1999, 'period': 'Q01', 'value': -4.1}]


Diagnostic - PRS30006032 Q01: Found 31 rows spanning 1995-2025
