In [1]:
import pandas as pd
from matplotlib import pyplot as plt

In [2]:
import sys
from pathlib import Path

# Locate the project root: the nearest ancestor of the working directory
# (the directory itself included) that contains one of the marker entries.
current = Path.cwd()

indicators = [
    '.git', 'pyproject.toml'
]

# `next` stops at the first (closest) match, so a marker that happens to exist
# in some outer directory cannot override the project's own root.
project_root = next(
    (
        parent
        for parent in [current, *current.parents]
        if any((parent / indicator).exists() for indicator in indicators)
    ),
    None,
)

# Without any marker, fall back to the working directory (with a visible
# warning) instead of leaving `project_root` undefined for the cells below.
if project_root is None:
    print(
        f"Warning: none of {indicators} found in {current} or its parents; "
        "using the working directory as project root.",
        file=sys.stderr,
    )
    project_root = current

# Keep the root first on the import path, but do not stack another copy each
# time this cell is re-run.
root_str = str(project_root)
if sys.path[:1] != [root_str]:
    sys.path.insert(0, root_str)

In [None]:
# Session-info JSON for the 2023 Australian Grand Prix, FP1 session.
# `project_root / ...` already yields a Path, so no extra wrapper is needed.
filepath = project_root / "data/raw/2023/Australian Grand Prix/FP1/session_info.json"

In [None]:
# Print the resolved session-info path.
print(filepath)

In [None]:
import json

In [None]:
# Read the whole file as UTF-8 text and parse it as JSON.
data = json.loads(filepath.read_text(encoding="utf-8"))

print(data)

In [None]:
# Check which Python type the JSON document was parsed into.
type(data)

In [None]:
# Shallow copy of the parsed session info; the cells below read from it.
# NOTE(review): the name `copy` would shadow the stdlib `copy` module if that
# were ever imported in this notebook — consider a more specific name.
copy = data.copy()

In [None]:
# Display the copied session info.
copy

In [None]:
# Normalise the location: coerce to text first, then trim surrounding
# whitespace and title-case it. Coercing before `.strip()` (the same order used
# for `session_name`) means a non-string value no longer raises AttributeError.
location_clean = str(copy["location"]).strip().title()
location_clean

In [None]:
# Same field, title-cased without stripping whitespace.
copy['location'].title()

In [None]:
# Read the official event name from the session info and display it.
off_event_name = copy["official_event_name"]
off_event_name

In [None]:
# Substring -> replacement rules used to shorten official event names.
# Insertion order is the order in which the rules are applied.
event_name_cleaning = {
    "FORMULA 1": "",
    "GRAND PRIX": "GP",
    # Sponsor names are dropped entirely.
    **dict.fromkeys(("EMIRATES", "ARAMCO"), ""),
}

In [None]:
# Overwrite the name with a fixed sample string for the cleaning experiment
# in the next cell.
off_event_name = "FORMULA 1 ROLEX AUSTRALIAN GRAND PRIX 2023"
off_event_name

In [None]:
# Apply every substring replacement from `event_name_cleaning`, echoing each
# rule as it runs.
for pattern, replacement in event_name_cleaning.items():
    print(f"Pattern = {pattern}")
    print(f"Replacement = {replacement}")
    off_event_name = off_event_name.replace(pattern, replacement)

# Deleting a phrase leaves its neighbouring spaces behind (for example a
# leading blank once "FORMULA 1" is removed); collapse runs of whitespace and
# trim both ends.
off_event_name = " ".join(off_event_name.split())

off_event_name

In [None]:
# Single-rule check: drop the "FORMULA 1" prefix from a sample name.
off_event_name = "FORMULA 1 ROLEX AUSTRALIAN GRAND PRIX 2023"
# `.strip()` removes the blank that the deleted prefix leaves at the start.
off_event_name = off_event_name.replace("FORMULA 1", "").strip()
off_event_name

In [None]:
# Full session names paired with their short codes.
session_type_mapping = dict(
    [
        ("Practice 1", "FP1"),
        ("Practice 2", "FP2"),
        ("Practice 3", "FP3"),
        ("Qualifying", "Q"),
        ("Race", "R"),
        ("Sprint", "S"),
        ("Sprint Qualifying", "SQ"),
        ("Sprint Shootout", "SS"),
    ]
)

In [None]:
# Session name from the session info, coerced to str and stripped of
# surrounding whitespace.
session_name = str(copy["session_name"]).strip()
session_name

In [None]:
# Translate the name to its short code; a name that is not in the mapping is
# passed through unchanged.
session_type = session_type_mapping.get(session_name, session_name)
session_type


In [3]:
"""
Test script for SessionProcessor
"""

from config.logging import setup_logging, get_logger
from src.data_processing.core.session_processor import SessionProcessor
from src.data_processing.base.processing_context import ProcessingContext
import json


def test_session_processor():
    """Run SessionProcessor on a hand-written session record and report the outcome.

    The result table, the processor's stats and the updated context are
    logged/printed along the way.

    Returns:
        True when processing finishes without raising; False when an
        exception is caught (its traceback is printed in that case).
    """
    setup_logging()
    log = get_logger("test_session_processor")

    log.info("=== Testing SessionProcessor ===")

    # Hand-built record shaped like the ingestion output
    monaco_qualifying = {
        "event_name": "Monaco Grand Prix",
        "location": "Monte Carlo",
        "country": "Monaco",
        "session_name": "Qualifying",
        "session_date": "2023-05-27",
        "official_event_name": "FORMULA 1 GRAND PRIX DE MONACO 2023",
        "event_format": "conventional",
        "round_number": 6,
    }

    try:
        session_processor = SessionProcessor()
        ctx = ProcessingContext(year=2023, event_name="Monaco", session_type="Q")

        log.info("Processing sample session data...")
        frame, ctx_after = session_processor.process(monaco_qualifying, ctx)

        # Result table
        log.info("✅ Processing completed successfully!")
        log.info(f"Output shape: {frame.shape}")
        log.info(f"Output columns: {list(frame.columns)}")
        log.info("Sample output:")
        print(frame.to_string())

        # Processor statistics
        log.info(f"Processing stats: {session_processor.get_processing_stats()}")

        # Context as returned by the processor
        log.info("Context metadata:")
        print(json.dumps(ctx_after.to_dict(), indent=2, default=str))
    except Exception as exc:
        log.error(f"❌ Test failed: {str(exc)}")
        import traceback

        traceback.print_exc()
        return False
    else:
        return True


In [4]:
# The helper returns False (instead of raising) when it catches an exception,
# so report the actual outcome rather than always announcing success.
test1_passed = test_session_processor()
if test1_passed:
    print("Test 1 passed successfully.")
else:
    print("Test 1 FAILED - see the traceback above.")

2025-09-29 13:10:02 - config.logging - INFO - setup_logging:100 - Logging configured. Log directory: /Volumes/ExternalSSD/My Projects/Data Science Projects/f1_ds_project/notebooks/EDA_and_DataProcessing/monitoring/logs
2025-09-29 13:10:02 - config.logging - INFO - setup_logging:101 - Environment: development
2025-09-29 13:10:02 - test_session_processor - INFO - test_session_processor:17 - === Testing SessionProcessor ===
2025-09-29 13:10:02 - test_session_processor - INFO - test_session_processor:39 - Processing sample session data...
2025-09-29 13:10:02 - data_processing.session_processor - INFO - process:91 - Starting processing with session_processor
2025-09-29 13:10:02 - data_processing.session_processor - INFO - _process_data:427 - Processing F1 session metadata
2025-09-29 13:10:02 - data_processing.session_processor - INFO - _process_data:447 - Successfully processed session: 2023_Monaco_Grand_Prix_Q
2025-09-29 13:10:02 - data_processing.session_processor - INFO - process:135 - P

In [18]:
def test_with_real_data():
    """Run SessionProcessor on the ingested Monaco 2023 qualifying session info.

    Returns:
        True when processing succeeds, and also when the data file does not
        exist (the check is skipped); False when an exception is caught.
    """
    setup_logging()
    log = get_logger('test_session_processor_real')

    log.info("=== Testing SessionProcessor with Real Data ===")

    try:
        sample_file = project_root / "data/raw/2023/Monaco Grand Prix/Q/session_info.json"

        # Nothing to check without the ingested file; treat as a skip.
        if not sample_file.exists():
            log.warning("Real data file not found: %s", sample_file)
            log.info("Skipping real data test")
            return True

        real_session_info = json.loads(sample_file.read_text(encoding='utf-8'))

        session_processor = SessionProcessor()
        ctx = ProcessingContext(year=2023, event_name='Monaco', session_type='Q')
        frame, _ctx_after = session_processor.process(real_session_info, ctx)

        log.info("✅ Real data processing completed!")
        log.info(f"Output shape: {frame.shape}")
        print(frame.to_string())
    except Exception as exc:
        log.error(f"❌ Real data test failed: {str(exc)}")
        import traceback
        traceback.print_exc()
        return False
    else:
        return True

In [26]:
# Run the real-data check; the returned bool is shown as the cell output.
test_with_real_data()

2025-09-29 13:18:28 - config.logging - INFO - setup_logging:100 - Logging configured. Log directory: /Volumes/ExternalSSD/My Projects/Data Science Projects/f1_ds_project/notebooks/EDA_and_DataProcessing/monitoring/logs
2025-09-29 13:18:28 - config.logging - INFO - setup_logging:101 - Environment: development
2025-09-29 13:18:28 - test_session_processor_real - INFO - test_with_real_data:7 - === Testing SessionProcessor with Real Data ===
2025-09-29 13:18:28 - data_processing.session_processor - INFO - process:91 - Starting processing with session_processor
2025-09-29 13:18:28 - data_processing.session_processor - INFO - _process_data:427 - Processing F1 session metadata
2025-09-29 13:18:28 - data_processing.session_processor - INFO - _process_data:447 - Successfully processed session: 2023_Monaco_Grand_Prix_Q
2025-09-29 13:18:28 - data_processing.session_processor - INFO - process:135 - Processing completed successfully in 0.10 s
2025-09-29 13:18:28 - test_session_processor_real - INFO 

True

In [19]:
# Session-info JSON for Monaco 2023 qualifying; `project_root / ...` is
# already a Path, so it is used directly.
sample_file = project_root / "data/raw/2023/Monaco Grand Prix/Q/session_info.json"

In [20]:
# Read the file as UTF-8 text and parse it as JSON.
real_session_info = json.loads(sample_file.read_text(encoding='utf-8'))

In [21]:
# Display the parsed session info.
real_session_info

{'event_name': 'Monaco Grand Prix',
 'location': 'Monaco',
 'country': 'Monaco',
 'session_name': 'Qualifying',
 'session_date': '2023-05-27 14:00:00',
 'event_format': 'conventional',
 'round_number': '6',
 'official_event_name': 'FORMULA 1 GRAND PRIX DE MONACO 2023'}

In [22]:
# Build a processor and a context for the step-by-step run below.
# NOTE(review): event_name is 'Monaco Grand Prix' here but 'Monaco' inside
# test_session_processor / test_with_real_data — confirm which form
# ProcessingContext expects.
processor = SessionProcessor()
context = ProcessingContext(year=2023, event_name='Monaco Grand Prix', session_type='Q')

In [23]:
# Process the loaded session info; the call returns a pair that is unpacked
# into the result frame and the context handed back by the processor.
result_df, updated_context = processor.process(real_session_info, context)

2025-09-29 13:16:44 - data_processing.session_processor - INFO - process:91 - Starting processing with session_processor
2025-09-29 13:16:44 - data_processing.session_processor - INFO - _process_data:427 - Processing F1 session metadata
2025-09-29 13:16:44 - data_processing.session_processor - INFO - _process_data:447 - Successfully processed session: 2023_Monaco_Grand_Prix_Q
2025-09-29 13:16:44 - data_processing.session_processor - INFO - process:135 - Processing completed successfully in 0.06 s


In [25]:
# Display the processed result.
result_df

Unnamed: 0,session_id,event_name_clean,session_name_clean,session_type,session_date_clean,year,location_clean,country_clean,weekend_format,round_number,processed_at
0,2023_Monaco_Grand_Prix_Q,Monaco Grand Prix,Qualifying,Q,2023-05-27 14:00:00,2023,Monaco,Monaco,Conventional,6,2025-09-29 13:16:44.296269
