# Data Exploration

This notebook explores the synthetic safety telemetry data.

## Objectives
1. Load and inspect raw data
2. Understand data distributions
3. Identify rare events
4. Explore temporal patterns

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json

# Plot defaults: seaborn's "whitegrid" theme and a (12, 6) default figure size
sns.set_style(style="whitegrid")
plt.rc('figure', figsize=(12, 6))

# Load data
def load_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    Each non-blank line is decoded with ``json.loads``. Blank or
    whitespace-only lines (for example a trailing empty line at the end of
    the file) are skipped rather than raising ``json.JSONDecodeError``.
    The file is read as UTF-8 so the result does not depend on the
    platform's default locale encoding.

    Args:
        path: Location of the ``.jsonl`` file (``str`` or ``Path``).

    Returns:
        A list with one decoded JSON value per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


data_dir = Path('../data/synthetic')
trips_file = data_dir / 'trips.jsonl'
events_file = data_dir / 'events.jsonl'

# Load trips (falls back to an empty DataFrame when the file is missing)
trips = []
if trips_file.exists():
    trips = load_jsonl(trips_file)
    df_trips = pd.DataFrame(trips)
    print(f"Loaded {len(df_trips)} trips")
else:
    print("No trips data found. Run: python -m ingestion.generator")
    df_trips = pd.DataFrame()

# Load events (same handling as trips)
events = []
if events_file.exists():
    events = load_jsonl(events_file)
    df_events = pd.DataFrame(events)
    print(f"Loaded {len(df_events)} events")
else:
    print("No events data found. Run: python -m ingestion.generator")
    df_events = pd.DataFrame()

In [None]:
# Summarise the trips table (nothing is printed when no trips were loaded)
if not df_trips.empty:
    # Descriptive statistics as produced by DataFrame.describe()
    print("Trips Summary:")
    print(df_trips.describe())

    # Value frequencies for each column of interest, one section per column
    for trips_heading, trips_column in (
        ("\nTrips by Operating Mode:", 'operating_mode'),
        ("\nTrips by Weather Condition:", 'weather_condition'),
    ):
        print(trips_heading)
        print(df_trips[trips_column].value_counts())

In [None]:
# Summarise the events table (nothing is printed when no events were loaded)
if not df_events.empty:
    print("Events Summary:")
    event_total = len(df_events)
    print(f"Total events: {event_total}")

    # Frequency of each event type, then of each severity level
    event_type_counts = df_events['event_type'].value_counts()
    print(f"Event types: {event_type_counts}")
    event_severity_counts = df_events['event_severity'].value_counts()
    print(f"Event severities: {event_severity_counts}")

    # Rare events: rows whose event_type contains 'RARE'
    # (missing event_type values count as non-matching via na=False)
    rare_mask = df_events['event_type'].str.contains('RARE', na=False)
    rare_events = df_events[rare_mask]
    rare_count = len(rare_events)
    rare_percent = rare_count / event_total * 100
    print(f"\nRare events: {rare_count} ({rare_percent:.2f}%)")