# 🔮 Combining Traffic Data for Visualization

This notebook combines the processed traffic infringement and mobile speed camera datasets into a unified format with additional filtering fields.

## Overview

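1. Load processed datasets
2. Standardize properties (including district/region information)
3. Create additional filtering fields (including randomly generated weather and time-of-day placeholders)
4. Create unified dataset
5. Export for visualization
6. Create overview statistics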


In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import json
import os
from pathlib import Path
import geopandas as gpd
from shapely.geometry import Point

## 1. Load Processed Datasets

First, we'll load the processed GeoJSON files for infringements and speed cameras.


In [2]:
# Directory layout, expressed relative to the notebook's working directory
ROOT_DIR = Path('../')
OUTPUT_DIR = ROOT_DIR / 'output'
COMBINED_DIR = OUTPUT_DIR / 'combined'
CLIENT_DATA_DIR = ROOT_DIR / '..' / 'client' / 'data'

# Both export targets must exist before later cells write into them
for _target_dir in (COMBINED_DIR, CLIENT_DATA_DIR):
    os.makedirs(_target_dir, exist_ok=True)


def _read_geojson(path):
    """Parse the JSON document stored at ``path`` and return the result."""
    with open(path, 'r') as handle:
        return json.load(handle)


# Processed inputs read from OUTPUT_DIR
infringements_geojson = _read_geojson(OUTPUT_DIR / 'infringements.json')
cameras_geojson = _read_geojson(OUTPUT_DIR / 'speed_cameras.json')

# Report how many features each file contributed
_n_infringements = len(infringements_geojson['features'])
_n_cameras = len(cameras_geojson['features'])
print(f"Loaded {_n_infringements} infringement features")
print(f"Loaded {_n_cameras} camera features")

Loaded 15 infringement features
Loaded 314 camera features


## 2. Standardize Properties

Next, we'll standardize the property fields across both datasets to ensure consistency.


In [3]:
# Print the first feature's properties from each dataset so the field names
# of the two sources can be compared side by side
for _header, _collection in (
    ("Infringements properties sample:", infringements_geojson),
    ("\nSpeed cameras properties sample:", cameras_geojson),
):
    print(_header)
    print(json.dumps(_collection['features'][0]['properties'], indent=2))

Infringements properties sample:
{
  "intensity": 100.0,
  "location": "NORTH BRISBANE",
  "count": 7541
}

Speed cameras properties sample:
{
  "intensity": 100.0,
  "location": "Monaro Highway",
  "visits": 3774,
  "hours": 5040.99,
  "checked": 4421729
}


In [4]:
# Function to standardize properties for each dataset
def standardize_infringement_properties(feature):
    """Bring an infringement feature's properties onto the shared schema.

    The feature's ``properties`` dict is modified in place:

    * ``data_type`` is set to ``'infringement'``.
    * If ``location`` is present, it is copied to ``district`` and ``region``
      is set to ``'Queensland'``.
    * ``visits``, ``hours`` and ``checked`` are added with value ``None``
      when they are absent.
    * If ``intensity`` is absent but ``count`` is present, ``intensity`` is
      set to ``float(count)``.

    Args:
        feature: Feature dict with a ``properties`` mapping.

    Returns:
        The same ``feature`` object that was passed in.
    """
    attrs = feature['properties']
    attrs['data_type'] = 'infringement'

    # The infringement source names its area in `location`
    if 'location' in attrs:
        attrs.update(district=attrs['location'], region='Queensland')

    # Fields that only the camera dataset carries: present but empty here
    for camera_only_field in ('visits', 'hours', 'checked'):
        attrs.setdefault(camera_only_field, None)

    # Existing intensity values are never overwritten; the raw count is only
    # a fallback for records that arrive without one
    if 'count' in attrs and 'intensity' not in attrs:
        attrs['intensity'] = float(attrs['count'])

    return feature

def standardize_camera_properties(feature):
    """Bring a speed-camera feature's properties onto the shared schema.

    The feature's ``properties`` dict is modified in place:

    * ``data_type`` is set to ``'speed_camera'``.
    * ``district`` is derived from the point's latitude:
      below -35.3 -> ``'ACT South'``, above -35.2 -> ``'ACT North'``,
      anything else (both boundaries included) -> ``'ACT Central'``.
    * ``region`` is set to ``'Australian Capital Territory'``.
    * If ``count`` is absent, it is filled from ``visits`` (0 when ``visits``
      is absent too) so both datasets expose a ``count`` field.

    Args:
        feature: Feature dict with a ``properties`` mapping and a
            ``geometry`` whose ``coordinates`` hold latitude at index 1.

    Returns:
        The same ``feature`` object that was passed in.
    """
    props = feature['properties']

    # Add data type identifier
    props['data_type'] = 'speed_camera'

    # Coordinates are ordered [longitude, latitude]; only latitude is needed
    # for the coarse banding below
    latitude = feature['geometry']['coordinates'][1]

    # Simple latitude bands (customize as needed); every branch assigns a
    # district, so no separate default is required
    if latitude < -35.3:
        props['district'] = 'ACT South'
    elif latitude > -35.2:
        props['district'] = 'ACT North'
    else:
        props['district'] = 'ACT Central'

    props['region'] = 'Australian Capital Territory'

    # Cameras have no native `count`; reuse the visit total so downstream
    # code can read `count` for either data type
    if 'count' not in props:
        props['count'] = props.get('visits', 0)

    return feature

# Run every feature through the standardizer that matches its dataset
standardized_infringements = list(
    map(standardize_infringement_properties, infringements_geojson['features'])
)
standardized_cameras = list(
    map(standardize_camera_properties, cameras_geojson['features'])
)

# Spot-check the first feature of each standardized list
for _header, _features in (
    ("\nStandardized infringement example:", standardized_infringements),
    ("\nStandardized camera example:", standardized_cameras),
):
    print(_header)
    print(json.dumps(_features[0]['properties'], indent=2))


Standardized infringement example:
{
  "intensity": 100.0,
  "location": "NORTH BRISBANE",
  "count": 7541,
  "data_type": "infringement",
  "district": "NORTH BRISBANE",
  "region": "Queensland",
  "visits": null,
  "hours": null,
  "checked": null
}

Standardized camera example:
{
  "intensity": 100.0,
  "location": "Monaro Highway",
  "visits": 3774,
  "hours": 5040.99,
  "checked": 4421729,
  "data_type": "speed_camera",
  "district": "ACT South",
  "region": "Australian Capital Territory",
  "count": 3774
}


## 3. Create Additional Filtering Fields

Now we'll add additional fields that will be useful for filtering in the visualization.


In [5]:
# Define a function to add categories based on intensity or other properties
# Each table lists (minimum value, label) pairs from the highest band down;
# the first band whose minimum is reached wins.
_INTENSITY_BANDS = ((80, 'Very High'), (60, 'High'), (40, 'Medium'), (20, 'Low'))
_CAMERA_VISIT_BANDS = (
    (1000, 'Very Frequent'), (500, 'Frequent'), (250, 'Regular'), (100, 'Occasional'),
)
_INFRINGEMENT_COUNT_BANDS = ((5000, 'Very High'), (3000, 'High'), (1000, 'Medium'), (500, 'Low'))


def _band_label(value, bands, fallback):
    """Return the label of the first band in ``bands`` that ``value`` reaches, else ``fallback``."""
    for minimum, label in bands:
        if value >= minimum:
            return label
    return fallback


def add_filtering_fields(feature):
    """Add categorical fields used for filtering to a feature's properties.

    The feature's ``properties`` dict is modified in place:

    * ``intensity_category`` is always set, from ``intensity``
      (>=80 'Very High', >=60 'High', >=40 'Medium', >=20 'Low',
      otherwise 'Very Low'). A missing or ``None`` intensity is treated
      as 0.
    * ``frequency`` is set for ``speed_camera`` features from ``visits`` and
      for ``infringement`` features from ``count``. It is left unset when
      that driving value is missing, ``None`` or 0.

    Args:
        feature: Feature dict whose ``properties`` already contain
            ``data_type``.

    Returns:
        The same ``feature`` object that was passed in.

    Raises:
        KeyError: If ``properties`` has no ``data_type`` entry.
    """
    props = feature['properties']

    # A key that is present but null would otherwise reach the comparisons
    # as None and raise TypeError, so normalise it to 0 first
    intensity = props.get('intensity')
    if intensity is None:
        intensity = 0
    props['intensity_category'] = _band_label(intensity, _INTENSITY_BANDS, 'Very Low')

    data_type = props['data_type']
    if data_type == 'speed_camera':
        # Cameras are ranked by how often the site was visited
        visits = props.get('visits')
        if visits:
            props['frequency'] = _band_label(visits, _CAMERA_VISIT_BANDS, 'Rare')
    elif data_type == 'infringement':
        # Infringement areas are ranked by their infringement count
        count = props.get('count')
        if count:
            props['frequency'] = _band_label(count, _INFRINGEMENT_COUNT_BANDS, 'Very Low')

    return feature

# Derive the categorical filter fields for every standardized feature
enhanced_infringements = list(map(add_filtering_fields, standardized_infringements))
enhanced_cameras = list(map(add_filtering_fields, standardized_cameras))

# Spot-check the first feature of each enhanced list
for _header, _features in (
    ("\nEnhanced infringement example:", enhanced_infringements),
    ("\nEnhanced camera example:", enhanced_cameras),
):
    print(_header)
    print(json.dumps(_features[0]['properties'], indent=2))


Enhanced infringement example:
{
  "intensity": 100.0,
  "location": "NORTH BRISBANE",
  "count": 7541,
  "data_type": "infringement",
  "district": "NORTH BRISBANE",
  "region": "Queensland",
  "visits": null,
  "hours": null,
  "checked": null,
  "intensity_category": "Very High",
  "frequency": "Very High"
}

Enhanced camera example:
{
  "intensity": 100.0,
  "location": "Monaro Highway",
  "visits": 3774,
  "hours": 5040.99,
  "checked": 4421729,
  "data_type": "speed_camera",
  "district": "ACT South",
  "region": "Australian Capital Territory",
  "count": 3774,
  "intensity_category": "Very High",
  "frequency": "Very Frequent"
}


In [None]:
# Add random weather and time-of-day information to the data
import random

# Fixed seed: re-running this cell (or Restart & Run All) reproduces the same
# synthetic labels instead of a fresh random assignment each time.
ENVIRONMENT_SEED = 42
_environment_rng = random.Random(ENVIRONMENT_SEED)


def add_environmental_factors(feature, rng=None):
    """Attach randomly generated weather and time labels to a feature.

    The values are synthetic placeholders: they are drawn from a random
    generator and are not derived from anything in the feature itself.
    The feature's ``properties`` dict is modified in place:

    * ``weather``: ``'Dry'`` with probability 0.8, otherwise ``'Rainy'``.
    * ``time_of_day``: ``'Day'`` with probability 0.7, otherwise ``'Night'``.
    * ``time_period``: from a uniformly drawn hour 0-23 —
      6-11 ``'Morning'``, 12-17 ``'Afternoon'``, 18-21 ``'Evening'``,
      anything else ``'Late Night'``.

    Args:
        feature: Feature dict with a ``properties`` mapping.
        rng: Optional generator exposing ``random()`` and ``randint(a, b)``
            (for example a ``random.Random`` instance). Defaults to the
            notebook-level generator seeded with ``ENVIRONMENT_SEED``.

    Returns:
        The same ``feature`` object that was passed in.
    """
    if rng is None:
        rng = _environment_rng
    props = feature['properties']

    # Draw order (weather, time of day, hour) is fixed so that a given seed
    # always produces the same labels
    props['weather'] = 'Dry' if rng.random() < 0.8 else 'Rainy'
    props['time_of_day'] = 'Day' if rng.random() < 0.7 else 'Night'

    # NOTE(review): the hour is drawn independently of `time_of_day`, so a
    # feature can end up labelled e.g. 'Day' together with 'Late Night'.
    hour = rng.randint(0, 23)
    if 6 <= hour < 12:
        props['time_period'] = 'Morning'
    elif 12 <= hour < 18:
        props['time_period'] = 'Afternoon'
    elif 18 <= hour < 22:
        props['time_period'] = 'Evening'
    else:
        props['time_period'] = 'Late Night'

    return feature

# Label every feature, infringements first and cameras second
enhanced_infringements = list(map(add_environmental_factors, enhanced_infringements))
enhanced_cameras = list(map(add_environmental_factors, enhanced_cameras))

# Spot-check the first feature of each list with the new fields attached
for _header, _features in (
    ("\nInfringement example with environmental factors:", enhanced_infringements),
    ("\nCamera example with environmental factors:", enhanced_cameras),
):
    print(_header)
    print(json.dumps(_features[0]['properties'], indent=2))


def _property_values(features, key):
    """Collect ``properties[key]`` from every feature, preserving order."""
    return [item['properties'][key] for item in features]


# How the weather labels are distributed within each dataset
infringement_weather = _property_values(enhanced_infringements, 'weather')
camera_weather = _property_values(enhanced_cameras, 'weather')

print("\nWeather distribution:")
for _dataset, _values in (
    ("Infringements", infringement_weather),
    ("Speed Cameras", camera_weather),
):
    print(f"{_dataset}: Dry: {_values.count('Dry')}, Rainy: {_values.count('Rainy')}")

# Same summary for the day/night labels
infringement_time = _property_values(enhanced_infringements, 'time_of_day')
camera_time = _property_values(enhanced_cameras, 'time_of_day')

print("\nTime of day distribution:")
for _dataset, _values in (
    ("Infringements", infringement_time),
    ("Speed Cameras", camera_time),
):
    print(f"{_dataset}: Day: {_values.count('Day')}, Night: {_values.count('Night')}")

## 4. Create Unified Dataset

Now we'll combine both datasets into a unified GeoJSON file.


In [6]:
# One feature list: all infringements followed by all cameras
combined_features = [*enhanced_infringements, *enhanced_cameras]

# Wrap the list in a FeatureCollection
unified_geojson = dict(type="FeatureCollection", features=combined_features)

print(f"Combined dataset contains {len(combined_features)} features")

# Tally features per data type (keys appear in first-seen order)
data_types = {}
for _feature in combined_features:
    _kind = _feature['properties']['data_type']
    data_types.setdefault(_kind, 0)
    data_types[_kind] += 1

print("\nData type distribution:")
for _kind, _total in data_types.items():
    print(f"{_kind}: {_total} features")

# Tally features per district; features without one are grouped as 'Unknown'
districts = {}
for _feature in combined_features:
    _name = _feature['properties'].get('district', 'Unknown')
    districts.setdefault(_name, 0)
    districts[_name] += 1

print("\nDistrict distribution:")
sorted_districts = sorted(districts.items(), key=lambda pair: pair[1], reverse=True)
# Only the ten largest districts are printed
for _name, _total in sorted_districts[:10]:
    print(f"{_name}: {_total} features")

Combined dataset contains 329 features

Data type distribution:
infringement: 15 features
speed_camera: 314 features

District distribution:
ACT South: 171 features
ACT Central: 116 features
ACT North: 27 features
NORTH BRISBANE: 1 features
SOUTH BRISBANE: 1 features
GOLD COAST: 1 features
CAPRICORNIA: 1 features
SUNSHINE COAST: 1 features
FAR NORTH: 1 features
LOGAN: 1 features


## 5. Export for Visualization

Finally, we'll save the unified dataset to a file for use in the visualization.


In [None]:
import shutil

# Write the unified FeatureCollection as indented JSON
unified_geojson_path = COMBINED_DIR / 'traffic_data_unified.json'
with open(unified_geojson_path, 'w') as geojson_file:
    json.dump(unified_geojson, geojson_file, indent=2)

print(f"Saved unified GeoJSON to {unified_geojson_path}")


def _zero_if_none(value):
    """Return 0 in place of ``None``; any other value passes through."""
    return 0 if value is None else value


def _feature_to_row(feature):
    """Flatten one feature into the dict of columns used for the CSV export."""
    props = feature['properties']
    position = feature['geometry']['coordinates']
    # Key order here is the CSV column order
    return {
        'data_type': props.get('data_type', ''),
        'district': props.get('district', ''),
        'region': props.get('region', ''),
        'intensity': props.get('intensity', 0),
        'intensity_category': props.get('intensity_category', ''),
        'frequency': props.get('frequency', ''),
        'count': _zero_if_none(props.get('count')),
        'visits': _zero_if_none(props.get('visits')),
        'weather': props.get('weather', ''),
        'time_of_day': props.get('time_of_day', ''),
        'time_period': props.get('time_period', ''),
        'longitude': position[0],
        'latitude': position[1],
    }


# Simplified tabular copy of the key fields, one row per feature
rows = [_feature_to_row(item) for item in unified_geojson['features']]

df = pd.DataFrame(rows)
unified_csv_path = COMBINED_DIR / 'traffic_data_unified.csv'
df.to_csv(unified_csv_path, index=False)
print(f"Saved unified CSV to {unified_csv_path}")

# Publish both files to the web app under its fixed file names
for _source, _published_name in (
    (unified_geojson_path, 'data.json'),
    (unified_csv_path, 'data.csv'),
):
    shutil.copy(_source, CLIENT_DATA_DIR / _published_name)
print(f"Copied files to web app directory: {CLIENT_DATA_DIR}")

Saved unified GeoJSON to ../output/combined/traffic_data_unified.json
Saved unified CSV to ../output/combined/traffic_data_unified.csv
Copied files to web app directory: ../../client/data


## 6. Create Overview Statistics

Let's create some summary statistics that might be useful for the visualization.


In [None]:
# Per (region, data type): total count, total visits, number of distinct districts
region_stats = (
    df.groupby(['region', 'data_type'])
    .agg({'count': 'sum', 'visits': 'sum', 'district': 'nunique'})
    .reset_index()
    .rename(columns={'district': 'num_districts'})
)


def _rows_per(column):
    """Count the rows of ``df`` for each (``column``, ``data_type``) pair."""
    return df.groupby([column, 'data_type']).size().reset_index(name='count')


# Row counts split by each environmental label
weather_stats = _rows_per('weather')
time_stats = _rows_per('time_of_day')
period_stats = _rows_per('time_period')

# Show the tables
for _header, _table in (
    ("Statistics by region and data type:", region_stats),
    ("\nStatistics by weather and data type:", weather_stats),
    ("\nStatistics by time of day and data type:", time_stats),
):
    print(_header)
    print(_table)

# Assemble the summary document: headline numbers first, then each table as records
summary = {
    'total_points': len(df),
    'data_types': data_types,
    'regions': df['region'].nunique(),
    'districts': df['district'].nunique(),
}
summary.update(
    (_name, _table.to_dict(orient='records'))
    for _name, _table in (
        ('region_stats', region_stats),
        ('weather_stats', weather_stats),
        ('time_stats', time_stats),
        ('period_stats', period_stats),
    )
)

summary_path = COMBINED_DIR / 'traffic_data_summary.json'
with open(summary_path, 'w') as summary_file:
    json.dump(summary, summary_file, indent=2)

# Publish the summary next to the other web app data files
shutil.copy(summary_path, CLIENT_DATA_DIR / 'traffic_data_summary.json')
print("\nSaved and copied summary statistics with environmental factors")

Statistics by region and data type:
                         region     data_type  count  visits  num_districts
0  Australian Capital Territory  speed_camera  83486   83486              3
1                    Queensland  infringement  70102       0             15

Saved and copied summary statistics
