# 🔮 Traffic Infringement Data Processing

This notebook processes raw traffic infringement data and converts it into the GeoJSON format required for the heatmap visualization.

## Overview

1. Load raw data
2. Clean and preprocess
3. Geocode locations
4. Aggregate by location
5. Transform to GeoJSON and export for visualization
6. Copy the exported files to the web app
7. Preview the result on a map

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import os
from pathlib import Path

## 1. Load Raw Data

First, let's load the traffic infringement data from the CSV file.

In [None]:
# Input and output locations, resolved relative to the current working directory
ROOT_DIR = Path('../../')
DATA_DIR = ROOT_DIR / 'data'
OUTPUT_DIR = Path('../output')

# Make sure the output directory (and any missing parents) is present
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Read the raw infringements CSV; its first row supplies the column names
infringements_path = DATA_DIR / 'trafficinfringementsissued.csv'
df = pd.read_csv(infringements_path, header=0)

# Preview the first few rows
df.head()

In [None]:
# Quick look at the size and schema of the dataset
dataset_shape = df.shape
print(f"Dataset shape: {dataset_shape}")
df.info()

## 2. Clean and Preprocess Data

We need to clean the data and prepare it for geocoding.

In [None]:
# Normalize column names: trim surrounding whitespace, lowercase,
# and turn inner spaces into underscores
trimmed_names = (name.strip() for name in df.columns)
df.columns = [name.lower().replace(' ', '_') for name in trimmed_names]

# Inspect the normalized column index
df.columns

In [None]:
# Count the missing entries in every column
print("Missing values by column:")
missing_per_column = df.isna().sum()
missing_per_column

## 3. Geocoding

We need to convert location names to coordinates. Let's use a dictionary mapping locations to coordinates.

In [None]:
# Static lookup standing in for a real geocoder. A production pipeline would
# call a geocoding service or use a fuller gazetteer of Australian locations.

# (region name, approximate latitude, approximate longitude)
_REGION_COORDS = [
    ('BRISBANE', -27.4698, 153.0251),
    ('GOLD COAST', -28.0167, 153.4000),
    ('SUNSHINE COAST', -26.6500, 153.0667),
    ('LOGAN', -27.6392, 153.1086),
    ('IPSWICH', -27.6161, 152.7610),
    ('CAIRNS', -16.9186, 145.7781),
    ('TOWNSVILLE', -19.2590, 146.8169),
    ('TOOWOOMBA', -27.5598, 151.9507),
    ('MACKAY', -21.1412, 149.1868),
    ('ROCKHAMPTON', -23.3791, 150.5100),
    ('BUNDABERG', -24.8500, 152.3500),
    ('HERVEY BAY', -25.2882, 152.8730),
    ('GLADSTONE', -23.8430, 151.2583),
    ('MARYBOROUGH', -25.5378, 152.7020),
    ('MOUNT ISA', -20.7256, 139.4927),
    # Add more regions as needed
]

# Queensland region name -> {'lat': ..., 'lon': ...}
qld_regions = {
    name: {'lat': lat, 'lon': lon}
    for name, lat, lon in _REGION_COORDS
}

In [None]:
# List the distinct districts that occur in the data
district_column = df['district']
locations = district_column.unique()
print(f"Locations in the dataset: {len(locations)}")
locations[:20]  # Show the first 20

In [None]:
# Function to add coordinates to the dataframe
def add_coordinates(row, regions=None):
    """Look up the approximate coordinates for a row's district.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a 'district' key.
        regions: Optional mapping of upper-case region name to a dict with
            'lat' and 'lon' keys. Defaults to the module-level ``qld_regions``.

    Returns:
        A two-element Series ``[latitude, longitude]``. Both elements are NaN
        when the district is not present in the lookup table.
    """
    if regions is None:
        regions = qld_regions

    district = row['district']
    # The lookup keys are upper-case with no padding, so normalize string
    # districts the same way; non-string values (e.g. NaN) are looked up as-is.
    key = district.strip().upper() if isinstance(district, str) else district

    coords = regions.get(key)
    if coords is None:
        # Unknown districts get missing coordinates (not a default location)
        # so they can be counted and filtered out later. NaN rather than None
        # keeps the resulting columns numeric.
        return pd.Series([float('nan'), float('nan')])
    return pd.Series([coords['lat'], coords['lon']])

# Geocode every row, then attach the results as two new columns
coordinates = df.apply(add_coordinates, axis=1)
df[['latitude', 'longitude']] = coordinates

# Report how many rows received coordinates
geocoded_rows = df['latitude'].notna().sum()
total_rows = len(df)
print(f"Locations with coordinates: {geocoded_rows} out of {total_rows}")

# Show a sample of the geocoded rows
has_coordinates = df['latitude'].notna()
df[has_coordinates].head()

## 4. Aggregate Data

Now, let's aggregate the data by location to get the total number of infringements per location.

In [None]:
# Total the infringement counts per district/coordinate pair, largest first.
# NOTE(review): groupby drops NaN keys by default, so districts that were not
# geocoded presumably disappear at this step - confirm that is intended.
location_counts = (
    df.groupby(['district', 'latitude', 'longitude'])['count']
    .sum()
    .reset_index()
    .sort_values('count', ascending=False)
)

# Show the ten locations with the most infringements
location_counts.head(10)

## 5. Transform to GeoJSON

Now let's convert our aggregated data to GeoJSON format for the heatmap.

In [None]:
# Function to normalize values to a 0-100 scale for intensity
def normalize_values(series):
    """Min-max scale a numeric Series onto the range 0-100.

    Args:
        series: pandas Series of numeric values.

    Returns:
        A Series on the same index in which the smallest input maps to 0 and
        the largest to 100. If every value is identical (including the
        single-element case) all non-missing entries are set to 100.0, since
        each one is simultaneously the maximum.
    """
    min_val = series.min()
    max_val = series.max()
    value_range = max_val - min_val

    if value_range == 0:
        # Without this guard the formula below evaluates 0 / 0 and yields NaN
        # for every entry, which would later be written out as invalid JSON.
        return series.astype(float).where(series.isna(), 100.0)

    return 100 * (series - min_val) / value_range

# Scale the aggregated counts to a 0-100 intensity
location_counts['intensity'] = normalize_values(location_counts['count'])

# Keep only the rows that have both coordinates
geo_data = location_counts.dropna(subset=['latitude', 'longitude'])

# Build one GeoJSON point feature per location.
# Coordinates are emitted as [longitude, latitude].
features = [
    {
        "type": "Feature",
        "properties": {
            "intensity": float(intensity),
            "location": district,
            "count": int(total),
        },
        "geometry": {
            "type": "Point",
            "coordinates": [float(lon), float(lat)],
        },
    }
    for district, lat, lon, total, intensity in zip(
        geo_data['district'],
        geo_data['latitude'],
        geo_data['longitude'],
        geo_data['count'],
        geo_data['intensity'],
    )
]

# Wrap the features in a FeatureCollection
geojson_data = {"type": "FeatureCollection", "features": features}

# Preview the first feature
geojson_data["features"][0]

In [None]:
# Write the feature collection to disk as indented JSON
output_file = OUTPUT_DIR / 'infringements.json'
with output_file.open('w') as json_handle:
    json.dump(geojson_data, json_handle, indent=2)

print(f"GeoJSON data saved to {output_file}")

In [None]:
# Write the heatmap columns to CSV as well, for compatibility
csv_output = OUTPUT_DIR / 'infringements.csv'
heatmap_columns = ['latitude', 'longitude', 'intensity']
geo_data[heatmap_columns].to_csv(csv_output, index=False)
print(f"CSV data saved to {csv_output}")

## 6. Copy to Web App

Finally, let's copy the processed files to the client/data directory so they can be used by the web application.

In [None]:
import shutil

# Destination folder for the web application's data files
client_data_dir = ROOT_DIR / 'client' / 'data'

# Create the destination folder (and any missing parents) if necessary
client_data_dir.mkdir(parents=True, exist_ok=True)

# Copy the GeoJSON and CSV exports under their web-app file names
for exported_file, client_name in (
    (output_file, 'data.json'),
    (csv_output, 'data.csv'),
):
    shutil.copy(exported_file, client_data_dir / client_name)

print(f"Files copied to web app directory: {client_data_dir}")

## 7. Visualization

Let's create a simple visualization to preview how our data might look on a map.

In [None]:
# folium must already be installed in the environment; this cell only imports it
import folium
from folium.plugins import HeatMap

# Base map centred on Australia
m = folium.Map(location=[-25.2744, 133.7751], zoom_start=4)

# One [latitude, longitude, intensity] triple per location
heat_data = [
    [lat, lon, weight]
    for lat, lon, weight in zip(
        geo_data['latitude'], geo_data['longitude'], geo_data['intensity']
    )
]

# Overlay the heatmap layer on the base map
HeatMap(heat_data).add_to(m)

# Write the interactive map to an HTML file
map_file = OUTPUT_DIR / 'preview_map.html'
m.save(map_file)

print(f"Preview map saved to {map_file}")
m