In [1]:
# IP USED THROUGHOUT THE ASSIGNMENT
hostip = "172.22.32.1"

%pip install folium

Collecting folium
  Using cached folium-0.18.0-py2.py3-none-any.whl (108 kB)
Collecting branca>=0.6.0
  Using cached branca-0.8.1-py3-none-any.whl (26 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.8.1 folium-0.18.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pymongo
from pymongo import MongoClient
from datetime import datetime
import csv
import folium
from folium import Map, Marker, Icon, Popup, PolyLine, CircleMarker
from folium.plugins import MarkerCluster
import branca
from pymongo import MongoClient
import pandas as pd
from datetime import datetime
from typing import Dict, Tuple, Optional

# Notebook-wide MongoDB connection to `hostip` (set in the first cell) on port
# 27017, plus a handle on the `fit3182_db` database.
# NOTE(review): the helper functions defined later in this notebook open their
# own short-lived clients instead of using these globals — confirm whether other
# cells still rely on `client` / `db`.
client = MongoClient(hostip, 27017) 
db = client.fit3182_db

In [5]:
from datetime import datetime, timedelta
from typing import Dict, Tuple, Optional

import branca
import pandas as pd
from folium import Map, Marker, PolyLine, CircleMarker, Popup, Icon
from folium.plugins import MarkerCluster
from pymongo import MongoClient

# Configuration constants
# Connection settings and collection names used by the helper functions below.
# NOTE(review): MONGO_HOST repeats the address assigned to `hostip` in the first
# cell; keep the two in sync if the server address changes.
MONGO_HOST: str = "172.22.32.1"   # MongoDB server address
MONGO_PORT: int = 27017           # MongoDB server port
MONGO_DB: str = "fit3182_db"      # database holding both collections
CAM_COLL: str = "Camera"          # collection read by load_camera_locations()
VIOL_COLL: str = "Violation"      # collection read by fetch_violations_for()


def load_camera_locations() -> Dict[int, Tuple[float, float]]:
    """
    Load camera coordinates from MongoDB.

    Opens a connection to the configured MongoDB instance, reads the `_id`,
    `lat` and `long` fields of every document in the camera collection and
    returns them as a lookup table. The connection is always closed, including
    when the query fails part-way through.

    Returns
    -------
    Dict[int, Tuple[float, float]]
        Mapping from camera ID (the document `_id`) to (latitude, longitude).

    Raises
    ------
    pymongo.errors.PyMongoError
        If the database connection or query fails.
    KeyError
        If a camera document lacks a `lat` or `long` field.
    """
    # MongoClient is a context manager: leaving the `with` block closes the
    # client even if iterating the cursor raises, so no connection is leaked.
    with MongoClient(host=MONGO_HOST, port=MONGO_PORT) as client:
        coll = client[MONGO_DB][CAM_COLL]
        records = coll.find({}, {"_id": 1, "lat": 1, "long": 1})

        # The cursor is lazy, so it must be fully consumed while the client is
        # still open.
        return {
            rec['_id']: (rec['lat'], rec['long'])
            for rec in records
        }


def fetch_violations_for(
    date_str: str,
    hour: Optional[int] = None
) -> pd.DataFrame:
    """
    Query the violations collection for a given date and optional hour.

    Documents whose `date` field falls inside the half-open window
    [date 00:00, next day 00:00) are fetched, and each entry of their
    `violations` array becomes one row of the result.

    Parameters
    ----------
    date_str : str
        Date in 'YYYY-MM-DD' format to filter violations.
    hour : Optional[int], optional
        Hour of the day (0-23) to further filter the results. If provided,
        only violations whose `timestamp_start` falls in this hour are
        returned.

    Returns
    -------
    pd.DataFrame
        Flattened DataFrame with columns:
            - car_plate: str
            - type: str
            - camera_id_start: int
            - camera_id_end: Optional[int]
            - timestamp_start: datetime
            - timestamp_end: Optional[datetime]
            - measured_speed: float
        An empty DataFrame (with the same columns) is returned if no matching
        records are found.

    Raises
    ------
    ValueError
        If `date_str` is not in the expected YYYY-MM-DD format.
    pymongo.errors.PyMongoError
        If the database connection or query fails.
    """
    # Parse date and define query window
    try:
        start = datetime.strptime(date_str, "%Y-%m-%d")
    except ValueError as e:
        raise ValueError(f"Invalid date format: {date_str}") from e

    # timedelta arithmetic rolls over month and year boundaries correctly;
    # constructing datetime(..., start.day + 1) raised ValueError on the last
    # day of every month.
    end = start + timedelta(days=1)

    # The context manager closes the client even if the query raises.
    with MongoClient(host=MONGO_HOST, port=MONGO_PORT) as client:
        coll = client[MONGO_DB][VIOL_COLL]
        # Materialise the cursor while the connection is still open.
        docs = list(coll.find({
            "date": {"$gte": start, "$lt": end}
        }))

    # Flatten documents into one row per violation entry
    rows = [
        {
            'car_plate': doc['car_plate'],
            'type': v['type'],
            'camera_id_start': v['camera_id_start'],
            'camera_id_end': v.get('camera_id_end'),
            'timestamp_start': v['timestamp_start'],
            'timestamp_end': v.get('timestamp_end'),
            'measured_speed': v['measured_speed'],
        }
        for doc in docs
        for v in doc.get('violations', [])
    ]

    # Passing the column list keeps the schema stable even when `rows` is empty.
    columns = [
        'car_plate', 'type', 'camera_id_start', 'camera_id_end',
        'timestamp_start', 'timestamp_end', 'measured_speed',
    ]
    df = pd.DataFrame(rows, columns=columns)
    if df.empty:
        return df

    # Normalize timestamps so the `.dt` accessor is available for filtering
    df['timestamp_start'] = pd.to_datetime(df['timestamp_start'])
    if hour is not None:
        df = df[df['timestamp_start'].dt.hour == hour]

    return df


def compute_map_center(
    df: pd.DataFrame,
    camera_locations: Dict[int, Tuple[float, float]]
) -> Tuple[float, float]:
    """
    Return the mean position of the start cameras that appear in `df`.

    Each distinct `camera_id_start` value contributes once, regardless of how
    many rows reference it. IDs absent from `camera_locations` are ignored.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing a 'camera_id_start' column.
    camera_locations : Dict[int, Tuple[float, float]]
        Mapping from camera IDs to (latitude, longitude).

    Returns
    -------
    Tuple[float, float]
        (mean latitude, mean longitude) over the known start cameras.

    Raises
    ------
    ValueError
        If none of the start cameras in `df` has a known location.
    """
    known_lats = []
    known_lons = []
    for camera_id in df['camera_id_start'].unique():
        if camera_id not in camera_locations:
            continue
        lat, lon = camera_locations[camera_id]
        known_lats.append(lat)
        known_lons.append(lon)

    if not known_lats:
        raise ValueError("No valid camera locations to compute center.")

    n_cameras = len(known_lats)
    mean_lat = sum(known_lats) / n_cameras
    mean_lon = sum(known_lons) / n_cameras
    return (mean_lat, mean_lon)


def add_violation_clusters(
    m: Map,
    df: pd.DataFrame,
    camera_locations: Dict[int, Tuple[float, float]]
) -> None:
    """
    Add clustered markers for each violation on the map.

    Rows whose type is 'instantaneous' are placed at the start camera and
    labelled with `timestamp_start`; all other rows are placed at the end
    camera and labelled with `timestamp_end`. Rows whose camera has no known
    location are skipped. A marker is red when the type string contains
    'VIOLATION' (case-insensitive) and blue otherwise.

    Parameters
    ----------
    m : folium.Map
        Folium map instance to add markers to.
    df : pd.DataFrame
        DataFrame of violations with camera IDs and timestamps.
    camera_locations : Dict[int, Tuple[float, float]]
        Mapping from camera IDs to (lat, lon).
    """
    cluster = MarkerCluster(name='Violations').add_to(m)

    for _, row in df.iterrows():
        is_instantaneous = row['type'] == 'instantaneous'

        # Select appropriate camera based on violation type
        camera_val = row['camera_id_start'] if is_instantaneous else row['camera_id_end']
        loc = camera_locations.get(camera_val)
        if not loc:
            # Skip if location unknown or missing (a NaN camera id lands here too)
            continue

        # Red for 'VIOLATION', blue otherwise.
        # NOTE(review): the only type values this notebook compares against are
        # 'instantaneous' and 'average'; neither contains 'VIOLATION', so such
        # rows are drawn blue — confirm this is the intended colour rule.
        color = 'red' if 'VIOLATION' in row['type'].upper() else 'blue'

        # Determine correct timestamp field
        time_val = row['timestamp_start'] if is_instantaneous else row['timestamp_end']

        if time_val is None or pd.isna(time_val):
            # timestamp_end is optional; calling strftime on None/NaT would
            # raise, so show a placeholder instead of aborting the whole map.
            time_str = "N/A"
        else:
            if isinstance(time_val, str):
                # Normalize string timestamps (strip a trailing 'Z' if present)
                time_val = datetime.fromisoformat(time_val.rstrip('Z'))
            time_str = time_val.strftime("%H:%M:%S")

        popup = (
            f"<b>Camera:</b> {int(camera_val)}<br>"
            f"<b>Plate:</b> {row['car_plate']}<br>"
            f"<b>Type:</b> {row['type']}<br>"
            f"<b>Time:</b> {time_str}<br>"
            f"<b>Speed:</b> {row['measured_speed']}"
        )

        Marker(location=loc, popup=popup, icon=Icon(color=color)).add_to(cluster)


def add_segment_polylines(
    m: Map,
    df: pd.DataFrame,
    camera_locations: Dict[int, Tuple[float, float]],
    colormap: branca.colormap.LinearColormap
) -> None:
    """
    Draw polylines between cameras for 'average' violations, colored by count.

    Rows of type 'average' are grouped by (start camera, end camera); one line
    is drawn per pair, coloured by the number of rows in the group. Pairs where
    either camera has no known location are skipped. Nothing is drawn when
    there are no 'average' rows.

    Parameters
    ----------
    m : folium.Map
        Folium map instance to draw segments on.
    df : pd.DataFrame
        DataFrame of violations including start/end camera IDs.
    camera_locations : Dict[int, Tuple[float, float]]
        Mapping from camera IDs to (lat, lon).
    colormap : branca.colormap.LinearColormap
        Colormap for encoding violation counts.
    """
    seg = df[df['type'] == 'average'][['camera_id_start', 'camera_id_end']]
    if seg.empty:
        return

    # Count number of average violations per segment
    counts = (
        seg.groupby(['camera_id_start', 'camera_id_end'])
           .size()
           .reset_index(name='count')
    )

    for a, b, cnt in counts.itertuples(index=False):
        loc_a = camera_locations.get(a)
        loc_b = camera_locations.get(b)
        if not loc_a or not loc_b:
            # Skip incomplete segments
            continue

        # A column containing NaN is stored as float, so IDs can arrive as
        # 1.0 / 2.0. Cast for display so the popup reads "Camera 1→2",
        # matching how the other layers print camera IDs.
        start_id, end_id, n_violations = int(a), int(b), int(cnt)

        PolyLine(
            locations=[loc_a, loc_b],
            color=colormap(n_violations),
            weight=4,
            opacity=0.8,
            popup=(f"<b>Camera {start_id}\u2192{end_id}:</b><br>"
                   f"<b>{n_violations}</b> AVG Violations")
        ).add_to(m)


def add_camera_hotspots(
    m: Map,
    df: pd.DataFrame,
    camera_locations: Dict[int, Tuple[float, float]],
    colormap: branca.colormap.LinearColormap
) -> None:
    """
    Overlay circle markers at camera locations, sized and colored by total violations.

    Rows of type 'instantaneous' are attributed to their start camera; all
    other rows to their end camera. Rows without a camera, or whose camera has
    no known location, are ignored. Marker radius is `4 + sqrt(total)` so that
    busy cameras do not dwarf quiet ones. The input DataFrame is not modified.

    Parameters
    ----------
    m : folium.Map
        Folium map instance to add hotspots to.
    df : pd.DataFrame
        DataFrame of violations.
    camera_locations : Dict[int, Tuple[float, float]]
        Mapping from camera IDs to coordinates.
    colormap : branca.colormap.LinearColormap
        Colormap for total violation counts.
    """
    df = df.copy()  # work on a copy so the caller's frame gains no extra column
    df['camera_val'] = df.apply(
        lambda row: row['camera_id_start']
        if row['type'] == 'instantaneous'
        else row.get('camera_id_end'),
        axis=1
    )
    df = df[df['camera_val'].notna()]
    df = df[df['camera_val'].apply(lambda cid: cid in camera_locations)]

    if df.empty:
        return

    # Name the index and the counts explicitly. The column labels produced by
    # value_counts().reset_index() changed in pandas 2.0, so renaming
    # 'index'/'camera_val' afterwards only worked on older versions and raised
    # KeyError: 'cam' on newer ones.
    totals = (
        df['camera_val']
          .value_counts()
          .rename_axis('cam')
          .reset_index(name='total')
    )

    for cam_raw, total_raw in totals.itertuples(index=False):
        # Camera IDs may be stored as floats when the column contained NaN.
        cam, total = int(cam_raw), int(total_raw)
        loc = camera_locations.get(cam)
        if not loc:
            continue

        c = colormap(total)
        CircleMarker(
            location=loc,
            radius=4 + total**0.5,  # sqrt-scale for more balanced sizes
            color=c,
            fill=True,
            fill_color=c,
            fill_opacity=0.6,
            # Left at the default parse_html=False so the <b>/<br> markup is
            # rendered. parse_html=True escapes the string and shows the tags
            # literally. Both interpolated values are ints, so no untrusted
            # HTML reaches the popup.
            popup=Popup(
                f"<b>Camera {cam}</b><br>"
                f"<b>Total violations:</b> {total}"
            )
        ).add_to(m)


def create_violations_map(
    date_str: str,
    hour: Optional[int],
    output: str = 'violations_map.html'
) -> Optional[Map]:
    """
    Generate, save and return a Folium map visualizing violations for a given date and hour.

    This function coordinates loading locations, fetching and filtering data,
    computing map center, and adding layers for clusters, segments, and hotspots.

    Parameters
    ----------
    date_str : str
        Date in 'YYYY-MM-DD' format for which to generate the map.
    hour : Optional[int]
        Specific hour (0-23) to filter violations, or None for entire day.
    output : str, optional
        File path to save the generated HTML map, by default 'violations_map.html'.
        Pass an empty string (or None) to skip writing a file.

    Returns
    -------
    folium.Map or None
        Folium Map object if data available; None if no violations found
        (in which case no file is written).

    Raises
    ------
    ValueError
        If `date_str` is malformed, or if none of the start cameras in the
        fetched data has a known location.

    Notes
    -----
    - Map title is added as a centered HTML header.
    - The colormap is fixed from 0 to 200; adjust for larger datasets as needed.
    - The same colormap colours both the segment lines and the camera hotspots.
    """
    camera_locations = load_camera_locations()
    df = fetch_violations_for(date_str, hour)
    if df.empty:
        print('No violations to plot.')  # Inform user; no file generated
        return None

    center = compute_map_center(df, camera_locations)
    m = Map(location=center, zoom_start=13)

    # Define a linear color scale for average violations per segment
    colormap = branca.colormap.LinearColormap(
        ['green', 'yellow', 'red'], vmin=0, vmax=200,
        caption='Avg Violations per Segment'
    )
    colormap.add_to(m)

    add_violation_clusters(m, df, camera_locations)
    add_segment_polylines(m, df, camera_locations, colormap)
    add_camera_hotspots(m, df, camera_locations, colormap)

    # Add a map title overlay
    title_suffix = f" at {hour:02d}:00" if hour is not None else ''
    m.get_root().html.add_child(
        branca.element.Element(
            f"<h3 style='text-align:center'>Violations on {date_str}"
            + title_suffix
            + '</h3>'
        )
    )

    # Persist the map as documented. Previously `output` was accepted but never
    # used, so no HTML file was ever produced.
    if output:
        m.save(output)

    return m


In [6]:
# Prompt for the date and (optional) hour, then render the map. The call is
# the last expression of the cell so the returned map is displayed inline.
date_str = input('Enter date (YYYY-MM-DD): ').strip()
hour_in  = input('Enter hour (0–23) or leave blank: ').strip()

if hour_in:
    # Fail early with a clear message instead of an opaque int() traceback or
    # a silently empty map for an impossible hour.
    try:
        hour = int(hour_in)
    except ValueError:
        raise ValueError(f"Hour must be an integer between 0 and 23, got {hour_in!r}") from None
    if not 0 <= hour <= 23:
        raise ValueError(f"Hour must be between 0 and 23, got {hour}")
else:
    hour = None  # blank input means "whole day"

create_violations_map(date_str, hour)

Enter date (YYYY-MM-DD): 2024-01-01
Enter hour (0–23) or leave blank: 8
