In [None]:
import json
from kafka3 import KafkaConsumer
import matplotlib.pyplot as plt
from collections import Counter
from datetime import datetime
import matplotlib.dates as mdates

# Kafka setup
topic = "mongo_stream_plot"
kafka_server = "172.17.0.1:9092"

consumer = KafkaConsumer(
    topic,
    bootstrap_servers=[kafka_server],
    auto_offset_reset='earliest',
    value_deserializer=lambda m: json.loads(m.decode('utf-8'))
)

# For Jupyter: enable interactive plotting
%matplotlib notebook

# Setup figure and axes
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
fig.suptitle("Traffic Violations - Daily Count & Speed Over Time")

# Initialize containers
day_counts = Counter()
timestamp_container = []
speed_container = []

# Utility: annotate min/max on count plot
def annotate_count(ax, x_data, y_data):
    """Label the smallest and largest values of a count series on ``ax``.

    An arrow annotation is placed at the first occurrence of the minimum
    (blue, text one unit below the point) and of the maximum (red, text one
    unit above the point). Nothing is drawn when ``y_data`` is empty.

    Args:
        ax: Object providing ``annotate`` (a Matplotlib Axes in this notebook).
        x_data: Sequence of x positions, index-aligned with ``y_data``.
        y_data: Sequence of counts exposing ``.index`` (e.g. a list).
    """
    if y_data:
        lowest, highest = min(y_data), max(y_data)
        # ``index`` returns the first match, so ties resolve to the earliest point.
        x_lowest = x_data[y_data.index(lowest)]
        x_highest = x_data[y_data.index(highest)]

        ax.annotate(
            f'Min: {lowest}',
            xy=(x_lowest, lowest),
            xytext=(x_lowest, lowest - 1),
            arrowprops={'facecolor': 'blue', 'shrink': 0.05},
        )
        ax.annotate(
            f'Max: {highest}',
            xy=(x_highest, highest),
            xytext=(x_highest, highest + 1),
            arrowprops={'facecolor': 'red', 'shrink': 0.05},
        )

# Utility: annotate min/max/avg on speed plot
def annotate_speed(ax, x_data, y_data):
    """Annotate the minimum, maximum and mean of a speed series on ``ax``.

    Arrows mark the first occurrence of the minimum (blue) and of the maximum
    (red). The arithmetic mean is drawn as a dashed green horizontal line and
    also labelled with an arrow at the last x position. Nothing is drawn when
    ``y_data`` is empty.

    Args:
        ax: Object providing ``annotate`` and ``axhline`` (a Matplotlib Axes
            in this notebook).
        x_data: Sequence of x positions, index-aligned with ``y_data``.
        y_data: Sequence of numeric speeds exposing ``.index`` (e.g. a list).
    """
    if y_data:
        slowest, fastest = min(y_data), max(y_data)
        mean_speed = sum(y_data) / len(y_data)

        x_slowest = x_data[y_data.index(slowest)]
        x_fastest = x_data[y_data.index(fastest)]
        x_latest = x_data[-1]  # the mean label is anchored at the newest sample

        ax.annotate(
            f'Min: {slowest:.2f}',
            xy=(x_slowest, slowest),
            xytext=(x_slowest, slowest - 5),
            arrowprops={'facecolor': 'blue', 'shrink': 0.05},
        )
        ax.annotate(
            f'Max: {fastest:.2f}',
            xy=(x_fastest, fastest),
            xytext=(x_fastest, fastest + 5),
            arrowprops={'facecolor': 'red', 'shrink': 0.05},
        )

        mean_text = f'Avg: {mean_speed:.2f}'
        ax.axhline(y=mean_speed, color='green', linestyle='--', label=mean_text)
        ax.annotate(
            mean_text,
            xy=(x_latest, mean_speed),
            xytext=(x_latest, mean_speed + 3),
            arrowprops={'facecolor': 'green', 'shrink': 0.05},
        )

# Start consuming
# Reads violation events from Kafka and redraws both panels after every valid
# violation. Malformed events or violations are skipped so that one bad record
# cannot stop the stream, and the consumer is closed when the loop is
# interrupted (e.g. "Interrupt Kernel" in Jupyter).
print("[INFO] Consuming and plotting...")
try:
    while True:
        # Iterating the consumer waits for records; the outer loop starts a
        # new iteration should the iterator ever return.
        for message in consumer:
            event = message.value
            if not isinstance(event, dict):
                # Valid JSON that is not an object carries no "violations" field.
                continue

            # ``or []`` also covers an explicit null in the payload.
            for violation in event.get("violations") or []:
                if not isinstance(violation, dict):
                    continue

                ts_str = violation.get("timestamp_start")
                speed = violation.get("measured_speed")

                if not ts_str or speed is None:
                    continue

                try:
                    dt_obj = datetime.fromisoformat(ts_str)
                    speed_value = float(speed)
                except (TypeError, ValueError):
                    # Unparseable timestamp or non-numeric speed: skip the record.
                    continue

                # --- Update for count plot (left) ---
                day_counts[dt_obj.date()] += 1

                if len(day_counts) > 10:
                    # Keep only the latest 10 days
                    for stale_day in sorted(day_counts)[:-10]:
                        del day_counts[stale_day]

                # --- Update for speed plot (right) ---
                timestamp_container.append(dt_obj)
                speed_container.append(speed_value)

                # Limit speed data to last 100 for visual clarity
                if len(timestamp_container) > 100:
                    del timestamp_container[:-100]
                    del speed_container[:-100]

                # --- Plotting ---

                # Clear axes
                ax1.clear()
                ax2.clear()

                # === Left: Daily Count ===
                days_sorted = sorted(day_counts)
                counts_sorted = [day_counts[d] for d in days_sorted]
                ax1.plot(days_sorted, counts_sorted, marker='o', label="Violations")
                annotate_count(ax1, days_sorted, counts_sorted)
                ax1.set_title("Violations per Day")
                ax1.set_xlabel("Date")
                ax1.set_ylabel("Count")
                ax1.tick_params(axis='x', rotation=45)
                ax1.legend()

                # === Right: Speed over Time ===
                ax2.plot(timestamp_container, speed_container, marker='o', color='orange', label="Speed")
                annotate_speed(ax2, timestamp_container, speed_container)
                ax2.set_title("Speed of Violations Over Time")
                ax2.set_xlabel("Timestamp")
                ax2.set_ylabel("Speed")
                ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d\n%H:%M:%S'))
                ax2.tick_params(axis='x', rotation=45)
                ax2.legend()

                fig.tight_layout()
                fig.canvas.draw()
                fig.canvas.flush_events()
                plt.pause(0.1)
except KeyboardInterrupt:
    print("[INFO] Stopped by user.")
finally:
    # Release the broker connection however the loop ends.
    consumer.close()


In [None]:
import json
from kafka3 import KafkaConsumer
import matplotlib.pyplot as plt
from collections import Counter
from datetime import datetime
import matplotlib.dates as mdates
import numpy as np

# Kafka connection settings: topic to read and broker address.
topic, kafka_server = "mongo_stream_plot", "172.17.0.1:9092"

# Each record value is decoded as UTF-8 text and parsed as JSON before it
# reaches the consuming loop.
consumer = KafkaConsumer(
    topic,
    value_deserializer=lambda raw: json.loads(raw.decode('utf-8')),
    auto_offset_reset='earliest',
    bootstrap_servers=[kafka_server],
)

# For Jupyter: enable interactive plotting
# %matplotlib notebook

# One figure with two side-by-side panels:
#   ax1 (left)  - violations per day
#   ax2 (right) - measured speed over time
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))
fig.suptitle("Traffic Violations - Daily Count & Speed Over Time")

# State filled in by the consuming loop.
day_counts = Counter()                         # date -> number of violations
timestamp_container, speed_container = [], []  # parallel lists for the speed panel
previous_day_count = 0                         # reference count for the daily spike check
speed_rolling_avg = []                         # history of rolling mean speeds
rolling_window = 5                             # samples per rolling mean
accumulate = []                                # not referenced by the code visible in this cell
# --- Utility functions (with enhancements) ---
def annotate_count(ax, x_data, y_data):
    """Label the smallest and largest values of a count series on ``ax``.

    An arrow annotation is placed at the first occurrence of the minimum
    (blue, text one unit below the point) and of the maximum (red, text one
    unit above the point). Nothing is drawn when ``y_data`` is empty.

    Args:
        ax: Object providing ``annotate`` (a Matplotlib Axes in this notebook).
        x_data: Sequence of x positions, index-aligned with ``y_data``.
        y_data: Sequence of counts exposing ``.index`` (e.g. a list).
    """
    if y_data:
        lowest, highest = min(y_data), max(y_data)
        # ``index`` returns the first match, so ties resolve to the earliest point.
        x_lowest = x_data[y_data.index(lowest)]
        x_highest = x_data[y_data.index(highest)]

        ax.annotate(
            f'Min: {lowest}',
            xy=(x_lowest, lowest),
            xytext=(x_lowest, lowest - 1),
            arrowprops={'facecolor': 'blue', 'shrink': 0.05},
        )
        ax.annotate(
            f'Max: {highest}',
            xy=(x_highest, highest),
            xytext=(x_highest, highest + 1),
            arrowprops={'facecolor': 'red', 'shrink': 0.05},
        )

def annotate_speed(ax, x_data, y_data):
    """Annotate min, max, mean and selected percentiles of a speed series.

    Arrows mark the first occurrence of the minimum (blue) and of the maximum
    (red). The arithmetic mean is drawn as a dashed green horizontal line and
    labelled with an arrow at the last x position. The 50th, 75th and 90th
    percentiles (via ``np.percentile``) are drawn as dotted horizontal lines,
    each with its value written at the last x position. Nothing is drawn when
    ``y_data`` is empty.

    Args:
        ax: Object providing ``annotate`` and ``axhline`` (a Matplotlib Axes
            in this notebook).
        x_data: Sequence of x positions, index-aligned with ``y_data``.
        y_data: Sequence of numeric speeds exposing ``.index`` (e.g. a list).
    """
    if y_data:
        slowest, fastest = min(y_data), max(y_data)
        mean_speed = sum(y_data) / len(y_data)

        x_slowest = x_data[y_data.index(slowest)]
        x_fastest = x_data[y_data.index(fastest)]
        x_latest = x_data[-1]  # the mean label is anchored at the newest sample

        ax.annotate(
            f'Min: {slowest:.2f}',
            xy=(x_slowest, slowest),
            xytext=(x_slowest, slowest - 5),
            arrowprops={'facecolor': 'blue', 'shrink': 0.05},
        )
        ax.annotate(
            f'Max: {fastest:.2f}',
            xy=(x_fastest, fastest),
            xytext=(x_fastest, fastest + 5),
            arrowprops={'facecolor': 'red', 'shrink': 0.05},
        )

        mean_text = f'Avg: {mean_speed:.2f}'
        ax.axhline(y=mean_speed, color='green', linestyle='--', label=mean_text)
        ax.annotate(
            mean_text,
            xy=(x_latest, mean_speed),
            xytext=(x_latest, mean_speed + 3),
            arrowprops={'facecolor': 'green', 'shrink': 0.05},
        )

        # Percentile lines
        band_styles = (
            ("50th (Median)", 'purple'),
            ("75th", 'magenta'),
            ("90th", 'cyan'),
        )
        levels = np.percentile(y_data, [50, 75, 90])
        for level, (band_name, band_color) in zip(levels, band_styles):
            ax.axhline(y=level, color=band_color, linestyle=':', alpha=0.7,
                       label=f'{band_name}: {level:.2f}')
            ax.annotate(f'{level:.2f}', xy=(x_data[-1], level),
                        xytext=(x_data[-1], level + 2), color=band_color)

# Start consuming
# Reads violation events from Kafka and redraws both panels after every valid
# violation. Malformed events or violations are skipped so that one bad record
# cannot stop the stream, and the consumer is closed when the loop is
# interrupted (e.g. "Interrupt Kernel" in Jupyter).
print("[INFO] Consuming and plotting...")
plt.ion()  # Turn on interactive mode for non-Jupyter environments

# Tunables for the live plot.
MAX_DAYS = 10       # days kept in the daily-count panel
MAX_POINTS = 100    # samples kept in the speed panel
HIGH_SPEED = 150    # speeds above this get a red star
LOW_SPEED = 10      # speeds below this get a blue star

try:
    while True:
        # Iterating the consumer waits for records; the outer loop starts a
        # new iteration should the iterator ever return.
        for message in consumer:
            event = message.value
            if not isinstance(event, dict):
                # Valid JSON that is not an object carries no "violations" field.
                continue

            # ``or []`` also covers an explicit null in the payload.
            for violation in event.get("violations") or []:
                if not isinstance(violation, dict):
                    continue

                ts_str = violation.get("timestamp_start")
                speed = violation.get("measured_speed")

                if not ts_str or speed is None:
                    continue

                try:
                    dt_obj = datetime.fromisoformat(ts_str)
                    speed_value = float(speed)
                except (TypeError, ValueError):
                    # Unparseable timestamp or non-numeric speed: skip the record.
                    continue

                # --- Update for count plot (left) ---
                day_counts[dt_obj.date()] += 1

                if len(day_counts) > MAX_DAYS:
                    # Keep only the latest MAX_DAYS days.
                    for stale_day in sorted(day_counts)[:-MAX_DAYS]:
                        del day_counts[stale_day]

                # --- Update for speed plot (right) ---
                timestamp_container.append(dt_obj)
                speed_container.append(speed_value)

                # Rolling mean over the newest ``rolling_window`` samples (or
                # all of them while fewer are available). It is recorded for
                # every sample so that the spike check below has a baseline
                # from the start of the stream.
                speed_rolling_avg.append(np.mean(speed_container[-rolling_window:]))

                if len(timestamp_container) > MAX_POINTS:
                    del timestamp_container[:-MAX_POINTS]
                    del speed_container[:-MAX_POINTS]
                if len(speed_rolling_avg) > MAX_POINTS:
                    # Only the last two entries are read; cap the history so
                    # it cannot grow without bound on an endless stream.
                    del speed_rolling_avg[:-MAX_POINTS]

                # --- Plotting ---
                ax1.clear()
                ax2.clear()

                # === Left: Daily Count ===
                days_sorted = sorted(day_counts)
                counts_sorted = [day_counts[d] for d in days_sorted]
                ax1.plot(days_sorted, counts_sorted, marker='o', label="Violations")
                annotate_count(ax1, days_sorted, counts_sorted)

                # Highlight spike in daily count: the latest day has more than
                # twice the count of the day before it in the window.
                previous_day_count = counts_sorted[-2] if len(counts_sorted) > 1 else 0
                if previous_day_count > 0 and counts_sorted[-1] > 2 * previous_day_count:
                    ax1.annotate('Spike!', xy=(days_sorted[-1], counts_sorted[-1]),
                                 xytext=(days_sorted[-1], counts_sorted[-1] + 2),
                                 arrowprops=dict(facecolor='lime', shrink=0.05))

                ax1.set_title("Violations per Day")
                ax1.set_xlabel("Date")
                ax1.set_ylabel("Count")
                ax1.tick_params(axis='x', rotation=45)
                ax1.legend()

                # === Right: Speed over Time ===
                ax2.plot(timestamp_container, speed_container, marker='o', color='orange', label="Speed")
                annotate_speed(ax2, timestamp_container, speed_container)

                # Highlight interesting speed points. Each group is drawn with
                # a single plot call so it gets exactly one legend entry.
                samples = list(zip(timestamp_container, speed_container))
                high_points = [(ts, sp) for ts, sp in samples if sp > HIGH_SPEED]
                low_points = [(ts, sp) for ts, sp in samples if sp < LOW_SPEED]
                if high_points:
                    ax2.plot([ts for ts, _ in high_points], [sp for _, sp in high_points],
                             'r*', markersize=10, label='High Speed')
                if low_points:
                    ax2.plot([ts for ts, _ in low_points], [sp for _, sp in low_points],
                             'b*', markersize=10, label='Low Speed')

                # Highlight speed spikes/drops based on rolling average
                if len(speed_container) > rolling_window and len(speed_rolling_avg) > 1:
                    current_speed = speed_container[-1]
                    # [-1] already includes the current sample; [-2] is the
                    # rolling mean as of the previous sample.
                    prev_avg = speed_rolling_avg[-2]
                    if current_speed > 1.5 * prev_avg:
                        ax2.annotate('Speed Spike!', xy=(timestamp_container[-1], current_speed),
                                     xytext=(timestamp_container[-1], current_speed + 5),
                                     arrowprops=dict(facecolor='lime', shrink=0.05))
                    elif current_speed < 0.6 * prev_avg:
                        ax2.annotate('Speed Drop!', xy=(timestamp_container[-1], current_speed),
                                     xytext=(timestamp_container[-1], current_speed - 5),
                                     arrowprops=dict(facecolor='red', shrink=0.05))

                ax2.set_title("Speed of Violations Over Time")
                ax2.set_xlabel("Timestamp")
                ax2.set_ylabel("Speed (km/h)")
                ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d\n%H:%M:%S'))
                ax2.tick_params(axis='x', rotation=45)
                ax2.legend()

                fig.tight_layout()
                fig.canvas.draw()
                fig.canvas.flush_events()
                plt.pause(0.1)
except KeyboardInterrupt:
    print("[INFO] Stopped by user.")
finally:
    # Release the broker connection however the loop ends.
    consumer.close()

# Reached after a user interrupt: leave interactive mode and keep the final
# figure on screen.
plt.ioff()
plt.show()