In [19]:
import time
import json
from kafka3 import KafkaProducer  # if kafka3 is your intended lib, else kafka-python
from datetime import datetime as dt
import pandas as pd

class kafkaProducer1:
    """Replay the rows of a CSV file to a Kafka topic as JSON events.

    The CSV is loaded once in the constructor; ``produce_all`` then sends one
    message per row and closes the underlying producer, so an instance is
    single-use.
    """

    def __init__(self, csv_path: str, kafka_server: str, producer_id: str, topic: str, batch_interval: float = 5):
        """Load the CSV and connect to Kafka.

        Args:
            csv_path: Path of the comma-separated file to replay. It must
                contain a ``timestamp_start`` column; a ``timestamp_end``
                column is optional and is never loaded.
            kafka_server: Bootstrap server address passed to ``KafkaProducer``.
            producer_id: Identifier copied into every event as ``producer_id``.
            topic: Topic every event is sent to.
            batch_interval: Stored on the instance; nothing in this class
                reads it.

        Raises:
            Exception: Any error from reading the CSV or creating the
                producer is logged and re-raised unchanged.
        """
        self.kafka_server = kafka_server
        self.producer_id = producer_id
        self.topic = topic
        self.batch_interval = batch_interval

        # Load data; only timestamp_start is parsed as a date.
        try:
            self.df = pd.read_csv(
                csv_path,
                sep=',',
                # Skip timestamp_end while reading instead of parsing it and
                # dropping it afterwards; this also lets files without that
                # column load.
                usecols=lambda column: column != 'timestamp_end',
                parse_dates=["timestamp_start"],
            )
            print(f"[INFO] Successfully loaded data from {csv_path}")
        except Exception as e:
            print(f"[ERROR] Failed to load CSV file {csv_path}: {e}")
            raise

        # Initialize Kafka producer
        try:
            self.producer = KafkaProducer(
                bootstrap_servers=[self.kafka_server],
                api_version=(0, 10),
                value_serializer=lambda v: json.dumps(v).encode('utf-8'),
            )
            print(f"[INFO] Connected to Kafka server at {self.kafka_server}")
        except Exception as e:
            print(f"[ERROR] Failed to connect to Kafka at {self.kafka_server}: {e}")
            raise

    def _build_event(self, row) -> dict:
        """Turn one DataFrame row into a JSON-serialisable event dict."""
        event = {}
        for column, value in row.to_dict().items():
            if isinstance(value, pd.Timestamp):
                value = value.isoformat()
            elif pd.api.types.is_scalar(value) and pd.isna(value):
                # pandas marks missing cells with NaN/NaT; send them as null
                # rather than the non-standard JSON token ``NaN``.
                value = None
            event[column] = value

        # Add producer metadata
        event['producer_id'] = self.producer_id
        event['sent_at'] = dt.now().isoformat()
        return event

    @staticmethod
    def _encode_key(car_plate):
        """Return the message key bytes for a plate, or None when it is missing."""
        if car_plate is None:
            return None
        return str(car_plate).encode('utf-8')

    def produce_all(self) -> None:
        """Send every loaded row to ``self.topic``, then flush and close.

        A failure to send a single event is logged and the remaining rows are
        still attempted. The producer is closed even if iteration or flushing
        raises.
        """
        print(f"[INFO] Starting to produce {len(self.df)} records...")

        try:
            for _, row in self.df.iterrows():
                event = self._build_event(row)
                try:
                    self.producer.send(
                        self.topic,
                        key=self._encode_key(event.get('car_plate')),
                        value=event,
                        timestamp_ms=int(time.time() * 1000)
                    )
                except Exception as e:
                    print(f"[WARN] Failed to send event with violation_id {event.get('violation_id')}: {e}")

            self.producer.flush()
        finally:
            self.producer.close()
        print("[INFO] Finished producing all records and closed producer.")


In [None]:
#### TIMESTAMP PER HOUR AND COUNT OF VIOLATIONS WITH MIN AND MAX

import json
from kafka3 import KafkaConsumer  # or kafka-python
import matplotlib.pyplot as plt
from collections import Counter
from datetime import datetime

# Topic to read and the broker to bootstrap from.
topic = "violation_historic"
kafka_server = "192.168.1.10:9092"

# Consumer that decodes every message value from UTF-8 JSON.
# No group_id is passed here — NOTE(review): confirm whether
# enable_auto_commit has any effect without one.
consumer = KafkaConsumer(
    topic,
    value_deserializer=lambda raw: json.loads(raw.decode('utf-8')),
    enable_auto_commit=True,
    auto_offset_reset='earliest',
    bootstrap_servers=[kafka_server],
)

# For Jupyter notebook use this line:
%matplotlib notebook


# Single figure that the streaming loop redraws for every message.
fig, ax = plt.subplots(figsize=(9.5, 6))
ax.set_title("Violations per Hour")
ax.set_ylabel("Count")
ax.set_xlabel("Time")

# Number of events seen per hour bucket, keyed by the truncated datetime.
hour_counts = Counter()

def add_min_max(ax, x_data, y_data):
    """Annotate the smallest and largest value of a series on ``ax``.

    Args:
        ax: Object exposing ``annotate`` (a matplotlib Axes in this notebook).
        x_data: X positions, indexed in parallel with ``y_data``.
        y_data: Y values; when empty nothing is drawn. On ties the first
            occurrence is annotated.
    """
    if y_data:
        lowest = min(y_data)
        highest = max(y_data)
        x_low = x_data[y_data.index(lowest)]
        x_high = x_data[y_data.index(highest)]

        def mark(text, x_pos, y_pos, y_text, colour):
            # Arrow points at (x_pos, y_pos); the label sits at the same x.
            ax.annotate(text, xy=(x_pos, y_pos), xytext=(x_pos, y_text),
                        arrowprops=dict(facecolor=colour, shrink=0.05))

        # Labels are offset by 10 data units below / above the point.
        mark(f'Min: {lowest}', x_low, lowest, lowest - 10, 'blue')
        mark(f'Max: {highest}', x_high, highest, highest + 10, 'red')

print("[INFO] Starting to consume and plot...")

# Poll one record at a time, bucket it by hour and redraw the chart.
# Runs until the kernel is interrupted; the consumer is always closed.
try:
    while True:
        message_pack = consumer.poll(timeout_ms=1000, max_records=1)
        if not message_pack:
            continue
        for tp, messages in message_pack.items():
            for message in messages:
                event = message.value
                if not isinstance(event, dict):
                    continue
                ts_str = event.get("timestamp_start")
                if not ts_str:
                    continue
                try:
                    dt_obj = datetime.fromisoformat(ts_str)
                except (TypeError, ValueError):
                    # Not an ISO-8601 string: skip this event only.
                    continue

                # Round/truncate to hour
                dt_hour = dt_obj.replace(minute=0, second=0, microsecond=0)
                print("we are in this hour ", dt_hour)

                # Update counts
                hour_counts[dt_hour] += 1

                # Limit to last 10 hours to keep plot clean. Done before
                # drawing so the chart never shows more than 10 buckets.
                while len(hour_counts) > 10:
                    del hour_counts[min(hour_counts)]

                # Prepare sorted data for plotting
                hours_sorted = sorted(hour_counts)
                counts_sorted = [hour_counts[h] for h in hours_sorted]

                # Clear and replot
                ax.clear()
                ax.plot(hours_sorted, counts_sorted, marker='o', label="Violations")
                ax.set_title("Violations per Hour")
                ax.set_xlabel("Hour")
                ax.set_ylabel("Count")
                ax.tick_params(axis='x', rotation=45)
                ax.legend()

                # Pin one tick per bucket before labelling; otherwise the
                # labels are attached to whatever ticks the auto-locator
                # picked and no longer match the data points.
                ax.set_xticks(hours_sorted)
                ax.set_xticklabels([h.strftime('%Y-%m-%d %H:%M') for h in hours_sorted], rotation=45)

                # Add min/max annotations
                add_min_max(ax, hours_sorted, counts_sorted)

                fig.canvas.draw()
                fig.canvas.flush_events()

                plt.pause(0.1)
except KeyboardInterrupt:
    print("[INFO] Stopped by user")
finally:
    consumer.close()


In [None]:
import json
from kafka3 import KafkaConsumer
import matplotlib.pyplot as plt
from datetime import datetime

# Kafka setup
topic = "violation_historic"
kafka_server = "172.17.0.1:9092"

# Consumer that decodes every message value from UTF-8 JSON.
# No group_id is passed here — NOTE(review): confirm whether
# enable_auto_commit has any effect without one.
consumer = KafkaConsumer(
    topic,
    value_deserializer=lambda raw: json.loads(raw.decode('utf-8')),
    enable_auto_commit=True,
    auto_offset_reset='earliest',
    bootstrap_servers=[kafka_server],
)

# For Jupyter, enable interactive plotting
%matplotlib notebook

# Initialize plot
fig, ax = plt.subplots(figsize=(9.5, 6))
ax.set_title("Speed of Violations Over Time")
ax.set_ylabel("Speed")
ax.set_xlabel("Time")

# Parallel lists filled by the streaming loop: one timestamp and one speed
# per accepted event.
speed_container = []
timestamp_container = []

# Utility function to add annotations for min, max, avg
def add_stats(ax, x_data, y_data):
    """Annotate minimum, maximum and mean of a series on ``ax``.

    Args:
        ax: Object exposing ``annotate`` and ``axhline`` (a matplotlib Axes
            in this notebook).
        x_data: X positions, indexed in parallel with ``y_data``.
        y_data: Y values; when empty nothing is drawn. On ties the first
            occurrence of the minimum / maximum is annotated.
    """
    if y_data:
        lowest = min(y_data)
        highest = max(y_data)
        mean = sum(y_data) / len(y_data)

        x_low = x_data[y_data.index(lowest)]
        x_high = x_data[y_data.index(highest)]
        x_last = x_data[-1]  # the mean label is pinned to the newest point

        def mark(text, x_pos, y_pos, y_text, colour):
            # Arrow points at (x_pos, y_pos); the label sits at the same x.
            ax.annotate(text, xy=(x_pos, y_pos), xytext=(x_pos, y_text),
                        arrowprops=dict(facecolor=colour, shrink=0.05))

        # Labels are offset by 5 data units below / above the point.
        mark(f'Min: {lowest:.2f}', x_low, lowest, lowest - 5, 'blue')
        mark(f'Max: {highest:.2f}', x_high, highest, highest + 5, 'red')

        # Dashed horizontal line at the mean, plus its own label.
        ax.axhline(y=mean, color='green', linestyle='--', label=f'Avg: {mean:.2f}')
        mark(f'Avg: {mean:.2f}', x_last, mean, mean + 5, 'green')

# Streaming loop
# Poll one record at a time, append its speed and redraw the chart.
# Runs until the kernel is interrupted; the consumer is always closed.
try:
    while True:
        message_pack = consumer.poll(timeout_ms=1000, max_records=1)
        if not message_pack:
            continue

        for tp, messages in message_pack.items():
            for message in messages:
                event = message.value
                if not isinstance(event, dict):
                    continue

                # Parse timestamp and speed
                ts_str = event.get("timestamp_start")
                speed = event.get("speed")

                if ts_str is None or speed is None:
                    continue

                try:
                    # fromisoformat also accepts fractional seconds, which a
                    # fixed "%Y-%m-%dT%H:%M:%S" pattern rejects.
                    timestamp = datetime.fromisoformat(ts_str)
                    speed_value = float(speed)
                except (TypeError, ValueError):
                    # Malformed timestamp or speed: skip this event only.
                    continue

                if speed_value != speed_value:
                    # NaN is the only float unequal to itself; it would
                    # distort min/max/avg, so skip it.
                    continue

                timestamp_container.append(timestamp)
                speed_container.append(speed_value)

                # Clear and replot
                ax.clear()
                ax.set_title('Speed of Violations Over Time')
                ax.set_xlabel('Timestamp')
                ax.set_ylabel('Speed')
                ax.plot(timestamp_container, speed_container, marker='o', color='orange', label="Speed")
                add_stats(ax, timestamp_container, speed_container)
                ax.legend()
                ax.tick_params(axis='x', rotation=45)
                fig.tight_layout()
                plt.pause(0.01)
except KeyboardInterrupt:
    print("Stopped by user")
finally:
    consumer.close()
            

            




In [7]:
import json
from kafka3 import KafkaConsumer

# Define topic and Kafka broker
topic = "violations"
kafka_server = "172.17.0.1:9092"

# Consumer in group "violation-consumer-group"; message values are decoded
# from UTF-8 JSON.
consumer = KafkaConsumer(
    topic,
    group_id="violation-consumer-group",
    value_deserializer=lambda raw: json.loads(raw.decode('utf-8')),
    enable_auto_commit=True,
    # NOTE(review): presumably only applies when the group has no committed
    # offset yet — confirm against the client documentation.
    auto_offset_reset='earliest',
    bootstrap_servers=[kafka_server],
)

print(f"Consuming from topic: {topic} on {kafka_server}")

# Print each decoded message until the kernel is interrupted; the consumer
# is closed on every exit path.
try:
    for message in consumer:
        event = message.value
        print(f"Message received: {event}")
except KeyboardInterrupt:
    print("Stopped by user")
finally:
    consumer.close()


Consuming from topic: violations on 172.17.0.1:9092
Stopped by user
