### Configurations

In [2]:
!pip install psycopg2-binary

Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.11-cp313-cp313-win_amd64.whl.metadata (5.1 kB)
Downloading psycopg2_binary-2.9.11-cp313-cp313-win_amd64.whl (2.7 MB)
   ---------------------------------------- 0.0/2.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.7 MB ? eta -:--:--
   --- ------------------------------------ 0.3/2.7 MB ? eta -:--:--
   --- ------------------------------------ 0.3/2.7 MB ? eta -:--:--
   --- ------------------------------------ 0.3/2.7 MB ? eta -:--:--
   --- ------------------------------------ 0.3/2.7 MB ? eta -:--:--
   --- ------------------------------------ 0.3/2.7 MB ? eta -:--:--
   ------- -------------------------------- 0.5/2.7 MB 272.9 kB/s eta 0:00:09
   ------- -------------------------------- 0.5/2.7 MB 272.9 kB/s eta 0:00:09
   ----------- ---------------------------- 0

In [2]:
import json
import os

import psycopg2
from kafka import KafkaConsumer

In [47]:
# Connection settings are read from environment variables so that hosts and
# credentials do not have to be edited into (and committed with) the notebook.
# The fallbacks are the previous hard-coded local-development values.

# Kafka
bootstrap_server = os.environ.get('KAFKA_BOOTSTRAP_SERVER', 'localhost:9092')
topic = os.environ.get('KAFKA_TOPIC', 'raw_sensor_data_ingestion')

# Kafka consumer configuration
# NOTE(review): with group_id left unset, kafka-python documents that offset
# commits are disabled, so enable_auto_commit has no effect and every run
# re-reads the topic from 'earliest'. Confirm this is intended: replayed
# messages are added again to the existing summary rows.
consumer = KafkaConsumer(
    topic,
    bootstrap_servers=[bootstrap_server],
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    # group_id='my-group',
    # Each message value is UTF-8 encoded JSON and is decoded into a Python object.
    value_deserializer=lambda m: json.loads(m.decode('utf-8'))
)

# PostgreSQL connection config
conn = psycopg2.connect(
    dbname=os.environ.get('IOT_DB_NAME', 'iot_data_analytic'),
    user=os.environ.get('IOT_DB_USER', 'admin'),
    password=os.environ.get('IOT_DB_PASSWORD', 'admin'),
    host=os.environ.get('IOT_DB_HOST', 'localhost'),
    port=os.environ.get('IOT_DB_PORT', '5432')
)
# Single module-level cursor shared by every CRUD helper below.
cursor = conn.cursor()

### DB Vehicle details table CRUD operations

In [43]:
def create_vehical_hourly_summary(data):
    """
    Insert one row into public.vehical_hourly_summary.

    data: dict with keys:
      - sensor_id (required)
      - avg_vehical_count_hourly (required)
      - vehical_count_per_hour (optional, defaults to 0)
      - record_count (optional, defaults to 0)
      - hour (optional, defaults to -1)

    Uses the module-level ``cursor`` / ``conn`` and commits on success.
    Returns: the id of the inserted row, or None on any error (the error is
    printed and the transaction is rolled back).
    """
    optional_defaults = (
        ("vehical_count_per_hour", 0),
        ("record_count", 0),
        ("hour", -1),
    )
    try:
        sql = """
            INSERT INTO public.vehical_hourly_summary
                (sensor_id,
                 avg_vehical_count_hourly,
                 vehical_count_per_hour,
                 record_count, hour)
            VALUES
                (%(sensor_id)s,
                 %(avg_vehical_count_hourly)s,
                 %(vehical_count_per_hour)s,
                 %(record_count)s, %(hour)s)
            RETURNING id;
        """

        # Required keys are read with [] so a missing one is reported through
        # the except branch below instead of inserting a partial row.
        row_values = {
            "sensor_id": data["sensor_id"],
            "avg_vehical_count_hourly": data["avg_vehical_count_hourly"],
        }
        for column, fallback in optional_defaults:
            row_values[column] = data.get(column, fallback)

        cursor.execute(sql, row_values)
        inserted_id = cursor.fetchone()[0]  # value produced by RETURNING id
        conn.commit()
        print(f"Inserted vehical_hourly_summary id={inserted_id}")
        return inserted_id
    except Exception as e:
        conn.rollback()
        print(f"Error inserting vehical_hourly_summary: {e}")
        return None
    

def get_vehical_hourly_summary_by_id(record_id):
    """
    Fetch a single vehical_hourly_summary row by its id.

    record_id: value compared against the table's id column.

    Uses the module-level ``cursor`` / ``conn``.
    Returns: a dict mapping column name -> value for the row, or None when
    no row matches or when the query fails (the error is printed).
    """
    try:
        query = """
            SELECT
                id,
                sensor_id,
                avg_vehical_count_hourly,
                created_date,
                updated_at,
                vehical_count_per_hour,
                record_count,
                hour
            FROM public.vehical_hourly_summary
            WHERE id = %s;
        """

        cursor.execute(query, (record_id,))
        row = cursor.fetchone()

        if not row:
            print(f"No vehical_hourly_summary found with id={record_id}")
            return None

        # cursor.description carries the selected column names in order.
        col_names = [desc[0] for desc in cursor.description]
        return dict(zip(col_names, row))

    except Exception as e:
        # A failed statement leaves the connection's transaction aborted, and
        # every later statement on it would fail until a rollback. The write
        # helpers already roll back; do the same here. The rollback is guarded
        # so this function still returns None if the connection itself is gone.
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(f"Rollback failed after read error: {rollback_error}")
        print(f"Error retrieving vehical_hourly_summary id={record_id}: {e}")
        return None



def get_vehical_hourly_summaries(sensor_id=None):
    """
    List vehical_hourly_summary rows, newest created_date first.

    sensor_id: when not None, only rows with this sensor_id are returned;
               when None, all rows are returned.

    Uses the module-level ``cursor`` / ``conn``.
    Returns: a list of dicts (column name -> value). An empty list is
    returned both when nothing matches and when the query fails (the error
    is printed), so callers cannot tell those two cases apart.
    """
    try:
        base_query = """
            SELECT
                id,
                sensor_id,
                avg_vehical_count_hourly,
                created_date,
                updated_at,
                vehical_count_per_hour,
                record_count,
                hour
            FROM public.vehical_hourly_summary
        """

        if sensor_id is not None:
            query = base_query + " WHERE sensor_id = %s ORDER BY created_date DESC;"
            params = (sensor_id,)
        else:
            query = base_query + " ORDER BY created_date DESC;"
            params = None

        cursor.execute(query, params)
        rows = cursor.fetchall()

        # cursor.description carries the selected column names in order.
        col_names = [desc[0] for desc in cursor.description]
        results = [dict(zip(col_names, row)) for row in rows]

        print(f"Retrieved {len(results)} vehical_hourly_summary record(s)")
        return results

    except Exception as e:
        # A failed statement leaves the connection's transaction aborted, and
        # every later statement on it would fail until a rollback. The write
        # helpers already roll back; do the same here. The rollback is guarded
        # so this function still returns [] if the connection itself is gone.
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(f"Rollback failed after read error: {rollback_error}")
        print(f"Error retrieving vehical_hourly_summary records: {e}")
        return []


def update_vehical_hourly_summary(record_id, data):
    """
    Apply a partial update to one vehical_hourly_summary row.

    record_id: id of the row to update.
    data: dict; only these keys are considered, and only when present:
      - sensor_id
      - avg_vehical_count_hourly
      - vehical_count_per_hour
      - record_count
      - hour
    updated_at is set to CURRENT_TIMESTAMP whenever at least one of them
    is written.

    Uses the module-level ``cursor`` / ``conn`` and commits after executing.
    Returns: True when a row was updated; False when ``data`` contains none
    of the keys above, when no row has the given id, or when the statement
    fails (the error is printed and the transaction is rolled back).
    """
    # Whitelist of columns that may appear in the SET clause, in the order
    # they are emitted. Column names never come from the caller's keys.
    updatable_columns = (
        "sensor_id",
        "avg_vehical_count_hourly",
        "vehical_count_per_hour",
        "record_count",
        "hour",
    )
    try:
        bound = {column: data[column] for column in updatable_columns if column in data}

        if not bound:
            print("No fields to update.")
            return False

        assignments = [f"{column} = %({column})s" for column in bound]
        # Always update updated_at
        assignments.append("updated_at = CURRENT_TIMESTAMP")

        statement = f"""
            UPDATE public.vehical_hourly_summary
            SET {', '.join(assignments)}
            WHERE id = %(id)s;
        """

        bound["id"] = record_id

        cursor.execute(statement, bound)
        conn.commit()

        if cursor.rowcount == 0:
            print(f"No vehical_hourly_summary updated, id={record_id} not found.")
            return False

        print(f"Updated vehical_hourly_summary id={record_id}")
        return True

    except Exception as e:
        conn.rollback()
        print(f"Error updating vehical_hourly_summary id={record_id}: {e}")
        return False


def delete_vehical_hourly_summary(record_id):
    """
    Remove the vehical_hourly_summary row with the given id.

    Uses the module-level ``cursor`` / ``conn`` and commits after executing.
    Returns True if a row was deleted; False if no row had that id or the
    statement failed (the error is printed and the transaction rolled back).
    """
    try:
        statement = """
            DELETE FROM public.vehical_hourly_summary
            WHERE id = %s;
        """
        cursor.execute(statement, (record_id,))
        conn.commit()

        # rowcount is the number of rows the DELETE affected.
        row_was_deleted = cursor.rowcount != 0
        if row_was_deleted:
            print(f"Deleted vehical_hourly_summary id={record_id}")
        else:
            print(f"No vehical_hourly_summary deleted, id={record_id} not found.")
        return row_was_deleted

    except Exception as e:
        conn.rollback()
        print(f"Error deleting vehical_hourly_summary id={record_id}: {e}")
        return False


##### Usage details

##### Create
new_id = create_vehical_hourly_summary({
    "sensor_id": "CAM-00123",
    "avg_vehical_count_hourly": "145.3",
    "vehical_count_per_hour": 160,
    "record_count": 12,
})

##### Read one
row = get_vehical_hourly_summary_by_id(new_id)

##### Update
update_vehical_hourly_summary(new_id, {
    "avg_vehical_count_hourly": "150.0",
    "vehical_count_per_hour": 170,
})

##### Delete
delete_vehical_hourly_summary(new_id)

### Logic calculations

In [44]:
def calculate_hourly_details(data, existing_records, mapping):
    """
    Fold one incoming message into the running totals of an existing row.

    data: the incoming message dict; its "volume" is added to the total.
          A message without "volume" contributes 0, matching the default
          that update_details uses when it builds the mapping.
    existing_records: dict for the stored row; "record_count" and
          "vehical_count_per_hour" are read from it. A None value for
          either is treated as 0.
    mapping: dict that is updated in place with the new "record_count",
          "vehical_count_per_hour" and "avg_vehical_count_hourly".

    Returns: the same ``mapping`` object.
    """
    volume = data.get("volume", 0)

    previous_count = existing_records["record_count"]
    if previous_count is None:
        previous_count = 0

    previous_total = existing_records["vehical_count_per_hour"]
    if previous_total is None:
        previous_total = 0

    # The new message counts as one more record, so the divisor below is
    # always at least 1.
    record_count = previous_count + 1
    mapping["record_count"] = record_count

    # Running total of vehicles across all records folded in so far.
    total_vehical_count = volume + previous_total
    mapping["vehical_count_per_hour"] = total_vehical_count

    # Average vehicles per record for this sensor.
    mapping["avg_vehical_count_hourly"] = total_vehical_count / record_count

    return mapping

def daily_peak_volume(data, existing_records, mapping):
    """Placeholder: not implemented yet. Ignores its arguments and returns None."""
    return None

def daily_sensor_availability(data, existing_records, mapping):
    """Placeholder: not implemented yet. Ignores its arguments and returns None."""
    return None

def update_details(data):
    """
    Upsert the summary row for the sensor named in one incoming message.

    data: message dict. "atd_device_id" is required and used as sensor_id;
          "volume" defaults to 0 and "hour" defaults to -1 when absent.

    If get_vehical_hourly_summaries returns rows for the sensor, the first
    one is updated with totals recomputed by calculate_hourly_details.
    Otherwise a new row is inserted with record_count = 1 and the average
    equal to this message's volume.

    The lookup is by sensor_id only, and the row's "hour" is overwritten
    with the incoming message's hour.
    NOTE(review): confirm a single rolling row per sensor is intended rather
    than one row per (sensor, hour).
    NOTE(review): get_vehical_hourly_summaries also returns an empty list
    when its query fails, in which case this function inserts a new row.

    Returns: None.
    """
    volume = data.get("volume", 0)

    # Map the record fields into the mapping object.
    mapping = {
        "sensor_id": data["atd_device_id"],
        "avg_vehical_count_hourly": 0,  # filled in below
        "vehical_count_per_hour": volume,
        "record_count": 0,  # filled in below
        "hour": data.get("hour", -1),
    }

    # Check if a record exists for this sensor_id.
    existing_records = get_vehical_hourly_summaries(sensor_id=mapping["sensor_id"])
    if existing_records:
        latest_record = existing_records[0]

        # Calculate hourly vehicle details on top of the stored totals.
        mapping = calculate_hourly_details(data, latest_record, mapping)

        # TODO: calculate daily peak volume across all sensors.
        # TODO: calculate daily sensor availability percentage (%).
        # TODO: calculate daily traffic volume across all sensors.

        # Update the most recent record.
        update_vehical_hourly_summary(latest_record["id"], mapping)
    else:
        # First record for this sensor: it is the only one counted, so the
        # average is simply this message's volume. The average is stored in
        # the mapping so the inserted row does not keep the placeholder 0.
        mapping["record_count"] = 1
        mapping["avg_vehical_count_hourly"] = volume / mapping["record_count"]

        # Insert a new record.
        create_vehical_hourly_summary(mapping)

    return

### Consume messages and insert into database

In [48]:
# Consume messages until interrupted; each message value is applied to the
# summary table through update_details. Any other exception propagates after
# the cleanup below has run.
try:
    for message in consumer:
        data = message.value  # This is a dict after JSON deserialization

        # Insert/update data into PG table
        update_details(data)

except KeyboardInterrupt:
    print('Stopped consuming.')

finally:
    # Nested so that a failure while closing one resource does not prevent
    # the remaining ones from being closed.
    try:
        cursor.close()
    finally:
        try:
            conn.close()
        finally:
            consumer.close()

Retrieved 1 vehical_hourly_summary record(s)
Updated vehical_hourly_summary id=3419
Stopped consuming.
