In [1]:
%pip install ipykernel kafka-python pandas matplotlib folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/b9/98/9ba4b9d2d07dd32765ddb4e4c189dcbdd7dca4d5a735e2e4ea756f40c36b/folium-0.16.0-py2.py3-none-any.whl (100kB)
[K    100% |████████████████████████████████| 102kB 1.5MB/s a 0:00:01
Collecting branca>=0.6.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/fc/be/720f85abacd654ec86f1431bc7c004eae74417bd9d0e7a2bc43601062627/branca-0.8.0-py3-none-any.whl
Collecting xyzservices (from folium)
[?25l  Downloading https://files.pythonhosted.org/packages/f1/76/1d508556ee4c279841c82521aea4e12496367929d88aec6dd3959f080cfa/xyzservices-2023.2.0-py3-none-any.whl (55kB)
[K    100% |████████████████████████████████| 61kB 4.1MB/s ta 0:00:01
[31mbranca 0.8.0 has requirement jinja2>=3, but you'll have jinja2 2.11.1 which is incompatible.[0m
Installing collected packages: branca, xyzservices, folium
Successfully installed branca-0.8.0 folium-0.16.0 xyzservices-2023.2.0
Note: you may need to restart the kerne

In [2]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
from kafka import KafkaConsumer
import json
from IPython.display import display, clear_output
import datetime
import folium
from folium.plugins import MarkerCluster
from IPython.display import IFrame
import io
import os

In [3]:
# Setup Kafka consumer for aggregated data
consumer = KafkaConsumer(
    'output',
    bootstrap_servers='kafka:9092',
    auto_offset_reset='latest',
    group_id='visualization-group',
    value_deserializer=lambda m: json.loads(m.decode('utf-8'))
)

# Initialize data storage
data_records = []

# Load medidores data
# Assuming medidores data is available in /app/medidores.json
medidores_path = '/app/medidores.json'
if os.path.exists(medidores_path):
    with open(medidores_path, 'r') as f:
        medidores = json.load(f)
    medidores_df = pd.DataFrame(medidores)
else:
    # Mock data if medidores.json is not available
    medidores_df = pd.DataFrame([
        {'id': 1, 'region': 'Samborondon', 'lat': -2.0650, 'long': -79.8610},
        {'id': 2, 'region': 'Daule', 'lat': -1.8700, 'long': -79.9800},
        # Add more medidores as needed
    ])

# Function to update visualizations
def update_dashboard(df, latest_data):
    clear_output(wait=True)
    
    # Parse window_start as datetime
    df['window_start'] = pd.to_datetime(df['window_start'])
    
    # Current data (latest window)
    latest_window = df['window_start'].max()
    latest_data_window = df[df['window_start'] == latest_window]
    
    # Historical data for trend
    historical_df = df[df['window_start'] <= latest_window]
    
    # Create a folium map centered around average coordinates
    avg_lat = medidores_df['lat'].mean()
    avg_long = medidores_df['long'].mean()
    m = folium.Map(location=[avg_lat, avg_long], zoom_start=12)
    marker_cluster = MarkerCluster().add_to(m)
    
    # Plot medidores with latest consumption
    for _, row in latest_data_window.iterrows():
        region = row['region']
        # Get medidores in this region
        medidores_region = medidores_df[medidores_df['region'] == region]
        for _, medidor in medidores_region.iterrows():
            folium.Marker(
                location=[medidor['lat'], medidor['long']],
                popup=f"Region: {row['region']}<br>Avg Consumo: {row['avg_consumo']:.2f} kWh<br>Peak: {'Yes' if row['is_peak'] else 'No'}",
                icon=folium.Icon(color='red' if row['is_peak'] else 'blue')
            ).add_to(marker_cluster)
    
    # Save map to HTML buffer
    map_html = 'map.html'
    m.save(map_html)
    
    # Display the map
    display(IFrame(map_html, width='700', height='500'))
    
    # Plot average consumption trends
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
    
    # Average Consumption Trend
    for region in historical_df['region'].unique():
        region_df = historical_df[historical_df['region'] == region]
        ax1.plot(region_df['window_start'], region_df['avg_consumo'], label=f'Avg {region}')
    
    ax1.set_title('Average Consumption Over Time')
    ax1.set_xlabel('Time')
    ax1.set_ylabel('Average Consumption (kWh)')
    ax1.legend()
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    plt.setp(ax1.get_xticklabels(), rotation=45, ha='right')
    
    # Peak Consumption Indicators
    peak_df = historical_df[historical_df['is_peak']]
    for region in peak_df['region'].unique():
        region_peaks = peak_df[peak_df['region'] == region]
        ax2.scatter(region_peaks['window_start'], region_peaks['avg_consumo'], label=f'Peak {region}', marker='x', color='red')
    
    ax2.set_title('Peak Consumption Indicators')
    ax2.set_xlabel('Time')
    ax2.set_ylabel('Average Consumption (kWh)')
    ax2.legend()
    ax2.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    plt.setp(ax2.get_xticklabels(), rotation=45, ha='right')
    
    plt.tight_layout()
    plt.show()
    
    # Display Indicators
    for _, row in latest_data_window.iterrows():
        print(f"Region: {row['region']}")
        print(f"Average Consumption: {row['avg_consumo']:.2f} kWh")
        print(f"Peak Detected: {'Yes' if row['is_peak'] else 'No'}")
        print("-" * 30)

# Real-time data consumption loop
try:
    for message in consumer:
        data = message.value
        data_records.append({
            'window_start': data['window_start'],
            'window_end': data['window_end'],
            'region': data['region'],
            'avg_consumo': data['avg_consumo'],
            'max_consumo': data['max_consumo'],
            'min_consumo': data['min_consumo'],
            'count_consumo': data['count_consumo'],
            'is_peak': data['is_peak']
        })
        
        # Convert to DataFrame
        df = pd.DataFrame(data_records)
        
        # Update the dashboard with the latest data
        update_dashboard(df, data)
        
except KeyboardInterrupt:
    print("Visualization stopped.")
finally:
    consumer.close()

No handles with labels found to put in legend.


ValueError: DateFormatter found a value of x=0, which is an illegal date; this usually occurs because you have not informed the axis that it is plotting dates, e.g., with ax.xaxis_date()

ValueError: DateFormatter found a value of x=0, which is an illegal date; this usually occurs because you have not informed the axis that it is plotting dates, e.g., with ax.xaxis_date()

<Figure size 864x720 with 2 Axes>