In [3]:
import dash
import dash_bootstrap_components as dbc
from dash import Input, Output, dcc, html,dash_table
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import calendar
from sklearn.cluster import KMeans
import numpy as np

app = dash.Dash(suppress_callback_exceptions=True,external_stylesheets=[dbc.themes.BOOTSTRAP])

columns_to_include = ['x_Timestamp', 't_kWh', 'meter']
df = pd.read_csv('CEEW.csv', usecols=columns_to_include)
df = df.dropna(subset=['t_kWh'])
df=df[~df['meter'].isin(['BR09', 'BR20', 'BR17','BR23','BR42','BR44','BR48'])]
df['x_Timestamp'] = pd.to_datetime(df['x_Timestamp'])

month_names = {i: calendar.month_name[i] for i in range(1, 13)}

meter_data = df[df['meter'] == 'BR06']
print(len(meter_data), len(df))
ts = pd.Series(meter_data['t_kWh'])
start_date = '2020-08-01'
end_date = '2020-08-31'

meters = df['meter'].unique()
n=len(meters)
print("No. of unique meters :", n)


unique_meters = df['meter'].unique()
dropdown_options = [{'label': meter, 'value': meter} for meter in unique_meters]

unique_months = pd.to_datetime(df['x_Timestamp']).dt.strftime('%B %Y').unique()
month_dropdown_options = [{'label': month.split()[0], 'value': pd.to_datetime(month).month} for month in unique_months]


SIDEBAR_STYLE = {
    "position": "fixed",
    "top": 0,
    "left": 0,
    "bottom": 0,
    "width": "16rem",
    "padding": "2rem 1rem",
    "background-color": "#f8f9fa",
}

CONTENT_STYLE = {
    "margin-left": "18rem",
    "margin-right": "2rem",
    "padding": "2rem 1rem",
}

sidebar = html.Div(
    [
        html.H2("Energy Analytics", className="display-3"),
        html.Hr(),
        html.P("Smart Meter Analysis", className="lead"),
        dbc.Nav(
            [
                dbc.NavLink("Meter Raw Data", href="/", active="exact"),
                dbc.NavLink("Time Series Plots", href="/page-1", active="exact"),
                dbc.NavLink("Hourly Consumption Curves", href="/page-2", active="exact"),
                dbc.NavLink("Montly Average Load Curves", href="/page-3", active="exact"),
                dbc.NavLink("K-Means Clustering RLCs", href="/page-4", active="exact"),
            ],
            vertical=True,
            pills=True,
        ),
        html.Hr(),
        html.H4("IISc Bengaluru", className="lead")
    ],
    style=SIDEBAR_STYLE,
)

content = html.Div(id="page-content", style=CONTENT_STYLE)
app.layout = html.Div([dcc.Location(id="url"), sidebar, content])


@app.callback(Output("page-content", "children"), [Input("url", "pathname")])
def render_page_content(pathname):
    if pathname == "/":
        return html.Div([
            html.Div(children='Meter Data', style={'fontSize': '24px', 'fontWeight': 'bold'}),
            dcc.Dropdown(
                id='meter-dropdown',
                options=dropdown_options,
                value=unique_meters[0],  
                style={'width': '200px'}
            ),
            
             dash_table.DataTable(
                id='table',
                columns=[{'name': col, 'id': col} for col in columns_to_include],
                page_size=15,
                style_table={'overflowX': 'auto','textAlign': 'right'},  
            )
        ])
    
    elif pathname == "/page-1":
        
         return html.Div([
            html.Div(children='Power Consumption Time Series', style={'fontSize': '24px', 'fontWeight': 'bold'}),
            dcc.Dropdown(
                id='meter-dropdown1',
                options=dropdown_options,
                value=unique_meters[0],  
                style={'width': '200px'}
            ),
             
             dcc.Graph(id='timeseries-plot')
        ])
        
    elif pathname == "/page-2":
        return html.Div([
            html.Div(children='Hourly Consumption Curves', style={'fontSize': '24px', 'fontWeight': 'bold'}),
            dcc.Dropdown(
                id='meter-dropdown2',
                options=dropdown_options,
                value=unique_meters[0],  
                style={'width': '200px'}
            ),
            
            dcc.Dropdown(
                id='month-dropdown',
                options=month_dropdown_options,  
                style={'width': '200px'}
            ),
        
            dcc.Graph(id='daily-hourly-plot')
        
        ])
    

    elif pathname == "/page-3":
        return html.Div([
            html.Div(children='Monthly Average Load Curves', style={'fontSize': '24px', 'fontWeight': 'bold'}),
            dcc.Dropdown(
                id='month-dropdown-page3',
                options=month_dropdown_options,  
                style={'width': '200px'}
            ),
        dcc.Graph(id='monthly-hourly-plot')
    ])

    elif pathname == "/page-4":
        return html.Div([
        html.Div(children='KMeans Clustering of Average Load Curves', style={'fontSize': '24px', 'fontWeight': 'bold'}),
        dcc.Dropdown(
            id='month-dropdown-page4',
            options=month_dropdown_options,
            value=2,  
            style={'width': '200px'}
        ),
        dcc.Dropdown(
            id='cluster-dropdown',
            options=[{'label': str(i), 'value': i} for i in range(2, 11)],  # Adjust range as needed
            value=3,  # Default value for number of clusters
            style={'width': '200px'}
        ),
        dcc.Graph(id='kmeans-clustering-plot')
    ])
    
    else:
        return html.Div(
            [
                html.H1("404: Not found", className="text-danger"),
                html.Hr(),
                html.P(f"The pathname {pathname} was not recognized..."),
            ],
            className="p-3 bg-light rounded-3",
        )

@app.callback(
    Output('table', 'data'),
    Input('meter-dropdown', 'value')
)

def update_table(selected_meter):
    filtered_df = df[df['meter'] == selected_meter]
    return filtered_df[columns_to_include].to_dict('records')


@app.callback(
    dash.dependencies.Output('timeseries-plot', 'figure'),
    [dash.dependencies.Input('meter-dropdown1', 'value')]
)
def update_timeseries(selected_meter):
    filtered_df = df[df['meter'] == selected_meter]
    
    trace = go.Scatter(
        x=filtered_df['x_Timestamp'],
        y=filtered_df['t_kWh'],
        mode='lines',
        line=dict(color='lightblue', width=2),
        name='Time Series'
    )

    layout = go.Layout(
        title=f'Time Series Plot - {selected_meter}',
        xaxis=dict(title='Date'),
        yaxis=dict(title='Value')
    )

    figure = {'data': [trace], 'layout': layout}
    return figure


@app.callback(
    Output('daily-hourly-plot', 'figure'),
    [Input('meter-dropdown2', 'value'),
     Input('month-dropdown', 'value')]  # Add month dropdown as an input
)

def update_daily_hourly_plot(selected_meter, selected_month):
    filtered_df = df[(df['meter'] == selected_meter) & (pd.to_datetime(df['x_Timestamp']).dt.month == selected_month)] 
    
    month_names = {i: calendar.month_name[i] for i in range(1, 13)}
    selected_month_name = month_names[selected_month]

   
    daily_curves = []
    for day in range(1, 32): 
        day_data = filtered_df[pd.to_datetime(filtered_df['x_Timestamp']).dt.day == day]
        hourly_data = day_data.groupby(day_data['x_Timestamp'].dt.hour)['t_kWh'].mean()
        trace = go.Scatter(
            x=hourly_data.index,  # Hours of the day (0-23)
            y=hourly_data.values,  # Mean consumption values
            mode='lines',
            line=dict(color='lightblue', width=1.5),  # Individual day curve color
            name=f'Day {day}'
        )
        daily_curves.append(trace)

    # Get monthly hourly average curve
    monthly_hourly_average = filtered_df.groupby(filtered_df['x_Timestamp'].dt.hour)['t_kWh'].mean()
    trace_monthly = go.Scatter(
        x=monthly_hourly_average.index,  # Hours of the day (0-23)
        y=monthly_hourly_average.values,  # Monthly hourly average consumption values
        mode='lines',
        line=dict(color='darkorange',width=2.5),  # Monthly average curve color
        name='Monthly Average'
    )
    daily_curves.append(trace_monthly)

    layout = go.Layout(
        title=f'Daily 24-Hour Consumption - {selected_meter} for {selected_month_name}',
        xaxis=dict(title='Hour of the Day'),
        yaxis=dict(title='Average Consumption')
    )

    figure = {'data': daily_curves, 'layout': layout}
    return figure

@app.callback(
    Output('monthly-hourly-plot', 'figure'),
    Input('month-dropdown-page3', 'value')
)

def update_monthly_hourly_plot(selected_month):
    filtered_df = df[pd.to_datetime(df['x_Timestamp']).dt.month == selected_month]

    month_names = {i: calendar.month_name[i] for i in range(1, 13)}
    selected_month_name = month_names[selected_month]
    
    monthly_curves = []
    for meter in unique_meters:
        meter_data = filtered_df[filtered_df['meter'] == meter]
        monthly_hourly_average = meter_data.groupby(meter_data['x_Timestamp'].dt.hour)['t_kWh'].mean()
        trace = go.Scatter(
            x=monthly_hourly_average.index,  # Hours of the day (0-23)
            y=monthly_hourly_average.values,  # Monthly hourly average consumption values
            mode='lines',
            name=f'{meter} - Monthly Average'
        )
        monthly_curves.append(trace)

    layout = go.Layout(
        title=f'Monthly Hourly Consumption - {selected_month_name}',
        xaxis=dict(title='Hour of the Day'),
        yaxis=dict(title='Average Consumption')
    )
    figure = {'data': monthly_curves, 'layout': layout}
    return figure

@app.callback(
    Output('kmeans-clustering-plot', 'figure'),
    [Input('month-dropdown-page4', 'value'),
     Input('cluster-dropdown', 'value')]
)
def update_kmeans_plot(selected_month, num_clusters):
    
    filtered_df = df[pd.to_datetime(df['x_Timestamp']).dt.month == selected_month]

    # Data for KMeans clustering
    X = filtered_df.groupby(['meter', filtered_df['x_Timestamp'].dt.hour])['t_kWh'].mean().unstack()

    # Applying KMeans clustering
    kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=0)
    kmeans.fit(X)

    centers = kmeans.cluster_centers_
    labels = kmeans.labels_
    centroid_meters = df['meter'].unique()[:len(centers)]

    fig_cluster_centroids = []

    for i, cluster_center in enumerate(centers):
        trace = go.Scatter(
            x=list(range(24)),
            y=cluster_center,
            mode='lines',
            line=dict(color='darkblue', width=2),  
            name=f'Cluster {i + 1} Centroid'
        )
        fig_cluster_centroids.append(trace)

    for i, meter in enumerate(df['meter'].unique()):
       
        if meter not in centroid_meters:
            meter_data = df[(df['meter'] == meter) & (pd.to_datetime(df['x_Timestamp']).dt.month == selected_month)]
            hourly_data = meter_data.groupby(meter_data['x_Timestamp'].dt.hour)['t_kWh'].mean()
            trace = go.Scatter(
                x=list(range(24)),
                y=hourly_data,
                mode='lines',
                line=dict(color='orange', width=1),  
                name=f'{meter} - Cluster {labels[i]}'
            )
            fig_cluster_centroids.append(trace)

    layout = go.Layout(
        title=f'KMeans Clustering of Hourly Consumption Curves for Month {calendar.month_name[selected_month]}',
        xaxis=dict(title='Hour of the Day'),
        yaxis=dict(title='Average Consumption')
    )

    figure = {'data': fig_cluster_centroids, 'layout': layout}
    return figure

if __name__ == '__main__':
    app.run_server(debug=True, port=8051)


169920 5909280
No. of unique meters : 39



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



[1;31m---------------------------------------------------------------------------[0m
[1;31mKeyError[0m                                  Traceback (most recent call last)
[1;31mKeyError[0m: None




KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.


KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.



[1;31m---------------------------------------------------------------------------[0m
[1;31mKeyError[0m                                  Traceback (most recent call last)
[1;31mKeyError[0m: None

[1;31m---------------------------------------------------------------------------[0m
[1;31mKeyError[0m                                  Traceback (most recent call last)
[1;31mKeyError[0m: None




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



[1;31m---------------------------------------------------------------------------[0m
[1;31mKeyError[0m                                  Traceback (most recent call last)
[1;31mKeyError[0m: None




KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.



[1;31m---------------------------------------------------------------------------[0m
[1;31mKeyError[0m                                  Traceback (most recent call last)
[1;31mKeyError[0m: None

[1;31m---------------------------------------------------------------------------[0m
[1;31mKeyError[0m                                  Traceback (most recent call last)
[1;31mKeyError[0m: None




KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.



[1;31m---------------------------------------------------------------------------[0m
[1;31mKeyError[0m                                  Traceback (most recent call last)
[1;31mKeyError[0m: None



In [None]:
filtered_df = df[(df['meter'] == 'BR06') & (pd.to_datetime(df['x_Timestamp']).dt.month == 2)] 

In [None]:
filtered_df.head()