In [None]:
# Generate dummy customer smart meter data (15-minute readings following a daily usage pattern) using pandas

import pandas as pd
import numpy as np
import plotly.express as px

# Fixed seed so that Restart Kernel -> Run All regenerates exactly the same
# synthetic readings (and therefore the same CSV and downstream results).
SEED = 42
rng = np.random.default_rng(SEED)

# 15-minute timestamps from 2024-08-01 00:00 up to, but not including,
# 2024-09-01 00:00 (inclusive='left' drops the end point).
date_range = pd.date_range(start='2024-08-01', end='2024-09-01', freq='15min', inclusive='left')

# create a dataframe indexed by the timestamps
df = pd.DataFrame(index=date_range)

# Baseline usage: half sine wave over the hour of day (amplitude 10)
# plus integer noise drawn from 0..4 for every reading.
hour_of_day = df.index.hour.to_numpy()
df['usage'] = np.sin(np.pi * hour_of_day / 24) * 10 + rng.integers(0, 5, len(df))

# Add anomalies: on these days the readings are regenerated with a larger
# sine amplitude (12 instead of 10) and fresh noise.
anomaly_days = ['2024-08-10', '2024-08-20']
for day in anomaly_days:
    # partial-string indexing on the DatetimeIndex selects every reading of that day
    day_hours = df.loc[day, 'usage'].index.hour.to_numpy()
    df.loc[day, 'usage'] = np.sin(np.pi * day_hours / 24) * 12 + rng.integers(0, 5, len(day_hours))

# Plot the raw 15-minute usage series. A title and explicit axis titles are
# set so the figure can be read on its own.
fig = px.line(df, x=df.index, y='usage', title='Simulated Smart Meter Usage')
fig.update_layout(xaxis_title='Timestamp', yaxis_title='Usage')
fig.show()

# Save to CSV; the timestamp index is written as the first (unnamed) column.
df.to_csv('smart_meter_data.csv')

In [None]:
from scipy.stats import zscore

# Absolute Z-score above which a day is flagged as an outlier
ZSCORE_THRESHOLD = 2

# Aggregate the readings to daily totals. Only the 'usage' column is selected:
# this cell later adds a boolean 'outlier' column to df, and without the
# explicit selection a re-run of the cell would sum that flag as well.
df_daily = df[['usage']].resample('D').sum()

# Calculate the Z-score of each day's total usage
df_daily['zscore'] = zscore(df_daily['usage'])

# Flag days whose Z-score magnitude exceeds the threshold (either direction)
df_daily['outlier'] = df_daily['zscore'].abs() > ZSCORE_THRESHOLD

# Mark every reading in the original dataframe that falls on a flagged day:
# floor each timestamp to midnight and look it up among the flagged dates
outlier_days = df_daily.loc[df_daily['outlier']].index
df['outlier'] = df.index.floor('D').isin(outlier_days)

# Plot the full-resolution usage series and shade every flagged day with a
# translucent red band spanning midnight to the following midnight.
one_day = pd.Timedelta(days=1)
band_style = dict(fillcolor='red', opacity=0.25, line_width=0, annotation_text='Outlier')
flagged_days = df_daily[df_daily['outlier']].index

fig = px.line(df, x=df.index, y='usage', title='Daily Usage with Outliers Highlighted')
for day_start in flagged_days:
    fig.add_vrect(x0=day_start, x1=day_start + one_day, **band_style)
fig.show()