In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objs as go

In [None]:
# dataset imported from kaggle at: https://www.kaggle.com/datasets/uciml/electric-power-consumption-data-set?resource=download
# Path of the zipped CSV that the rest of the notebook works from.
file_path = 'data/CLEANED_2010_household_power_consumption.zip'

# Read the CSV and discard its first column via `.iloc[:, 1:]`
# (presumably a saved index column — confirm against the file).
df = pd.read_csv(file_path).iloc[:, 1:]
# Parse the timestamps so the `.dt` accessors and date-string comparisons used later work.
df['Date_time'] = pd.to_datetime(df['Date_time'])
# Print (rows, columns), then show the first rows as the cell output.
print(df.shape)
df.head()

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts, memory usage).
df.info()

In [None]:
# Twelve stacked panels, one per calendar month, each holding that month's voltage trace.
fig = sp.make_subplots(rows=12, cols=1)

month_numbers = df['Date_time'].dt.month
for month in range(1, 13):
    # The subplot row equals the month number, so January is the top panel.
    temp_df = df[(month_numbers == month)].sort_values(by='Date_time')
    fig.add_trace(
        go.Scatter(x=temp_df['Date_time'], y=temp_df['Voltage'], mode='lines', name=month),
        row=month,
        col=1,
    )

# Size, title and legend are applied in a single layout update.
fig.update_layout(height=1000, width=1000, title_text="Voltage", showlegend=False)
fig.show()

## Applications

In [None]:
# Synthetic demo of FFT threshold filtering on a signal whose frequency content is known.

# Time axis from 0 to 8*pi (four periods of the slowest component), sampled every dt.
dt = 0.1
t = np.arange(0, 8 * np.pi, dt)

# Sum of three sinusoids with angular frequencies 1, 2 and 3.
y = 2*np.cos(t) + np.sin(3*t) + 3 * np.sin(2*t)

n = len(t)
fhat = np.fft.fft(y, n)
# fhat * conj(fhat) is |fhat|^2; take the real part so the threshold below
# compares real numbers instead of complex ones.
PSD = np.real(fhat * np.conj(fhat)) / n
# Frequency of each FFT bin in cycles per unit of t; the sample spacing dt must be
# part of the scale, otherwise the axis is in cycles per sample.
freq = (1/(dt*n)) * np.arange(n)
# Indices of the positive-frequency bins, skipping the DC bin at index 0.
L = np.arange(1, n//2, dtype='int')

# Keep only the bins whose power exceeds the threshold and transform back.
indices = PSD > 500
PSD_clean = PSD * indices
fhat = indices * fhat
ffilt = np.fft.ifft(fhat)

# One panel per stage: input signal, power spectrum, filtered reconstruction.
fig = sp.make_subplots(rows=3, cols=1)
fig.update_layout(height=800)
line_chart1 = go.Scatter(x=t, y=y, mode='lines', name='Actual Voltage')
fig.add_trace(line_chart1, row=1, col=1)

line_chart2 = go.Scatter(x=freq[L], y=PSD[L], mode='lines', name='Power Spectrum')
fig.add_trace(line_chart2, row=2, col=1)

# The reconstruction is plotted against t, the axis it was computed on.
line_chart3 = go.Scatter(x=t, y=np.real(ffilt), mode='lines', name='Denoisified Voltage')
fig.add_trace(line_chart3, row=3, col=1)

fig.show()

#### Frequency Analysis

In [None]:
# Fit an FFT threshold filter on one week of voltage readings, then overlay the
# filtered signal on the readings of the following week.
window1 = ('2010-02-05', '2010-02-12')
window2 = ('2010-02-13', '2010-02-20')

window = df[(df['Date_time'] >= window1[0]) &
            (df['Date_time'] <= window1[1])].sort_values(by='Date_time')

x = window['Date_time']
y = window['Voltage']
n = len(x)
fhat = np.fft.fft(y, n)
# |fhat|^2 / n, kept real so the threshold below compares real numbers.
PSD = np.real(fhat * np.conj(fhat)) / n
# Bin frequencies in cycles per sample.
freq = (1/(n)) * np.arange(n)
# Positive-frequency bins, skipping the DC bin at index 0.
L = np.arange(1, n//2, dtype='int')

# Zero every coefficient whose power is at or below the threshold, then invert.
indices = PSD > 800
PSD_clean = PSD * indices
fhat = indices * fhat
ffilt = np.fft.ifft(fhat)

fig = sp.make_subplots(rows=5, cols=1)
fig.update_layout(height=800)
line_chart1 = go.Scatter(x=x, y=y, mode='lines', name='Actual Voltage')
fig.add_trace(line_chart1, row=1, col=1)

line_chart2 = go.Scatter(x=freq[L], y=PSD[L], mode='lines', name='Power Spectrum')
fig.add_trace(line_chart2, row=2, col=1)

line_chart3 = go.Scatter(x=x, y=np.real(ffilt), mode='lines', name='Denoisified Voltage')
fig.add_trace(line_chart3, row=3, col=1)

# Row 4 overlays the raw and filtered signals of the first window.
fig.add_trace(line_chart1, row=4, col=1)
fig.add_trace(line_chart3, row=4, col=1)

# Select the following week under its own name (so `window2` remains the
# (start, end) tuple) and sort it the same way as the first window.
next_window = df[(df['Date_time'] >= window2[0]) &
                 (df['Date_time'] <= window2[1])].sort_values(by='Date_time')
# ffilt has one value per row of the first window; the two windows are not
# guaranteed to have the same number of rows, so overlay only the shared length.
overlap = min(len(next_window), len(ffilt))

# Raw readings of the next window: labelled as actual, not as filtered, voltage.
line_chart4 = go.Scatter(x=next_window['Date_time'], y=next_window['Voltage'], mode='lines', name='Actual Voltage (next window)')
fig.add_trace(line_chart4, row=5, col=1)
line_chart5 = go.Scatter(x=next_window['Date_time'].iloc[:overlap], y=np.real(ffilt)[:overlap], mode='lines', name='Denoisified Voltage')
fig.add_trace(line_chart5, row=5, col=1)

fig.show()

In [None]:
# Seeded 1024-row random subset of `window`, put back into chronological order.
sampled_rows = window.sample(1024, random_state=42)
temp_window = sampled_rows.sort_values(by='Date_time')


In [None]:
# Sampling seed and sample size used for this experiment.
state, n_samples = 1, 8192

window1 = ('2010-02-05', '2010-02-12')
window2 = ('2010-02-13', '2010-02-20')

# NOTE(review): rows are drawn at random and then re-sorted, so the values handed
# to the FFT are presumably no longer evenly spaced in time — confirm this is intended.
temp_window = window.sample(n=n_samples, random_state=state).sort_values(by='Date_time')

x, y = temp_window['Date_time'], temp_window['Voltage']
n = len(x)

# Spectrum of the sampled voltages and its power, scaled by the sample count.
fhat = np.fft.fft(y)
PSD = (fhat * np.conj(fhat)) / n_samples
# Cell output: real part of the first ten power values.
np.real(PSD[:10])

In [None]:
# Second-to-last power value in the first half of the spectrum.
# The half-length is taken from PSD itself: at this point in run order `freq`
# still belongs to the earlier full-window analysis and has a different length.
np.real(PSD[:len(PSD) // 2])[-2]

In [None]:

# Frequency of each FFT bin in cycles per sample.
freq = np.arange(n) * (1 / n)

# Boolean mask selecting the bins whose power exceeds 800.
indices = PSD > 800

In [None]:
# Zero every Fourier coefficient the mask rejects, then preview the first ten.
fhat = np.multiply(indices, fhat)
fhat[:10]


In [None]:
# Inverse transform of the masked spectrum; the cell displays its real part.
ffilt = np.fft.ifft(fhat)
ffilt.real

In [None]:

# Five-panel summary of the sampled window: raw signal, spectrum, filtered signal,
# raw + filtered overlay, and the filtered signal laid over the next window.
fig = sp.make_subplots(rows=5, cols=1)
fig.update_layout(height=800)
line_chart1 = go.Scatter(x=x, y=y, mode='lines', name='Actual Voltage')
fig.add_trace(line_chart1, row=1, col=1)

# Only the first half of the spectrum is plotted.
half = len(freq) // 2
line_chart2 = go.Scatter(x=freq[:half], y=np.real(PSD[:half]), mode='lines', name='Power Spectrum')
fig.add_trace(line_chart2, row=2, col=1)

line_chart3 = go.Scatter(x=x, y=np.real(ffilt), mode='lines', name='Denoisified Voltage')
fig.add_trace(line_chart3, row=3, col=1)

fig.add_trace(line_chart1, row=4, col=1)
fig.add_trace(line_chart3, row=4, col=1)

# Select the next window under its own name. Rebinding `window2` to the frame
# would make `window2[0]` fail the next time this cell is run on its own.
next_window = df[(df['Date_time'] >= window2[0]) &
                 (df['Date_time'] <= window2[1])]

# Same seed and size as the first window's sample, so both traces have n_samples points.
temp_window2 = next_window.sample(n_samples, random_state=state).sort_values(by='Date_time')
# Raw readings of the next window: labelled as actual, not as filtered, voltage.
line_chart4 = go.Scatter(x=temp_window2['Date_time'], y=temp_window2['Voltage'], mode='lines', name='Actual Voltage (next window)')
fig.add_trace(line_chart4, row=5, col=1)
line_chart5 = go.Scatter(x=temp_window2['Date_time'], y=np.real(ffilt), mode='lines', name='Denoisified Voltage')
fig.add_trace(line_chart5, row=5, col=1)

fig.show()

In [None]:
# Sampling seed and sample size.
state, n_samples = 1, 8192

window1 = ('2010-02-05', '2010-02-12')
window2 = ('2010-02-13', '2010-02-20')

# Seeded random subset of `window`, restored to chronological order.
temp_window = window.sample(n=n_samples, random_state=state).sort_values(by='Date_time')

x, y = temp_window['Date_time'], temp_window['Voltage']
n = len(x)

# Raw FFT of the sampled voltages and the magnitude of every coefficient.
fftresult = np.fft.fft(y)
magnitudes = np.absolute(fftresult)

# Frequency of each FFT bin in cycles per sample.
freq = np.arange(n) * (1 / n)

In [None]:

# Magnitude of each Fourier coefficient; the cell shows the bin-0 magnitude divided by n.
magnitudes = np.absolute(fftresult)
magnitudes[0] / n

In [None]:
# Real part of the zero-frequency (bin 0) coefficient.
fftresult[0].real

In [None]:
# Threshold filter with a looser cut-off, applied to the unfiltered spectrum.
PSD = np.abs(fftresult) ** 2 / n
indices = PSD > 100
# Mask `fftresult`, not `fhat`: `fhat` has already been zeroed by a stricter
# threshold, so masking it again cannot bring back any coefficient.
fhat = indices * fftresult
ffilt = np.fft.ifft(fhat)

# One panel per stage: raw signal, magnitude spectrum, filtered reconstruction.
fig = sp.make_subplots(rows=3, cols=1)
fig.update_layout(height=800)
line_chart1 = go.Scatter(x=x, y=y, mode='lines', name='Actual Voltage')
fig.add_trace(line_chart1, row=1, col=1)

# Only the first half of the spectrum is plotted; the values are |FFT|, not power.
half = len(freq) // 2
line_chart2 = go.Scatter(x=freq[:half], y=magnitudes[:half], mode='lines', name='Magnitude Spectrum')
fig.add_trace(line_chart2, row=2, col=1)

# The time-domain panel shows the inverse transform, not the spectrum coefficients.
line_chart3 = go.Scatter(x=x, y=np.real(ffilt), mode='lines', name='Denoisified Voltage')
fig.add_trace(line_chart3, row=3, col=1)

fig.show()

In [None]:
# Write the timestamp and voltage columns of both sampled windows as JSON record lists.
export_columns = ['Date_time', 'Voltage']
temp_window[export_columns].to_json('data/electric_data_window.json', orient='records')
temp_window2[export_columns].to_json('data/electric_data_next_window.json', orient='records')