### Retrieval-Augmented Generation with energy prices

Location assumption based on temperature range:  **Denver, Colorado**

Price assumption: **15 cents per kWh (constant)**

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [53]:
df = pd.read_csv("data/processed/full_usage_data.csv", index_col=0)
df.head()

Unnamed: 0,use,gen,Dishwasher,Furnace_1,Furnace_2,Home_office,Fridge,Wine_cellar,Garage_door,Kitchen_12,...,apparentTemperature,pressure,windSpeed,cloudCover,windBearing,precipIntensity,dewPoint,precipProbability,kitchen,Furnace
2015-12-31 21:58:58,0.6316,0.003467,3.3e-05,0.158883,0.0639,0.0434,0.0825,0.009083,0.012983,0.000517,...,28.87,1015.98,8.2,0.75,281.0,0.0,23.39,0.0,0.000567,0.222783
2015-12-31 21:59:58,1.070017,0.0035,0.000233,0.497633,0.0629,0.043583,0.170883,0.008883,0.0125,0.00035,...,28.87,1015.98,8.2,0.75,281.0,0.0,23.39,0.0,0.000583,0.560533
2015-12-31 22:00:58,1.01625,0.003467,0.000217,0.466683,0.062767,0.043533,0.14955,0.0089,0.012567,0.000417,...,28.87,1015.98,8.2,0.75,281.0,0.0,23.39,0.0,0.000633,0.52945
2015-12-31 22:01:58,0.820667,0.003467,0.000217,0.44695,0.062917,0.043583,0.0105,0.007733,0.012617,0.000733,...,28.87,1015.98,8.2,0.75,281.0,0.0,23.39,0.0,0.001083,0.509867
2015-12-31 22:02:58,0.539167,0.003517,3.3e-05,0.100133,0.116283,0.043367,0.007617,0.007083,0.013133,0.000683,...,28.87,1015.98,8.2,0.75,281.0,0.0,23.39,0.0,0.000733,0.216417


In [54]:
df.reset_index(inplace=True)
df.rename(columns={"index": "timestamp"}, inplace=True)
df.head()

Unnamed: 0,timestamp,use,gen,Dishwasher,Furnace_1,Furnace_2,Home_office,Fridge,Wine_cellar,Garage_door,...,apparentTemperature,pressure,windSpeed,cloudCover,windBearing,precipIntensity,dewPoint,precipProbability,kitchen,Furnace
0,2015-12-31 21:58:58,0.6316,0.003467,3.3e-05,0.158883,0.0639,0.0434,0.0825,0.009083,0.012983,...,28.87,1015.98,8.2,0.75,281.0,0.0,23.39,0.0,0.000567,0.222783
1,2015-12-31 21:59:58,1.070017,0.0035,0.000233,0.497633,0.0629,0.043583,0.170883,0.008883,0.0125,...,28.87,1015.98,8.2,0.75,281.0,0.0,23.39,0.0,0.000583,0.560533
2,2015-12-31 22:00:58,1.01625,0.003467,0.000217,0.466683,0.062767,0.043533,0.14955,0.0089,0.012567,...,28.87,1015.98,8.2,0.75,281.0,0.0,23.39,0.0,0.000633,0.52945
3,2015-12-31 22:01:58,0.820667,0.003467,0.000217,0.44695,0.062917,0.043583,0.0105,0.007733,0.012617,...,28.87,1015.98,8.2,0.75,281.0,0.0,23.39,0.0,0.001083,0.509867
4,2015-12-31 22:02:58,0.539167,0.003517,3.3e-05,0.100133,0.116283,0.043367,0.007617,0.007083,0.013133,...,28.87,1015.98,8.2,0.75,281.0,0.0,23.39,0.0,0.000733,0.216417


In [55]:
# Set the timestamp column as the index

df["timestamp"] = pd.to_datetime(df["timestamp"])
df.set_index("timestamp", inplace=True)

In [56]:
# Aggregate by 1-hour intervals (sum energy usage)

hourly_data = df.use.resample("1H").mean()

  hourly_data = df.use.resample("1H").mean()


In [57]:
hourly_data.head()

timestamp
2015-12-31 21:00:00    0.850808
2015-12-31 22:00:00    0.733338
2015-12-31 23:00:00    0.939995
2016-01-01 00:00:00    0.656529
2016-01-01 01:00:00    1.223981
Freq: h, Name: use, dtype: float64

In [59]:
# Calculate energy cost directly (kWh * 15 cents)

hourly_data["Cost"] = hourly_data * 0.15

In [60]:
hourly_data

timestamp
2015-12-31 21:00:00                                             0.850808
2015-12-31 22:00:00                                             0.733338
2015-12-31 23:00:00                                             0.939995
2016-01-01 00:00:00                                             0.656529
2016-01-01 01:00:00                                             1.223981
                                             ...                        
2016-12-15 15:00:00                                             1.265857
2016-12-15 16:00:00                                             1.238106
2016-12-15 17:00:00                                             1.117068
2016-12-15 18:00:00                                              1.78941
Cost                   timestamp
2015-12-31 21:00:00    0.127621
2015...
Name: use, Length: 8399, dtype: object

In [61]:
hourly_data.Cost.resample("1M").sum()

  hourly_data.Cost.resample("1M").sum()


timestamp
2015-12-31      0.378621
2016-01-31    114.797279
2016-02-29     96.094473
2016-03-31     82.344247
2016-04-30     72.751574
2016-05-31     62.365480
2016-06-30     56.757286
2016-07-31    126.593922
2016-08-31    153.684981
2016-09-30     77.589502
2016-10-31     75.780391
2016-11-30     86.126381
2016-12-31     46.451547
Freq: ME, Name: use, dtype: float64