# Electricity Usage Analysis
This notebook analyzes historical electricity usage in order to size a backup battery system for my home.

In [None]:
import pandas as pd
import plotly.express as px

In [None]:
# Load the electricity export (15-minute data, per its file name) and preview it
df = pd.read_csv(
    filepath_or_buffer="data/Trimmed_Electric_15_Minute_10-14-2022_5-13-2023_20230514.csv",
)
df.head()

In [None]:
# Normalise the headers: lower-case everything and turn spaces into
# underscores. Columns holding a quantity additionally get a "_kwh" suffix.
quantities = ["consumption", "generation", "net"]
df.columns = [
    # Quantity names contain no spaces, so only the suffix is needed for them.
    f"{name}_kwh" if name in quantities else name.replace(" ", "_")
    for name in map(str.lower, df.columns)
]
df.head()

In [None]:
# Build a DatetimeIndex: join the date and start-time text into a single
# column, then parse that column and use it as the frame's index.
df["date_start_time"] = df["date"] + " " + df["start_time"]
df.index = pd.to_datetime(df["date_start_time"])
df.head()

In [None]:
# Drop the columns that are no longer needed now that the timestamp
# lives in the index.
df = df.drop(columns=["date", "start_time", "duration", "date_start_time"])
df.head()

In [None]:
# Derive grouping keys from the index: the calendar date and the hour of day
df = df.assign(date=df.index.date, hour=df.index.hour)
df.head()

In [None]:
# Find the earliest date with any solar generation: keep only the rows whose
# generation is positive and take the smallest date in their index.
first_generation_date = min(df[df["generation_kwh"].gt(0)].index.date)
print(f"First solar generation date: {first_generation_date}")

In [None]:
# Add a column indicating whether solar was active, i.e. whether the row's
# date falls on or after the first date with generation.
#
# A single vectorised comparison is used instead of two partial `.loc`
# assignments: filling a new column piecewise starts it out as NaN and
# typically leaves it with object dtype, on which `~` is not a logical NOT.
# The comparison result is a genuine boolean column that can be used
# directly as a mask.
df["is_solar_active"] = df["date"] >= first_generation_date
df.head()

In [None]:
# Sum net_kwh over each calendar date (result is indexed by date)
daily_usage = df.groupby("date")[["net_kwh"]].sum()

In [None]:
# Plot net usage per day.
# The daily aggregate is plotted rather than the raw rows: `df` holds many
# readings per date, so using its date column as x would stack all of a day's
# readings at the same x position instead of drawing one point per day.
fig = px.line(daily_usage, x=daily_usage.index, y="net_kwh")
fig.show()