# Setup - libraries and functions

In [3]:
import requests
import os
import json
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
from dotenv import load_dotenv
from git import Repo


In [4]:
# Load environment variables via python-dotenv, then read the API key that
# authenticates the consumption requests further down.
load_dotenv()
api_key = os.environ.get('API_KEY')

# Every working location is built relative to the current directory.
dir_current = os.curdir
dir_data_raw, dir_cumulative, dir_repo = (
    f'{dir_current}/{leaf}' for leaf in ('data-raw', 'data-cumulative', '.git')
)

# GitPython handle used by the commit/push cell.
repo = Repo(dir_repo)

print(f' ... Current dir is {dir_current}, Raw data saved to {dir_data_raw}')

 ... Current dir is ., Raw data saved to ./data-raw


In [5]:
def report_timespan(df):
    """Print the earliest and latest ``interval_start`` found in ``df``.

    The ``interval_start`` column is parsed with ``pd.to_datetime`` and written
    back to ``df``, so the caller's frame holds datetimes in that column after
    this call.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``interval_start`` column holding timestamps or
        parseable timestamp strings.

    Returns
    -------
    None
        The span is printed, each end formatted like ``Tue 2025-02-11 22:00``.
        If the column has no usable timestamps a short notice is printed
        instead.
    """
    df['interval_start'] = pd.to_datetime(df['interval_start'])

    first = df['interval_start'].min()
    last = df['interval_start'].max()

    # min()/max() of an empty or all-missing column is NaT, which cannot be
    # formatted with strftime -- report that rather than raising.
    if pd.isna(first) or pd.isna(last):
        print(' ... No timestamps to report')
        return

    stamp_format = '%a %Y-%m-%d %H:%M'
    date_start = first.strftime(stamp_format)
    date_final = last.strftime(stamp_format)

    print(f' ... From {date_start} to {date_final}')

In [6]:
def get_raw_usage_data(utility, sort = True, save = True, verbose = True):
    """Fetch consumption readings for one utility from the Octopus Energy API.

    A single GET request is made to the hard-coded meter endpoint for the
    utility, authenticating with the module-level ``api_key`` as the basic-auth
    user name. Only the records in that one response are used.
    NOTE(review): no pagination is followed -- confirm one page is enough.

    Parameters
    ----------
    utility : str
        Either ``'electricity'`` or ``'gas'``.
    sort : bool, default True
        Sort the returned frame by ``interval_start`` ascending.
    save : bool, default True
        Print the HTTP status and write the readings, exactly as received, to
        ``{dir_data_raw}/{YYYY-MM-DD}_rawdata_{utility}.csv``.
    verbose : bool, default True
        Print the time span covered via ``report_timespan`` (which also
        converts ``interval_start`` to datetimes in the returned frame).

    Returns
    -------
    pandas.DataFrame
        Columns ``{utility}_consumption``, ``interval_start``, ``interval_end``.
        Empty (but with those columns) when the API returns no records.

    Raises
    ------
    ValueError
        If ``utility`` is not one of the two supported values.
    requests.HTTPError
        If the API answers with an error status.
    """
    if utility not in ['electricity', 'gas']:
        raise ValueError('utility must be either "electricity" or "gas"')

    urls = {
        'electricity': "https://api.octopus.energy/v1/electricity-meter-points/1200039599083/meters/24J0348055/consumption/",
        'gas': "https://api.octopus.energy/v1/gas-meter-points/620841506/meters/E6S17881452061/consumption/",
    }

    # A timeout stops the cell hanging forever if the API does not answer.
    response = requests.get(urls[utility], auth=(api_key, ""), timeout=30)
    if save:
        print(f"Status Code ({utility}): {response.status_code}")
    # Fail here with the HTTP error instead of a KeyError on 'results' below.
    response.raise_for_status()

    # Naming the columns keeps the frame well-formed even for an empty result
    # and makes the later rename independent of key order in the payload.
    raw_columns = ['consumption', 'interval_start', 'interval_end']
    df = pd.DataFrame(response.json()['results'], columns=raw_columns)

    if save:
        if df.empty:
            print(' ... No records returned, nothing saved')
        else:
            os.makedirs(dir_data_raw, exist_ok=True)
            today = datetime.now().strftime('%Y-%m-%d')
            file_raw_data = f'{dir_data_raw}/{today}_rawdata_{utility}.csv'
            # Written before report_timespan runs, so the file keeps the
            # timestamp strings exactly as the API sent them.
            df.to_csv(file_raw_data, index = False)
            print(f' ... Saving as {file_raw_data}')
            print(f' ... Records {df.shape[0]}')
            print(f' ... Variables {df.shape[1]} : {df.columns}')

    if verbose and not df.empty:
        report_timespan(df)

    df = df.rename(columns={'consumption': f'{utility}_consumption'})
    if sort:
        df = df.sort_values(by='interval_start', ascending=True)

    return df

In [7]:
def get_utility_data(directory, utility = 'gas'):
    """Combine every saved raw CSV for ``utility`` into one de-duplicated frame.

    Files in ``directory`` are used when their name ends in ``.csv`` and
    contains both ``rawdata`` and ``utility``. Rows repeating an
    (``interval_start``, ``interval_end``) pair are dropped, keeping the first
    occurrence.

    Parameters
    ----------
    directory : str
        Folder holding the raw CSV downloads.
    utility : str, default 'gas'
        Utility name to look for in the file names; also used to name the
        consumption column.

    Returns
    -------
    pandas.DataFrame
        Columns ``{utility}_consumption``, ``interval_start`` (UTC datetimes),
        ``interval_end``, plus the derived text columns ``weekday``, ``day``,
        ``dom``, ``time`` and ``x_label`` (all expressed in UTC), sorted by
        ``day`` then ``time``.

    Raises
    ------
    ValueError
        If no matching CSV file exists in ``directory``.
    """
    # os.listdir order is arbitrary; sorting makes "first occurrence wins"
    # reproducible. Names start with the download date, so the earliest
    # download of an interval is the one kept.
    raw_files = sorted(
        file for file in os.listdir(directory)
        if file.endswith(".csv") and 'rawdata' in file and utility in file
    )
    if not raw_files:
        raise ValueError(f'No raw {utility} CSV files found in {directory}')

    dfs = [pd.read_csv(f'{directory}/{file}') for file in raw_files]  # one frame per file
    df_combined = pd.concat(dfs, ignore_index=True)
    df_unique = (
        df_combined
        .drop_duplicates(subset=['interval_start', 'interval_end'])
        .rename(columns={'consumption': f'{utility}_consumption'})
    )

    # Parse once, vectorised. utc=True accepts both 'Z' and '+01:00'-style
    # offsets and puts every reading on the same clock.
    starts = pd.to_datetime(df_unique['interval_start'], utc=True)
    df_unique['interval_start'] = starts
    df_unique['weekday'] = starts.dt.strftime('%a')
    df_unique['day'] = starts.dt.strftime('%Y-%m-%d')
    df_unique['dom'] = starts.dt.strftime('%m-%d')
    df_unique['time'] = starts.dt.strftime('%H:%M')
    df_unique['x_label'] = df_unique['weekday'] + ' ' + df_unique['dom'] + ' ' + df_unique['time']
    df_unique = df_unique.sort_values(by=['day', 'time'], ascending=True)

    print(f' ... Cumulative data for {utility}, {df_unique.shape[0]} records')
    if not df_unique.empty:
        report_timespan(df_unique)
    return df_unique


In [8]:
def get_plot(df, utility = 'both'):
    """Build a Plotly line chart of consumption against ``x_label``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``x_label`` plus ``electricity_consumption`` and/or
        ``gas_consumption``, depending on what is plotted.
    utility : str, default 'both'
        ``'electricity'``, ``'gas'`` or ``'both'``; any other value yields a
        figure with no traces.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure; it is returned, not shown.
    """
    # (selector, column, legend name, line colour) -- electricity is added first
    series_specs = (
        ('electricity', 'electricity_consumption', 'Electricity', 'blue'),
        ('gas', 'gas_consumption', 'Gas', 'red'),
    )

    chart = go.Figure()

    for selector, column, legend_name, colour in series_specs:
        if utility not in (selector, 'both'):
            continue
        trace = go.Scatter(
            x=df['x_label'],
            y=df[column],
            mode='lines+markers',
            name=legend_name,
            line=dict(color=colour),
        )
        chart.add_trace(trace)

    chart.update_layout(
        title="Energy Consumption Over Time",
        xaxis_title="Time Interval (Start)",
        yaxis_title="Consumption",
        xaxis=dict(tickangle=45),
        template="plotly_white",
    )

    return chart

# Get new data

In [13]:
# Download the newest readings for each utility from Octopus as DataFrames
# (with the defaults, each call also saves a dated raw CSV)
df_electricity = get_raw_usage_data(utility='electricity')
df_gas = get_raw_usage_data(utility='gas')

Status Code (electricity): 200
 ... Saving as ./data-raw/2025-02-15_rawdata_electricity.csv
 ... Records 100
 ... Variables 3 : Index(['consumption', 'interval_start', 'interval_end'], dtype='object')
 ... From Tue 2025-02-11 22:00 to Thu 2025-02-13 23:30
Status Code (gas): 200
 ... Saving as ./data-raw/2025-02-15_rawdata_gas.csv
 ... Records 100
 ... Variables 3 : Index(['consumption', 'interval_start', 'interval_end'], dtype='object')
 ... From Tue 2025-02-11 22:00 to Thu 2025-02-13 23:30


In [14]:
# Build one frame holding both readings; an interval is kept when either
# utility has a reading for it (outer join)
df_gas = get_utility_data(dir_data_raw, utility = 'gas')
df_electricity = get_utility_data(dir_data_raw, utility = 'electricity')

# Columns both frames carry besides the join key. Merging on interval_start
# alone would leave them empty on rows only electricity has, so they are
# brought along under a suffix and used to fill those gaps.
label_columns = ['interval_end', 'weekday', 'day', 'dom', 'time', 'x_label']
df_both = pd.merge(df_gas,
                   df_electricity[['electricity_consumption', 'interval_start'] + label_columns],
                   on='interval_start', how='outer',
                   suffixes=('', '_electricity'))
for column in label_columns:
    fallback = f'{column}_electricity'
    df_both[column] = df_both[column].fillna(df_both[fallback])
    df_both = df_both.drop(columns=fallback)
df_both = df_both.sort_values(by='interval_start', ascending=True)

# save
os.makedirs(dir_cumulative, exist_ok=True)
file_cumulative = f'{dir_cumulative}/cumulative.csv'
df_both.to_csv(file_cumulative, index=False)
print(f' ... MERGED dataset, saved to {file_cumulative}')
report_timespan(df_both)

 ... Cumulative data for gas, 964 records
 ... From Fri 2025-01-24 22:00 to Thu 2025-02-13 23:30
 ... Cumulative data for electricity, 920 records
 ... From Fri 2025-01-24 22:00 to Thu 2025-02-13 23:30
 ... MERGED dataset, saved to ./data-cumulative/cumulative.csv
 ... From Fri 2025-01-24 22:00 to Thu 2025-02-13 23:30


# Push to GitHub

In [11]:
# Stage, commit, and push changes
# NOTE(review): add(".") stages every file git does not ignore -- confirm the
# .env file holding API_KEY is listed in .gitignore before running this.
if repo.is_dirty(untracked_files=True):
    repo.git.add(".")
    commit_stamp = datetime.now().strftime('%A %Y-%m-%d %H:%M')
    git_message = f'Commit via GitPython - {commit_stamp}'
    print(git_message)
    repo.index.commit(git_message)
else:
    # index.commit() would otherwise record an empty commit on every run.
    print(' ... Nothing to commit')

# Push regardless, so commits made earlier but not yet pushed still go out.
origin = repo.remote(name='origin')
origin.push()

Commit via GitPython - Thursday 2025-02-13 12:03


[<git.remote.PushInfo at 0x174e34d00e0>]

# Run charts

In [12]:
# Chart the most recent `days` days of readings
days = 3
intervals_per_day = 48  # readings are half-hourly
df_week = df_both.iloc[-(days * intervals_per_day):]
fig = get_plot(df_week, utility='both')
fig.show()

# tuesday - laundry

# Testing

In [62]:
# List the saved raw gas downloads, newest file name first
raw_files = sorted(
    (entry for entry in os.listdir(dir_data_raw)
     if entry.endswith(".csv") and 'rawdata' in entry and 'gas' in entry),
    reverse = True,
)

for x in raw_files:
    print(x)

2025-02-04_rawdata_gas.csv
2025-02-03_rawdata_gas.csv
2025-02-02_rawdata_gas.csv
2025-02-01_rawdata_gas.csv
2025-01-31_rawdata_gas.csv
2025-01-30_rawdata_gas.csv
2025-01-29_rawdata_gas.csv
2025-01-28_rawdata_gas.csv
