In [1]:
from datetime import datetime, timedelta
from getpass import getpass
from os import getenv
from pathlib import Path
from urllib.parse import quote_plus

import pandas as pd
from pymongo.database import Database
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi


In [6]:
# Define constants
# Credentials are taken from the MONGO_USER / MONGO_PASSWORD environment variables.
# When a variable is unset or empty the `or` falls through to an interactive prompt
# (getpass for the password), so no secret has to be written into the notebook.
user = getenv('MONGO_USER') or input('Username: ')
password = getenv('MONGO_PASSWORD') or getpass('Password: ')
# quote_plus percent-encodes both credentials so that reserved characters in them
# (for example '@' or ':') cannot break the structure of the connection URL.
url = f"mongodb+srv://{quote_plus(user)}:{quote_plus(password)}@project-data.fyzivf2.mongodb.net/?retryWrites=true&w=majority&appName=project-data"

def mongo_client(url: str) -> MongoClient:
    """Build a MongoDB client for the given connection URL.

    The client is created with version '1' of the server API.

    Args:
        url (str): The connection URL for the MongoDB client

    Returns:
        MongoClient: The newly created client object
    """
    server_api = ServerApi(version='1')
    connection = MongoClient(url, server_api=server_api)
    # NOTE(review): the message below is printed right after construction; nothing
    # in this function checks that the server is actually reachable — confirm
    # whether an explicit ping is wanted here.
    print("Connected to MongoDB client")
    return connection

def mongo_database(client, db_name:str) -> Database:
    """Select a database on an existing client and report its collections.

    Args:
        client: Client object that is indexed by database name (``client[db_name]``)
        db_name (str): The name of the database to select

    Returns:
        Database: The selected database object
    """
    database = client[db_name]
    print(f"Connected to MongoDB database: {db_name}")

    # The collection names are fetched only after the first message has been
    # printed, so the output order matches a failure part-way through.
    available = ', '.join(database.list_collection_names())
    print(f"Collections available: {available}")
    return database

def read_mongo_data(db, collection_name: str, filter=None) -> pd.DataFrame:
    """Reads documents from a MongoDB collection into a pandas DataFrame.

    Args:
        db: Database-like object; ``db[collection_name]`` must provide ``find``
        collection_name (str): The name of the collection to read from
        filter (dict, optional): Query passed to ``find``. ``None`` (the default)
            is treated as an empty filter ``{}``.

    Returns:
        pd.DataFrame: One row per returned document, without the '_id' column.
            An empty DataFrame is returned when no document matches.
    """
    # Build a fresh dict per call instead of sharing one mutable default argument.
    query = {} if filter is None else filter

    # Consume the query result completely before reporting the download, so the
    # message is only printed once the documents are actually in memory.
    records = list(db[collection_name].find(query))
    print(f'Collection downloaded: {collection_name}')

    # Remove the MongoDB '_id' column. errors='ignore' keeps this from raising a
    # KeyError when the column is absent, e.g. when the query matched nothing.
    return pd.DataFrame(records).drop(columns='_id', errors='ignore')

# Create the client from the connection URL and select the 'data' database.
client = mongo_client(url)
db = mongo_database(client, 'data')

# Load the 'features' collection, restricted to documents whose 'state' is 'NSW'.
# The result is kept under its own name (df_orig) so it is not overwritten later.
df_orig = read_mongo_data(db, 'features', {'state': 'NSW'})



Connected to MongoDB client
Connected to MongoDB database: data
Collections available: features, temperature, total_demand
Collection downloaded: features


In [17]:
# Lag configuration: N_LAGS lagged demand columns, LAG_STEP_MINUTES apart
# (48 steps of 30 minutes, i.e. lags from 30 up to 1440 minutes).
LAG_STEP_MINUTES = 30
N_LAGS = 48

# Start from a copy so the cell can be re-run without touching df_orig.
df = df_orig.copy()

for i in range(1, N_LAGS + 1):
    shift = LAG_STEP_MINUTES * i
    # Take only the two needed columns; rename/assign already return new frames,
    # so there is no need to deep-copy the whole (ever-widening) frame first.
    # Moving DATETIME forward by `shift` minutes means that, after the merge,
    # TM{shift} on a row holds the TOTALDEMAND observed `shift` minutes earlier.
    df_shift = (
        df[['DATETIME', 'TOTALDEMAND']]
        .rename(columns={'TOTALDEMAND': f'TM{shift}'})
        .assign(DATETIME=lambda lagged: lagged['DATETIME'] + timedelta(minutes=shift))
    )
    # Left merge: rows without a matching earlier timestamp get NaN in TM{shift}.
    df = pd.merge(df, df_shift, how='left', on='DATETIME')

# Drops every row that has a missing value in ANY column: rows lacking a full set
# of lags, but also rows with NaN in any other feature column.
# NOTE(review): confirm that dropping on non-lag columns is intended.
df = df.dropna().reset_index(drop=True)

df.iloc[0].to_dict()


{'TOTALDEMAND': 7574.85,
 'TEMPERATURE': 24.0,
 'state': 'NSW',
 'DATETIME': Timestamp('2010-01-02 00:00:00'),
 'year': 2010,
 'month': 0.49999999999999994,
 'day_of_month': 2,
 'day_of_week': -0.9749279121818236,
 'is_weekday': True,
 'period_of_day': 0.0,
 'is_public_holiday': False,
 'is_daylight': False,
 'h1_year': 2010.0,
 'h1_month': 0.49999999999999994,
 'h1_day_of_month': 2.0,
 'h1_day_of_week': -0.9749279121818236,
 'h1_TOTALDEMAND': 7099.73,
 'h24_year': 2010.0,
 'h24_month': 0.49999999999999994,
 'h24_day_of_month': 3.0,
 'h24_day_of_week': -0.7818314824680299,
 'h24_TOTALDEMAND': 7284.1,
 'TM30': 7782.68,
 'TM60': 8041.77,
 'TM90': 8210.54,
 'TM120': 8295.59,
 'TM150': 8343.8,
 'TM180': 8333.54,
 'TM210': 8573.05,
 'TM240': 8635.78,
 'TM270': 8562.12,
 'TM300': 8544.55,
 'TM330': 8640.56,
 'TM360': 8765.46,
 'TM390': 8835.92,
 'TM420': 8883.98,
 'TM450': 8791.61,
 'TM480': 8715.69,
 'TM510': 8603.11,
 'TM540': 8557.94,
 'TM570': 8501.39,
 'TM600': 8477.34,
 'TM630': 8421.0

In [18]:
# Write the lag-augmented modelling frame to CSV (no index column, '\n' line endings).
output_path = Path('data/modelling_data.csv')
# Create the target directory first so the export also works on a fresh checkout
# where 'data/' does not exist yet.
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, lineterminator='\n', index=False)

df


Unnamed: 0,TOTALDEMAND,TEMPERATURE,state,DATETIME,year,month,day_of_month,day_of_week,is_weekday,period_of_day,...,TM1170,TM1200,TM1230,TM1260,TM1290,TM1320,TM1350,TM1380,TM1410,TM1440
0,7574.85,24.0,NSW,2010-01-02 00:00:00,2010,0.5,2,-0.974928,True,0.000000,...,6248.31,6211.49,6282.85,6377.32,6544.33,6812.03,7117.23,7483.69,7809.31,8038.00
1,7343.30,24.3,NSW,2010-01-02 00:30:00,2010,0.5,2,-0.974928,True,0.130526,...,6198.61,6248.31,6211.49,6282.85,6377.32,6544.33,6812.03,7117.23,7483.69,7809.31
2,7099.73,24.2,NSW,2010-01-02 01:00:00,2010,0.5,2,-0.974928,True,0.258819,...,6237.35,6198.61,6248.31,6211.49,6282.85,6377.32,6544.33,6812.03,7117.23,7483.69
3,6779.80,24.2,NSW,2010-01-02 01:30:00,2010,0.5,2,-0.974928,True,0.382683,...,6370.48,6237.35,6198.61,6248.31,6211.49,6282.85,6377.32,6544.33,6812.03,7117.23
4,6497.47,24.1,NSW,2010-01-02 02:00:00,2010,0.5,2,-0.974928,True,0.500000,...,6506.42,6370.48,6237.35,6198.61,6248.31,6211.49,6282.85,6377.32,6544.33,6812.03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196412,7373.83,20.4,NSW,2021-03-16 22:00:00,2021,1.0,16,0.781831,True,-0.500000,...,5984.86,6099.16,6314.55,6613.53,6743.32,6908.59,6955.38,7067.12,7184.81,7189.56
196413,7345.78,20.5,NSW,2021-03-16 22:30:00,2021,1.0,16,0.781831,True,-0.382683,...,5896.86,5984.86,6099.16,6314.55,6613.53,6743.32,6908.59,6955.38,7067.12,7184.81
196414,7218.99,20.3,NSW,2021-03-16 23:00:00,2021,1.0,16,0.781831,True,-0.258819,...,5890.23,5896.86,5984.86,6099.16,6314.55,6613.53,6743.32,6908.59,6955.38,7067.12
196415,7056.88,19.7,NSW,2021-03-16 23:30:00,2021,1.0,16,0.781831,True,-0.130526,...,5989.56,5890.23,5896.86,5984.86,6099.16,6314.55,6613.53,6743.32,6908.59,6955.38
