In [1]:
import pandas as pd
from datetime import datetime, timedelta
from os import  getenv
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from pymongo.database import Database
from urllib.parse import quote_plus
from getpass import getpass 


In [2]:
# Connection settings: each credential is taken from the environment when set,
# otherwise the user is prompted for it interactively.
user = getenv('MONGO_USER') or input('Username: ')
password = getenv('MONGO_PASSWORD') or getpass('Password: ')

# Both credentials are percent-encoded so reserved characters cannot break the URL.
url = (
    "mongodb+srv://"
    f"{quote_plus(user)}:{quote_plus(password)}"
    "@project-data.fyzivf2.mongodb.net/"
    "?retryWrites=true&w=majority&appName=project-data"
)

def mongo_client(url: str) -> MongoClient:
    """Creates a MongoDB client and verifies that the server is reachable.

    Args:
        url (str): The connection URL for the MongoDB client

    Returns:
        MongoClient (MongoClient): The MongoDB client object

    Raises:
        pymongo.errors.PyMongoError: If the verification ping fails
            (for example the server is unreachable or the credentials are rejected).
    """
    client = MongoClient(url, server_api=ServerApi(version='1'))
    # MongoClient connects lazily; ping once so the message below is only
    # printed when the server has actually answered.
    client.admin.command('ping')
    print("Connected to MongoDB client")
    return client

def mongo_database(client, db_name:str) -> Database:
    """Selects a database on an existing client and reports its collections.

    Args:
        client: The client object, indexed here by database name
        db_name (str): The name of the database to connect to

    Returns:
        Database (Database): The MongoDB database object
    """
    database = client[db_name]
    print(f"Connected to MongoDB database: {db_name}")

    # Listing the collections gives the reader a quick view of what can be loaded.
    available = database.list_collection_names()
    print(f"Collections available: {', '.join(available)}")
    return database

def read_mongo_data(db, collection_name: str, find_kwargs=None) -> pd.DataFrame:
    """Reads data from a MongoDB collection into a pandas DataFrame.

    Args:
        db: Database-like object, indexed here by collection name
        collection_name (str): The name of the collection to read from
        find_kwargs (dict, optional): Query filter passed to ``find``.
            Defaults to None, which is sent as an empty filter ``{}``.

    Returns:
        pd.DataFrame: The matching documents without the '_id' column.
            An empty DataFrame is returned when no document matches.
    """
    # Use a fresh dict per call instead of a shared mutable default argument.
    query = {} if find_kwargs is None else find_kwargs

    # Consume the cursor before reporting, so the message reflects a finished download.
    records = list(db[collection_name].find(query))
    print(f'Collection downloaded: {collection_name}')

    # Remove the MongoDB '_id'; errors='ignore' keeps an empty result from raising
    # because an empty frame has no '_id' column to drop.
    return pd.DataFrame(records).drop(columns='_id', errors='ignore')

# Open the connection and select the database that holds the project data.
client = mongo_client(url)
db = mongo_database(client, db_name='data')

# Load only the NSW documents from the 'features' collection.
df_orig = read_mongo_data(db, collection_name='features', find_kwargs={'state': 'NSW'})



Connected to MongoDB client
Connected to MongoDB database: data
Collections available: features, temperature, total_demand
Collection downloaded: features


In [28]:
# Add lagged-demand columns: TM<k> holds the TOTALDEMAND recorded k minutes
# before each row's DATETIME.
df = df_orig.copy()

# Only the timestamp and the demand value are needed to build every lag, so
# select them once instead of copying the whole (growing) frame per iteration.
# Assumes DATETIME is unique within df_orig; duplicates would multiply rows in
# the left merges below. TODO confirm.
demand = df[['DATETIME', 'TOTALDEMAND']]

# Lags of 30, 60, ..., 210 minutes (seven columns).
# NOTE(review): a 240-minute lag (TM240) is not produced; confirm seven lags is the intent.
for shift in range(30, 30 * 8, 30):
    df_shift = demand.rename(columns={'TOTALDEMAND': f'TM{shift}'})
    # Moving the timestamps forward aligns each past reading with the row it precedes.
    df_shift['DATETIME'] += timedelta(minutes=shift)
    df = pd.merge(df, df_shift, how='left', on='DATETIME')

df.iloc[100]


TOTALDEMAND                      6362.67
TEMPERATURE                         19.5
state                                NSW
DATETIME             2010-01-03 02:00:00
year                                2010
month                                0.5
day_of_month                           3
day_of_week                    -0.781831
is_weekday                          True
period_of_day                        0.5
is_public_holiday                  False
is_daylight                        False
h1_year                           2010.0
h24_year                          2010.0
h1_month                             0.5
h24_month                            0.5
h1_day_of_month                      3.0
h24_day_of_month                     4.0
h1_day_of_week                 -0.781831
h24_day_of_week                      0.0
h1_TOTALDEMAND                   6088.35
h24_TOTALDEMAND                  6218.66
TM30                             6537.47
TM60                             6820.07
TM90            