In [1]:
from typing import Tuple

import hopsworks
import pandas as pd

from settings import SETTINGS


In [2]:
def load_dataset_from_feature_store(
    feature_view_version: int,
    training_dataset_version: int,
    fh: int = 24,
    feature_view_name: str = "pm25_singapore_view",
) -> pd.DataFrame:
    """Load a training dataset from the Hopsworks feature store.

    Logs in to the project configured in ``SETTINGS`` (keys ``FS_API_KEY`` and
    ``FS_PROJECT_NAME``), looks up the requested feature view and returns the
    first element of ``feature_view.get_training_data(...)``.

    Args:
        feature_view_version (int): feature store feature view version to load data from
        training_dataset_version (int): feature store training dataset version to load data from
        fh (int, optional): Forecast horizon. Currently unused by this function;
            kept so existing callers that pass it keep working. Defaults to 24.
        feature_view_name (str, optional): name of the feature view to read.
            Defaults to "pm25_singapore_view".

    Returns:
        The training data as a single pandas dataframe. No train/test split is
        performed here. Rows come back in whatever order the feature store
        yields them; sort (e.g. by ``timestamp``) before any order-dependent
        processing.
    """
    project = hopsworks.login(
        api_key_value=SETTINGS["FS_API_KEY"],
        project=SETTINGS["FS_PROJECT_NAME"],
    )
    fs = project.get_feature_store()

    feature_view = fs.get_feature_view(
        name=feature_view_name,
        version=feature_view_version,
    )

    # get_training_data returns a 2-tuple; only the first element is used.
    # NOTE(review): per the Hopsworks API the second element holds the labels
    # (empty when the view defines none) -- confirm it is safe to discard.
    data, _ = feature_view.get_training_data(
        training_dataset_version=training_dataset_version
    )

    return data

In [4]:
# Fetch feature view v2 / training dataset v1 from the feature store
# (network call; requires valid credentials in SETTINGS).
dataset = load_dataset_from_feature_store(
    feature_view_version=2,
    training_dataset_version=1,
)

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/284820
Connected. Call `.close()` to terminate connection gracefully.




In [5]:
# Preview the first five rows exactly as returned by the feature store.
dataset.head(n=5)

Unnamed: 0,timestamp,update_timestamp,reading_west,reading_east,reading_central,reading_south,reading_north,reading_average
0,2016-06-02T23:00:00.000Z,2016-06-02T23:11:18.000Z,34,3,32,9,41,30
1,2020-05-01T20:00:00.000Z,2020-05-01T20:08:53.000Z,7,10,10,10,11,11
2,2021-06-10T05:00:00.000Z,2021-06-10T05:08:53.000Z,16,8,23,29,24,24
3,2023-09-08T02:00:00.000Z,2023-09-08T02:08:53.000Z,9,19,17,8,8,15
4,2020-09-07T22:00:00.000Z,2020-09-07T22:08:52.000Z,10,16,13,14,7,14


In [7]:
# Chronological view for inspection only: the sorted frame is displayed,
# not assigned, so `dataset` keeps its original row order.
dataset.sort_values("timestamp")

Unnamed: 0,timestamp,update_timestamp,reading_west,reading_east,reading_central,reading_south,reading_north,reading_average
33128,2016-02-10T06:00:00.000Z,2016-03-03T13:37:03.000Z,15,14,10,9,15,14
50966,2016-02-10T07:00:00.000Z,2016-03-03T13:37:03.000Z,19,19,15,26,12,21
38483,2016-02-10T08:00:00.000Z,2016-03-03T13:37:03.000Z,13,16,17,21,12,19
16715,2016-02-10T09:00:00.000Z,2016-03-03T13:37:03.000Z,8,14,13,15,10,14
33965,2016-02-10T10:00:00.000Z,2016-03-03T13:37:03.000Z,13,14,14,12,9,15
...,...,...,...,...,...,...,...,...
24933,2023-12-17T23:00:00.000Z,2023-12-17T23:03:52.000Z,6,18,12,8,6,12
29899,2023-12-18T00:00:00.000Z,2023-12-18T00:03:52.000Z,7,17,10,7,7,11
5162,2023-12-18T01:00:00.000Z,2023-12-18T01:03:54.000Z,8,13,13,10,7,12
49970,2023-12-18T02:00:00.000Z,2023-12-18T02:03:52.000Z,10,18,15,13,9,16


In [6]:
# Preview the last five rows in the frame's stored order.
dataset.tail(n=5)

Unnamed: 0,timestamp,update_timestamp,reading_west,reading_east,reading_central,reading_south,reading_north,reading_average
62820,2020-11-10T21:00:00.000Z,2020-11-10T21:08:52.000Z,24,37,27,27,27,33
62821,2020-07-22T21:00:00.000Z,2020-07-22T21:08:52.000Z,4,12,14,3,12,11
62822,2018-08-16T22:00:00.000Z,2018-08-16T22:03:53.000Z,12,13,15,17,14,17
62823,2020-05-01T09:00:00.000Z,2020-05-01T09:08:52.000Z,7,8,4,9,3,7
62824,2023-06-26T22:00:00.000Z,2023-06-26T22:08:52.000Z,11,12,8,12,4,11
