In [None]:
# Reload imported modules automatically before each cell is executed, so that
# edits made to the repo take effect without restarting the notebook kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pytest
import yaml
import json
import pathlib
import pandas as pd
import mongomock
import xarray as xr
import numpy as np
import fsspec
from fsspec.implementations.local import LocalFileSystem
from pathlib import Path

from src.store.data_access_objects import (
    MongoDAO,
    FileSystemDAO,
    InMemoryObjectDAO,
    datetime_to_microseconds,
    microseconds_to_datetime,
)

from src.store.repositories import (
    DomainModelRepository, domain_model_json_schema,
    DataRepository,
    InMemoryObjectRepository,
)

from src.store.datafile_adapters import (
    XarrayDataArrayNetCDFAdapter,
    XarrayDataArrayZarrAdapter,
    AbstractDataFileAdapter,
)

from src.store import UnitOfWorkProvider
from datetime import datetime, timezone, timedelta


In [None]:
# Functions

def _parse_attr_string(text):
    """Convert one stringified attribute value back into a Python value.

    Arguments:
        text {str} -- The attribute value as a string.
    Returns:
        bool | None | dict | str -- True/False/None for the case-insensitive
        words "true"/"false"/"none"; a dict for text that starts with "{" and
        can be parsed; otherwise ``text`` itself, unchanged.
    """
    keywords = {"true": True, "false": False, "none": None}
    lowered = text.lower()
    if lowered in keywords:
        return keywords[lowered]
    if not text.startswith("{"):
        return text

    # Try the text exactly as written first, so apostrophes inside valid JSON
    # strings survive; only then retry with single quotes swapped for double
    # quotes (covers simple single-quoted dict text).
    for candidate in (text, text.replace("'", '"')):
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue

    # Python dict reprs that contain True/False/None are not valid JSON either
    # way, so fall back to a safe literal evaluation of the original text.
    import ast  # local import keeps this cell self-contained

    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError):
        # Not parseable: keep the string, like any other unrecognised text.
        return text
    return parsed if isinstance(parsed, dict) else text


def deserialize_dataarray(data_object):
    """Restore Python values in the ``attrs`` of a data object.

    String attributes are converted by ``_parse_attr_string`` (booleans, None
    and dict-like text); numpy array attributes are converted to plain lists.
    Every other attribute is left untouched.

    Arguments:
        data_object -- Any object with a dict-like ``attrs`` attribute; this
            notebook passes ``xarray.DataArray`` instances.
    Returns:
        The same ``data_object``, with ``attrs`` replaced by the converted
        copy. The original attrs mapping is not modified.
    """
    attrs = data_object.attrs.copy()
    # Only values of existing keys are reassigned, so iterating while
    # assigning is safe (the dict never changes size).
    for key, value in attrs.items():
        if isinstance(value, str):
            attrs[key] = _parse_attr_string(value)
        elif isinstance(value, np.ndarray):
            attrs[key] = value.tolist()
    data_object.attrs = attrs
    return data_object


In [None]:
# Mock MongoDB client (mongomock) standing in for a real database connection
mongo_client = mongomock.MongoClient()

# Demo filesystem, pointed at <parent of the working directory>/data/internal
tmpdir = Path.cwd().parent.joinpath("data", "internal")
# NOTE(review): `root` is passed as a plain keyword argument here; confirm that
# LocalFileSystem actually makes use of it.
filesystem = LocalFileSystem(root=str(tmpdir))

# The in-memory object store starts out empty for the demo
memory_store = {}

# Build the unit of work that is used to act on the stores
uow_provider = UnitOfWorkProvider(mongo_client, filesystem, memory_store)
unit_of_work = uow_provider(str(tmpdir))

def _read_json(path):
    """Return the parsed contents of the JSON file at ``path``.

    The file is opened as UTF-8 explicitly, so parsing does not depend on the
    platform's default text encoding.
    """
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)


# Directory holding the valid model fixtures, relative to the parent of the
# current working directory
models_root = Path.cwd().parent / "tests" / "data" / "valid_data" / "models"

# Get raw_property_models
property_models_path = models_root / "property_models.json"
raw_property_models = _read_json(property_models_path)

# Get raw_metamodels
# (paths are sorted so the load order is the same on every run and platform;
# glob itself yields files in an arbitrary order)
metamodels_dir = models_root / "metamodels"
metamodel_filepaths = sorted(metamodels_dir.glob("*.json"))
raw_metamodels = [_read_json(filepath) for filepath in metamodel_filepaths]

# Get raw_data_models
data_models_dir = models_root / "data_models"
data_model_filepaths = sorted(data_models_dir.glob("*.json"))
raw_data_models = [_read_json(filepath) for filepath in data_model_filepaths]

# The Excel record sheets and the NetCDF files are read from the same input
# directory, so it is resolved once.
netcdf_dir = Path.cwd().parent / "data" / "input"

# Get raw_records
# (paths are sorted so records are collected in a reproducible order)
records_files = sorted(netcdf_dir.glob("*.xlsx"))
raw_records = []
for filepath in records_files:
    # Every cell is read as text. Going through JSON then yields plain dicts
    # in which pandas has written missing cells as null (None in Python).
    file = pd.read_excel(filepath, engine='openpyxl', dtype=str)
    file_json = file.to_json(orient="records")
    records = json.loads(file_json)
    raw_records.extend(records)

# Get dataarrays
netcdf_files = sorted(netcdf_dir.glob("*.nc"))
dataarrays = []
for filepath in netcdf_files:
    # load_dataarray reads the values into memory and closes the file again;
    # open_dataarray would keep one file handle open per array for as long as
    # `dataarrays` stays alive in the kernel.
    dataarray = xr.load_dataarray(filepath)
    dataarray = deserialize_dataarray(dataarray)
    dataarrays.append(dataarray)

# Add property models, metamodels, data models, records and dataarrays.
# Everything is added inside one unit of work and committed once at the end.
with unit_of_work as uow:
    # Domain models, in the order: property models, metamodels, data models
    for model_group in (raw_property_models, raw_metamodels, raw_data_models):
        for domain_model in model_group:
            uow.domain_models.add(domain_model)

    # Records with a truthy "has_file" value are skipped here.
    # NOTE(review): the sheets are read with dtype=str, so a cell holding the
    # text "False" would count as truthy and its record would be skipped --
    # confirm which values "has_file" can take.
    for record in raw_records:
        if record.get("has_file"):
            continue
        uow.data.add(record)

    # Dataarrays whose "schema_ref" attribute equals "test" are left out
    for dataarray in dataarrays:
        if dataarray.attrs.get("schema_ref") != "test":
            uow.data.add(dataarray)

    uow.commit()
