# Libraries

In [53]:
# General
import pandas as pd
import numpy as np

# Data Management
from dataclasses import dataclass, field
from collections import namedtuple
from typing import List, Any

# Graphing
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# So that pandas .dt.time series can play nice with matplotlib
import datetime

# Random Forest
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Aesthetics

In [None]:
# Global multiplier: change this one number to scale every figure's size.
scaler = 2
# Width of an A4 page minus 2.54 cm margins on either side (6.267717 in),
# multiplied by scaler.
fig_wide = scaler * 6.267717
# Presumably the golden ratio, used as the figure aspect ratio (confirm at the plotting call sites).
fig_gold = 1.618

# Data Import

## Functions

In [None]:
# Parse a timestamp column into datetimes and rename it
def convert_time(dataframe, column, format = None, unit = None, new_name = "Time (Datetime)"):
    """Convert ``dataframe[column]`` to datetimes in place and rename the column.

    The frame passed in is modified directly; nothing is returned.

    :param dataframe: DataFrame holding the timestamp column.
    :param column: Name of the column to convert.
    :param format: Passed through to ``pd.to_datetime`` as ``format``.
    :param unit: Passed through to ``pd.to_datetime`` as ``unit``.
    :param new_name: Name the converted column is given afterwards.
    """
    parsed = pd.to_datetime(dataframe[column], format=format, unit=unit)
    dataframe[column] = parsed
    renames = {column: new_name}
    dataframe.rename(columns=renames, inplace=True)

# Clean data
def cleaner(dataframe, variable_name):
    """Return one variable of ``dataframe`` on a gap-free 30-minute index.

    The known missing-value marker ``-9999.0`` is replaced with NaN, and the
    result is reindexed onto a regular 30-minute range running from the first
    to the last index entry, so timestamps absent from the input appear as NaN
    rows. The caller's ``dataframe`` is left unmodified.

    Assumes the index is a sorted datetime index whose entries fall on that
    30-minute grid; rows at any other timestamp are not carried over by the
    reindex. The returned frame's index is unnamed, so set ``index.name`` on
    the result if a name is needed.

    :param dataframe: Datetime-indexed DataFrame containing ``variable_name``.
    :param variable_name: Name of the single column to keep.
    :return: New one-column DataFrame; an empty copy if the input has no rows.
    """
    # Select first so the replacement below never writes into the caller's frame.
    selected = dataframe[[variable_name]]
    if len(selected.index) == 0:
        # No first/last timestamp to build a range from.
        return selected.copy()

    # Replace visible/known missing values.
    selected = selected.replace([-9999.0], np.nan)

    # Reindex to expose hidden missing values (timestamps with no row at all).
    full_index = pd.date_range(start=selected.index[0], end=selected.index[-1], freq="30min")
    return selected.reindex(full_index)

## Data

Declare the dataclasses (and the coordinates namedtuple) that contain my data

In [88]:
# Lightweight (x, y) pair used to store a location's coordinates
coordinates = namedtuple("coordinates", "x y")

# Dataclass to contain descriptive metadata about a location
@dataclass
class metadata:
    """Descriptive metadata for one location: position, elevation and a description."""
    coords: coordinates  # (x, y) namedtuple; presumably longitude/latitude in decimal degrees — confirm
    elevation: int  # site elevation; units are not stated in this file (presumably metres) — confirm
    desc: str  # free-text description of the data

@dataclass
class aesthetics:
    """Presentation settings for a location: long names, units and colours.

    Every field defaults to its own fresh empty dict, so ``aesthetics()`` is
    equivalent to ``aesthetics(longnames={}, units={}, colours={})`` and
    instances never share a dict.
    """
    # NOTE(review): the dicts are presumably keyed by variable name — confirm against the plotting code.
    longnames: dict = field(default_factory=dict)  # display names
    units: dict = field(default_factory=dict)  # unit labels
    colours: dict = field(default_factory=dict)  # plot colours
    
@dataclass
class datatables:
    """The DataFrames belonging to one location: flux, satellite and paired.

    Every field defaults to its own fresh empty DataFrame, so ``datatables()``
    is equivalent to passing ``pd.DataFrame()`` for all three.

    NOTE: the dataclass-generated ``==`` compares the frames with ``==``,
    which raises ``ValueError`` for DataFrames that are not the same object;
    compare the individual frames (e.g. with ``DataFrame.equals``) instead.
    """
    flux: pd.DataFrame = field(default_factory=pd.DataFrame)
    satellite: pd.DataFrame = field(default_factory=pd.DataFrame)
    paired: pd.DataFrame = field(default_factory=pd.DataFrame)

# Dataclass to contain my data
@dataclass
class location:
    """Top-level record for one location, bundling its data tables, stats, metadata and aesthetics."""
    name: str  # human-readable name of the location
    origin: str  # source label for the record (the instance in this file uses "flux")
    data: datatables  # the flux / satellite / paired DataFrames
    stats: dict  # statistics for the location; contents are not defined in this file
    meta: metadata  # coordinates, elevation and description
    aes: aesthetics  # long names, units and colours

In [None]:
# Generalized filter function
def filter_by(objects: List[Any], attribute: str, value: Any) -> List[Any]:
    """
    Filters a list of objects based on a specific attribute and its value.
    Nested attributes are reached by walking a dot-separated path.

    An object is kept only when the whole path resolves on it and the
    resolved value equals ``value``. Objects on which any step of the path
    does not exist are never kept, even when ``value`` is ``None``, so a
    misspelled path yields an empty result rather than matching everything.

    :param objects: List of dataclass instances to filter.
    :param attribute: Attribute name to filter by (supports nested attributes using dot notation).
    :param value: The value to match for the specified attribute.
    :return: List of objects where the attribute matches the value.
    """
    # Private sentinel: tells "attribute does not exist" apart from an
    # attribute whose value really is None.
    missing = object()

    def get_nested_attr(obj: Any, attribute: str) -> Any:
        """
        Follows a dot-separated attribute path; returns ``missing`` if any step is absent.
        """
        for attr in attribute.split('.'):  # Split by dot notation if nested
            obj = getattr(obj, attr, missing)
            if obj is missing:
                return missing
        return obj

    matches = []
    for obj in objects:
        found = get_nested_attr(obj, attribute)
        if found is not missing and found == value:
            matches.append(obj)
    return matches

# e.g. filter_by(data, "meta.coords.x", 19.04520892)

In [93]:
# Record for the Abisko-Stordalen flux-tower site. The three data tables, the
# stats dict and the aesthetics dicts all start out empty here.
se_sto = location(
    name = "Abisko-Stordalen Palsa Bog",
    origin = "flux",
    data = datatables(
        flux = pd.DataFrame(),
        satellite = pd.DataFrame(),
        paired = pd.DataFrame(),
    ),
    stats = {},
    meta = metadata(
        coords = coordinates(19.04520892, 68.35594288),  # (x, y)
        # NOTE(review): this stores the built-in type `int` itself, not a number
        # (the repr shows elevation=<class 'int'>). It looks like a placeholder;
        # replace it with the site's actual elevation.
        elevation = int,
        desc = "Flux tower eddy-covariance data",
    ),
    aes = aesthetics(
        longnames = {},
        units = {},
        colours = {},
    ),
)

# List of all location records, e.g. as the input to filter_by
data = [se_sto]

In [86]:
# Display the x coordinate stored on the Abisko-Stordalen record
se_sto.meta.coords.x

19.04520892

[location(name='Abisko-Stordalen Palsa Bog', origin='flux', data=datatables(flux=Empty DataFrame
 Columns: []
 Index: [], satellite=Empty DataFrame
 Columns: []
 Index: [], paired=Empty DataFrame
 Columns: []
 Index: []), stats={}, meta=metadata(coords=coordinates(x=19.04520892, y=68.35594288), elevation=<class 'int'>, desc='Flux tower eddy-covariance data'), aes=aesthetics(longnames={}, units={}, colours={}))]