In [1]:
#|default_exp bobby_tables
## Standard libraries
import os
import math
import numpy as np
import time
from fastcore.all import *
from nbdev.showdoc import *
# Configure environment
os.environ['XLA_PYTHON_CLIENT_PREALLOCATE']='false' # Tells Jax not to hog all of the memory to this process.

## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import set_matplotlib_formats
# set_matplotlib_formats('svg', 'pdf') # For export
from matplotlib.colors import to_rgba
import seaborn as sns
sns.set()

## Progress bar
from tqdm.auto import tqdm, trange

%load_ext autoreload
%autoreload 2

# Bobby "Father-of-all-Comparisons" Tables
> A handy-dandy class for computing comparisons en masse

# Implementation

In [5]:
import pandas as pd
from fastcore.all import *
from tqdm.auto import *

class BobbyTables():
    def __init__(
        self,
        datasets, # a list containing the datasets; these will go in the columns
        methods, # a list containing functions which transform the dataset
        metrics, # a list containing metrics to apply to the transformed data. Should return scalar values.
        dataset_names = None,
    ):
        store_attr()
        if self.dataset_names is None:
            self.dataset_names = [f"Dataset {i+1}" for i in range(len(self.datasets))]
        self.transformed_data = 
        
        
        pd.DataFrame(index=self.dataset_names, 
                                             columns=[method.__name__ for method in self.methods])
    def compute_transforms(self):
        for X_name, X in tqdm(zip(self.dataset_names, self.datasets)):
            for f in tqdm(self.methods):
                # Create a dictionary with the transformed data
                X_dict = {'X': f(X)}
                # Assign the dictionary to the DataFrame cell
                self.transformed_data.loc[X_name, f.__name__] = X_dict

    def compute_metrics(self):
        for X_name in tqdm(self.dataset_names):
            for f in tqdm(self.methods):
                # Create a copy of the dictionary in the DataFrame cell
                X_dict = self.transformed_data.loc[X_name, f.__name__].copy()
                for met in tqdm(self.metrics):
                    # Compute the metric and add it to the dictionary
                    X_dict[met.__name__] = met(X_dict['X'])
                # Assign the updated dictionary back to the DataFrame cell
                self.transformed_data.loc[X_name, f.__name__] = X_dict

    def table(self, met, latex=False):
        try:
            # Create a new DataFrame based on the 'met' key from the dictionaries
            metric_data = self.transformed_data.applymap(lambda x: x.get(met, None))
        except AttributeError:
            print("The DataFrame does not contain dictionaries.")
            return

        # Print the data
        if latex:
            print(metric_data.to_latex())
        else:
            print(metric_data)


# Tests

In [3]:
# Test data
datasets = [np.array([1, 2, 3]), np.array([4, 5, 6]), np.array([7, 8, 9])]
methods = [np.square, np.sqrt]
metrics = [np.max, np.min]

# Create BobbyTables instance
bt = BobbyTables(datasets, methods, metrics)

# Compute transforms
bt.compute_transforms()

# Compute metrics
bt.compute_metrics()

# Print table
bt.table('max')
bt.table('min')

TypeError: DataFrame.__init__() got an unexpected keyword argument 'index'

In [17]:
bt.transformed_data

Unnamed: 0,square,sqrt
Dataset 1,,
Dataset 2,,
Dataset 3,,


In [6]:
iterables = [["bar", "baz", "foo", "qux"], ["one", "two"]]
pd.MultiIndex.from_product(iterables)

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           )

In [17]:
import xarray as xr

In [27]:
X_np = np.random.rand(4,9)

In [28]:
X_xr = xr.DataArray(X_np,dims=("x","y"))

In [29]:
X_xrS = [xr.DataArray(np.random.rand(4,9),dims=("x","y")) for i in range(10)]

In [33]:
X_xr.loc[2]

In [37]:
X_xr.isel(y=1,x=2)

In [38]:
X_xr.sel(y=1,x=2)

In [42]:
X_xr.sel(y=[1,2])

In [47]:
X_xrS

[<xarray.DataArray (x: 4, y: 9)>
 array([[0.63801396, 0.65111391, 0.38905505, 0.48182035, 0.57690872,
         0.58420122, 0.39427126, 0.34458626, 0.64749813],
        [0.61388341, 0.9153965 , 0.63027098, 0.98227745, 0.26944019,
         0.75274306, 0.85683908, 0.13844137, 0.29495341],
        [0.95036496, 0.57824576, 0.9320653 , 0.73240391, 0.14845341,
         0.55105553, 0.2594477 , 0.30403977, 0.14443947],
        [0.09564034, 0.82144494, 0.766945  , 0.30560011, 0.15359002,
         0.38063087, 0.16932888, 0.48298054, 0.47097685]])
 Dimensions without coordinates: x, y,
 <xarray.DataArray (x: 4, y: 9)>
 array([[0.04914911, 0.26707203, 0.86984013, 0.46518003, 0.96210469,
         0.77291233, 0.38735133, 0.40811404, 0.94863996],
        [0.55196919, 0.23973144, 0.03705968, 0.35179575, 0.41900666,
         0.12881975, 0.75446828, 0.54239917, 0.05909838],
        [0.00814619, 0.93753326, 0.80325845, 0.2975334 , 0.43033077,
         0.2253149 , 0.08891793, 0.89900395, 0.12252361],
     

In [50]:
D = xr.Dataset(
        data_vars = {
            "method1": {
                "dataset1": (["n1","d1"],np.random.rand(10,2)),
                "dataset2": (["n2","d2"],np.random.rand(100,4)),
            }
            "method1": {
                "dataset1": (["n1","d1"],np.random.rand(10,2)),
                "dataset2": (["n2","d2"],np.random.rand(100,4)),
            }
            "method1": {
                "dataset1": (["n1","d1"],np.random.rand(10,2)),
                "dataset2": (["n2","d2"],np.random.rand(100,4)),
            }
        }
)

SyntaxError: invalid syntax. Perhaps you forgot a comma? (3589269271.py, line 3)

In [51]:

import xarray as xr
import numpy as np

# Create inner datasets
ds1 = xr.Dataset(
    data_vars={"var1": (("x"), np.random.rand(5))},
    coords={"x": np.arange(5)},
)

ds2 = xr.Dataset(
    data_vars={"var2": (("y"), np.random.rand(3))}, 
    coords={"y": np.arange(3)}
)

# Create outer dataset 
outer_ds = xr.Dataset(
    data_vars={"ds1": ds1, "ds2": ds2},
)

print(outer_ds)

TypeError: cannot directly convert an xarray.Dataset into a numpy array. Instead, create an xarray.DataArray first, either with indexing on the Dataset or by invoking the `to_dataarray()` method.

In [None]:
D

In [None]:
# sync changes to the library
from IPython.display import display, Javascript
import time
display(Javascript('IPython.notebook.save_checkpoint();'))
time.sleep(2)
!pixi run nbsync