# dynamic

> This module helps users automatically save, timestamp, and eventually trace the source of a specific set of data for publication.

In [None]:
#| default_exp dynamic

In [2]:
#| hide
from nbdev.showdoc import *

In [1]:
2+4

6

In [4]:
#| export
import datetime
import os
import sys
import platform
from dotenv import load_dotenv
from pathlib import Path

# Load variables from a .env file (python-dotenv) into the process environment
# so that the os.getenv lookups below can see them.
load_dotenv()
def set_default_dir():
    """Announce and return the default reproduce.work config directory.

    Returns:
        ``Path("./reproduce")``. A notice naming that directory is printed
        to stdout as a side effect.
    """
    default_dir = Path("./reproduce")
    print('Setting reproduce.work config dir to ./reproduce')
    return default_dir

# Resolve the reproduce.work config directory. The fallback is computed only
# when REPROWORKDIR is unset: passing set_default_dir() as the os.getenv
# default would call it eagerly and print its "Setting ... ./reproduce" notice
# even when the environment already supplies a directory.
# Note: the value is a str when taken from the environment and a Path when the
# default is used.
reproduce_dir = os.getenv("REPROWORKDIR")
if reproduce_dir is None:
    reproduce_dir = set_default_dir()

# Value of the REPRODEVIMAGE environment variable; None when it is unset.
dev_image_tag = os.getenv("REPRODEVIMAGE")

def read_base_config():
    """Parse ``config.toml`` from the reproduce.work config directory.

    Returns:
        The parsed contents of ``<reproduce_dir>/config.toml`` as returned
        by ``toml.load``.
    """
    config_path = Path(reproduce_dir, 'config.toml')
    with open(config_path, 'r') as config_file:
        return toml.load(config_file)

def update_watched_files(add=None, remove=None):
    """Add and/or remove entries in the watched-file list in config.toml.

    Updates the list at ``repro.files.watch``, rewrites the ``watcher "..."``
    argument inside ``repro.stage.develop.script`` to the same comma-joined
    list, and writes the config back to ``<reproduce_dir>/config.toml``.

    Args:
        add: Iterable of file names to append. Names that are already
            watched (or repeated within ``add``) are added only once.
            ``None`` means nothing to add.
        remove: Iterable of file names to drop from the list. ``None``
            means nothing to remove.

    Returns:
        The new list of watched files.
    """
    import re

    # None sentinels instead of mutable list defaults shared across calls.
    add = [] if add is None else add
    remove = [] if remove is None else remove

    base_config = read_base_config()
    existing_files = base_config['repro']['files']['watch']

    new_files = list(existing_files)
    for candidate in add:
        if candidate not in new_files:
            new_files.append(candidate)
    new_files = [f for f in new_files if f not in remove]
    base_config['repro']['files']['watch'] = new_files

    current_develop_script = base_config['repro']['stage']['develop']['script']
    # Replace the quoted argument of every `watcher "<...>"` occurrence with the
    # new comma-joined file list. A callable replacement is used so that
    # backslashes in file names (e.g. Windows paths) are inserted literally
    # rather than being parsed as escapes/group references by re.sub.
    # NOTE(review): an earlier comment here also mentioned replacing
    # 'build_cmd' with 'python reproduce_work.build()'; that is not implemented.
    joined_files = ",".join(new_files)
    watcher_arg = f'watcher "{joined_files}"'
    new_develop_script = re.sub(
        r'watcher \"(.*?)\"',
        lambda _match: watcher_arg,
        current_develop_script
    )
    base_config['repro']['stage']['develop']['script'] = new_develop_script

    with open(Path(reproduce_dir, 'config.toml'), 'w') as f:
        toml.dump(base_config, f)

    if base_config['repro']['verbose']:
        print(f"Updated watched files to {new_files}")
    return new_files

def validate_base_config(base_config):
    """Check that a parsed config contains the fields this module requires.

    Required: top-level ``authors`` and ``repro`` keys, a ``repro.stages``
    entry, and, for every name in ``repro.stages``, either a top-level key
    ``repro.stage.<name>`` or an entry ``<name>`` in the ``repro.stage``
    table.

    Args:
        base_config: The parsed configuration mapping.

    Returns:
        True when all required fields are present. Otherwise an error
        message is printed (for some failures preceded by a TOML dump of
        the config) and False is returned.
    """
    for key in ('authors', 'repro'):
        if key not in base_config:
            print(toml.dumps(base_config))
            print(f"Error: Missing required field '{key}' in config.toml")
            return False

    repro_config = base_config['repro']
    if 'stages' not in repro_config:
        print(f"Error: Missing required field 'repro.stages' in reproduce.work configuration at {reproduce_dir}/config.toml")
        return False

    # A config that lists stages but has no `stage` table at all must be
    # reported as invalid, not crash with KeyError, so fall back to an empty
    # mapping when the table is absent.
    stage_tables = repro_config.get('stage', {})
    for stage in repro_config['stages']:
        if (f'repro.stage.{stage}' not in base_config) and (stage not in stage_tables):
            print(toml.dumps(base_config))
            print(f"Error: Missing required field repro.stage.{stage} in reproduce.work configuration at {reproduce_dir}/config.toml")
            return False
    return True

def requires_config(func):
    """Decorator that validates the reproduce.work config before each call.

    On every invocation the wrapper reads the base config and validates it;
    only if validation succeeds is ``func`` called with the original
    arguments, and its return value is passed through.

    Args:
        func: The callable to guard.

    Returns:
        A wrapper around ``func`` that keeps its name, docstring and other
        metadata (via ``functools.wraps``).

    Raises:
        Exception: From the wrapper, when the configuration is not valid.
    """
    import functools

    # Without wraps() every decorated function would be reported as
    # "wrapper" and lose its docstring.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        config = read_base_config()
        if not validate_base_config(config):
            raise Exception("Your reproduce.work configuration is not valid.")
        return func(*args, **kwargs)
    return wrapper

@requires_config
def register_notebook(notebook_name, notebook_dir='nbs'):
    """
    Register a notebook in the config.toml file.

    The notebook is recorded under ``repro.notebooks`` as
    ``notebook_dir + '/' + notebook_name``. The config file is rewritten only
    when that path is not already listed.

    Args:
        notebook_name: File name of the notebook.
        notebook_dir: Directory prefix joined to the name with '/'.
    """
    notebook_path = notebook_dir + '/' + notebook_name
    base_config = read_base_config()
    repro_config = base_config['repro']

    # ensure notebook key exists
    notebooks = repro_config.setdefault('notebooks', [])

    # Compare against the stored form (the full path). Checking the bare
    # notebook name never matches the stored paths, so the same notebook
    # would be appended again on every call.
    if notebook_path in notebooks:
        if repro_config['verbose']:
            print(f"Notebook {notebook_path} already registered in {reproduce_dir}/config.toml")
        return

    notebooks.append(notebook_path)
    with open(Path(reproduce_dir, 'config.toml'), 'w') as f:
        toml.dump(base_config, f)
    if repro_config['verbose']:
        print(f"Registered notebook {notebook_path} in {reproduce_dir}/config.toml")
# Module-level registry, initially an empty dict.
# NOTE(review): presumably filled by publish_variable(), which is not defined
# in this cell — confirm against its definition.
VAR_REGISTRY = {}


Setting reproduce.work config dir to ./reproduce


In [4]:
#generate_config()

In [5]:
# Smoke test: publish two values under distinct names, then display VAR_REGISTRY.
publish_variable(67890, "test_var_timestamp_1")  # This should capture this line number and timestamp
publish_variable("Hello again!", "test_var_timestamp_2")  # And this line number and timestamp

VAR_REGISTRY

NameError: name 'publish_variable' is not defined

In [50]:

# Test serialize_to_toml on a sample dict that mixes scalars, a list, a pandas
# Timestamp, a NumPy array, a DataFrame, nested dicts and None.
data_sample = {
    'name': 'John',
    'age': 28,
    'is_student': False,
    'scores': [85, 90, 78, 92],
    'birthday': pd.Timestamp('2000-01-01'),
    'matrix': np.array([[1, 2], [3, 4]]),
    'df': pd.DataFrame({
        'A': [1, 2, 3],
        'B': ['a', 'b', 'c'],
        'date': [pd.Timestamp('2022-01-01'), pd.Timestamp('2022-01-02'), pd.Timestamp('2022-01-03')]
    }),
    'nested_dict': {
        'key1': 'value1',
        'sub_dict': {
            'sub_key': 'sub_value'
        }
    },
    'none_value': None
}

# Serialize the sample and print the resulting text for visual inspection.
toml_representation = serialize_to_toml(data_sample)
print(toml_representation)

name = "John"

name = "John"
age = 28

age = 28
is_student = False

is_student = False
scores = [85, 90, 78, 92]

scores = [85, 90, 78, 92]
birthday = "2000-01-01 00:00:00"

birthday = "2000-01-01 00:00:00"
matrix = [[1 2], [3 4]]

[df]
[dataframe]
A = [1, 2, 3]
B = ["a", "b", "c"]
date = ["2022-01-01 00:00:00", "2022-01-02 00:00:00", "2022-01-03 00:00:00"]

[nested_dict]
key1 = "value1"

key1 = "value1"
[sub_dict]
sub_key = "sub_value"

sub_key = "sub_value"


none_value = null

none_value = null


In [53]:
# Read the base config, load the TOML file whose path is stored at
# repro.files.dynamic, and print its contents re-serialized as TOML.
config = read_base_config()
with open(config['repro']['files']['dynamic'], 'r') as file:
    dynamic_data = toml.load(file)
print(toml.dumps(dynamic_data))

[p_value_str]
description = "The p-value of the coefficient on the slope of the linear regression line."
type = "string"
timestamp = "2023-10-02T05:48:29.589004"
value = "0.068"

[x]
description = "The simulated X data"
units = "kilograms"
type = "data"
timestamp = "2023-10-02T05:48:30.171394"
content_hash = "38f13b81a58a7d931600e917d77dfe8f"
timed_hash = "653424a8be5d8ff5d5d6d9a38baeb26f"
value = "array = [-0.15438854676085806, -0.5912841266673995, 1.3457620267806991, -0.3085476927297975, -0.35074090433304067, -1.343721369940541, -0.41860346256356656, 2.392890531248967, 0.22032854237060082, 0.7867023188803995, 0.08878384294999392, 0.6565087673201803, 0.2412729155438198, 0.6854353883101262, 2.153899580706892, 0.649925720150528, 1.127458119203137, -0.6357927443286684, 0.3077660698412044, -1.6328895355458346, 0.567227693439327, -0.21246173380662106, -0.7203897514131021, 0.5952129857137533, 0.18819499630282482, -0.8834998061258611, 0.7379945086294778, -0.8471634166162177, 0.07930983762624

In [17]:
# Test the save function
test_content = "This is a test content for the save function."

In [18]:
# Pass the test string to save() with the target name "saved_file.txt" and
# display whatever metadata it returns.
metadata = save(test_content, "saved_file.txt")
metadata

<IPython.core.display.Javascript object>

Updated watched files to ['reproduce/main.md', 'reproduce/data.toml', 'reproduce/latex/template.tex', 'saved_file.txt']


{'type': 'file',
 'timestamp': '2023-10-02T02:11:11.082654',
 'content_hash': 'd1866c6aa7d10eb57a35cc88a77802c5',
 'timed_hash': 'f9293765bd6cc1e991407203aa7da511'}

In [12]:
# Test
#x = 10
#y = "Hello"
#z = [1, 2, 3]
#save()

In [13]:
#| hide
import nbdev; nbdev.nbdev_export()