In [5]:
import json
import re
from pathlib import Path
from time import time

from diskcache import Cache
from joblib import dump


class DataMander:
    """Small on-disk store for the stats, artifacts, logs and templates of one run.

    Everything lives under ``.datamander/<path parts...>``. Key/value data is kept
    in a ``diskcache.Cache`` located in the ``stats`` sub-directory, and objects
    passed to :meth:`add_artifact` are dumped with joblib into ``artifacts``.
    The cache keys ``_artifacts``, ``_logs`` and ``_templates`` are reserved and
    each hold a dict.
    """

    def __init__(self, *path):
        """Open (or create) the store at ``.datamander/<path...>``.

        Args:
            *path: Components identifying the project/run. They are converted to
                ``str`` and joined with ``/``.
        """
        self.project_path = "/".join(['.datamander'] + [str(part) for part in path])
        self.cache = Cache(self.project_path + '/stats')
        self.artifact_path = Path(self.project_path + '/artifacts')
        self.log_path = Path(self.project_path + '/logs')
        # The reserved keys must exist before `_add_to_key` reads them.
        for reserved in ('_artifacts', '_logs', '_templates'):
            if reserved not in self.cache:
                self.cache[reserved] = {}

    def add_info(self, key, value, method='overwrite'):
        """Store ``value`` under ``key`` and refresh the ``updated_at`` timestamp.

        Args:
            key: Cache key to write.
            value: Value to store.
            method: ``'overwrite'`` replaces whatever is stored under ``key``.
                ``'append'`` keeps a list under ``key`` and adds ``value`` (or,
                when ``value`` is itself a list, each of its items) to the END of
                that list. A previously stored non-list value becomes the first
                item of the list.

        Raises:
            ValueError: If ``method`` is neither ``'overwrite'`` nor ``'append'``.
                Nothing is written in that case.
        """
        if method == 'overwrite':
            self.cache[key] = value
        elif method == 'append':
            new_items = list(value) if isinstance(value, list) else [value]
            if key in self.cache:
                existing = self.cache[key]
                if not isinstance(existing, list):
                    existing = [existing]
                # Existing entries first, so the stored list stays chronological.
                new_items = existing + new_items
            self.cache[key] = new_items
        else:
            raise ValueError(
                f"Unknown method {method!r}; expected 'overwrite' or 'append'."
            )
        self.cache['updated_at'] = int(time())

    def _add_to_key(self, top_key, key, value):
        """Set ``key`` to ``value`` inside the dict stored under ``top_key``."""
        # Read-modify-write: the updated dict is stored again through `add_info`,
        # which also bumps `updated_at`.
        orig = self.cache[top_key]
        orig[key] = value
        self.add_info(top_key, orig)

    def add_artifact(self, key, obj, **metadata):
        """Dump ``obj`` with joblib and register it under ``_artifacts``.

        Args:
            key: Artifact name; the file is written to ``<artifact_path>/<key>.joblib``.
            obj: Object handed to ``joblib.dump``.
            **metadata: Extra fields stored next to the file path. A ``path``
                entry in ``metadata`` replaces the recorded file location.
        """
        file_location = self.artifact_path / f'{key}.joblib'
        # exist_ok avoids failing when the directory appears between check and create.
        file_location.parent.mkdir(parents=True, exist_ok=True)
        dump(obj, file_location)
        self._add_to_key('_artifacts', key, {'path': file_location, **metadata})

    def add_template(self, key, template):
        """Store ``template`` under ``_templates[key]``."""
        self._add_to_key('_templates', key, template)

    def add_logs(self, key, logs):
        """Store ``logs`` under ``_logs[key]`` (kept in the cache, not in ``log_path``)."""
        self._add_to_key('_logs', key, logs)

    def info(self):
        """Return a dict mapping every key in the cache to its stored value."""
        return {k: self.cache[k] for k in self.cache.iterkeys()}

In [28]:
# Open (or create) the store under .datamander/arxiv-frontpage/new-datasets/1021232.
mander = DataMander('arxiv-frontpage', 'new-datasets', '1021232')

In [29]:
# 'append' keeps a list under 'epochs' instead of replacing the stored value.
# Not idempotent: every run of this cell adds another 12 to that list.
mander.add_info('epochs', 12, method='append')

In [31]:
# Show which keys are currently stored in the cache.
mander.info().keys()

dict_keys(['_artifacts', '_logs', '_templates', 'cv_results', 'epochs', 'updated_at'])

In [9]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import randint

from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingRandomSearchCV

# A single seeded RandomState is handed to the data generator, the forest and the search.
# NOTE(review): it is one stateful object shared by all three, so reordering these
# statements may change the results -- confirm before refactoring.
rng = np.random.RandomState(0)

# Synthetic classification problem: 400 samples, 12 features.
X, y = datasets.make_classification(n_samples=400, n_features=12, random_state=rng)

# Base estimator whose hyper-parameters are searched below.
clf = RandomForestClassifier(n_estimators=20, random_state=rng)

# Search space: lists are explicit candidate values, randint(...) entries are
# integer distributions from scipy.stats.
param_dist = {
    "max_depth": [3, None],
    "max_features": randint(1, 6),
    "min_samples_split": randint(2, 11),
    "bootstrap": [True, False],
    "criterion": ["gini", "entropy"],
}

# Build the halving random search and fit it on the full data set.
rsh = HalvingRandomSearchCV(
    estimator=clf, param_distributions=param_dist, factor=2, random_state=rng
)
rsh.fit(X, y)

In [10]:
# Dump the fitted search with joblib to <artifact_path>/rsh.joblib and record
# that path under the '_artifacts' key.
mander.add_artifact('rsh', rsh)

In [11]:
# Store the cross-validation results as a DataFrame under 'cv_results'
# (default method 'overwrite' replaces any previous value).
mander.add_info('cv_results', pd.DataFrame(rsh.cv_results_))

In [13]:
# Show the stored keys again after adding the artifact and the CV results.
mander.info().keys()

dict_keys(['_artifacts', '_logs', '_templates', 'cv_results', 'epochs', 'updated_at'])

In [20]:
import altair as alt
from vega_datasets import data

# NOTE(review): `source` is not used by the chart below; kept because other
# cells may still read it.
source = data.cars()

# Scatter of mean vs. std test score for every candidate in the search results.
c = (
    alt.Chart(pd.DataFrame(rsh.cv_results_))
    .mark_circle(size=60)
    .encode(
        x='mean_test_score',
        y='std_test_score',
        tooltip=['iter', 'n_resources', 'params'],
    )
    .interactive()
    .properties(title='mega important chart')
)

In [21]:
# Bare expression: the notebook renders the chart as this cell's output.
c

In [22]:
class TemplateRenderer:
    """Expand registered custom elements (e.g. ``<scatter-chart .../>``) in a template.

    Every self-closing element whose tag name is a key of the registry is replaced
    by the string returned from the registered function. That function is called
    with the renderer's datamander as first argument and the element's attributes
    as keyword arguments.
    """

    # name=value pairs; a value is double-quoted, single-quoted, or a bare token.
    _ATTRIBUTE = re.compile(r'''([^\s=]+)\s*=\s*("[^"]*"|'[^']*'|\S+)''')

    def __init__(self, datamander):
        """Remember the datamander that is passed on to the rendering functions."""
        self.datamander = datamander

    def clean_value(self, val):
        """Return ``val`` without the ``/>`` marker and without any quote characters."""
        return val.replace('/>', '').replace('"', '').replace("'", '')

    def _parse_attributes(self, attr_text):
        """Turn the text between the tag name and ``/>`` into a ``{name: value}`` dict."""
        return {
            name: self.clean_value(raw)
            for name, raw in self._ATTRIBUTE.findall(attr_text)
        }

    def insert_custom_ui(self, template, registry=None):
        """Replace every registered custom element in ``template`` with its rendered UI.

        Args:
            template: Template text that may contain elements such as
                ``<scatter-chart title='t' data='d' x='a' y='b'/>``.
            registry: Mapping of tag name to rendering function. Defaults to the
                module-level ``rendering_registry``, looked up at call time.

        Returns:
            The template with all occurrences of all registered elements expanded.
            Elements that are never closed with ``/>`` are left untouched.
        """
        if registry is None:
            registry = rendering_registry
        for name, func in registry.items():
            # The lookahead stops `<scatter-chart` from matching `<scatter-chart-x`.
            element = re.compile(rf'<{re.escape(name)}(?=[\s/])(.*?)/>', re.DOTALL)

            def render(match, func=func):
                return func(self.datamander, **self._parse_attributes(match.group(1)))

            # A callable replacement is inserted verbatim, so backslashes in the
            # rendered output (e.g. inside JSON) are not interpreted by `re`.
            template = element.sub(render, template)
        return template


def scatter_chart(datamander, title, data, x, y, **kwargs):
    """Render a scatter chart of one stored dataset as a ``<vegachart>`` element.

    Args:
        datamander: Object whose ``info()`` result holds the dataset under ``data``.
        title: Chart title.
        data: Key of the dataset inside ``datamander.info()``.
        x: Encoding passed to the chart's x channel.
        y: Encoding passed to the chart's y channel.
        **kwargs: Accepted but not used.

    Returns:
        An HTML string wrapping the chart specification as JSON.
    """
    # Build a DataFrame from whatever is stored under the requested key.
    frame = pd.DataFrame(datamander.info()[data])

    points = alt.Chart(frame).mark_circle(size=60)
    chart = points.encode(x=x, y=y).interactive().properties(title=title)

    # Round-trip through JSON so the width can be set on the plain spec dict.
    spec = json.loads(chart.to_json())
    spec['width'] = "container"
    payload = json.dumps(spec)

    return f'<vegachart style="width: 100%">{payload}</vegachart>'
        
# Maps a custom element's tag name to the function that renders it.
rendering_registry = {'scatter-chart': scatter_chart}

In [23]:
# Demo template: loads Vega, Vega-Lite, vega-embed and justcharts from jsDelivr,
# followed by one <scatter-chart/> element whose attributes name the stored
# 'cv_results' entry and the two columns to plot.
template = """
<script src="https://cdn.jsdelivr.net/npm/vega@5"></script>
<script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>
<script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>
<script src="https://cdn.jsdelivr.net/gh/koaning/justcharts/justcharts.js"></script>

This is a dead-simple template for demo purposes.

<scatter-chart title='foobar' data='cv_results' x='mean_test_score' y='std_test_score'/>
"""

In [24]:
# Store the raw (unexpanded) template string under '_templates' -> 'overview'.
mander.add_template('overview', template)

In [25]:
def clean_value(val):
    """Drop the ``/>`` closing marker and every quote character from ``val``."""
    for unwanted in ('/>', '"', "'"):
        val = val.replace(unwanted, '')
    return val
    
def insert_custom_ui(datamander, template, registry=None):
    """Replace every registered custom element in ``template`` with its rendered UI.

    Args:
        datamander: Passed as first argument to each rendering function.
        template: Template text that may contain self-closing elements such as
            ``<scatter-chart title='t' data='d' x='a' y='b'/>``.
        registry: Mapping of tag name to rendering function. Defaults to the
            module-level ``rendering_registry``, looked up at call time.

    Returns:
        The template with all occurrences of all registered elements expanded.
        Each element's attributes are cleaned with ``clean_value`` and passed to
        the rendering function as keyword arguments. Elements that are never
        closed with ``/>`` are left untouched.
    """
    if registry is None:
        registry = rendering_registry
    # name=value pairs; a value is double-quoted, single-quoted, or a bare token,
    # so quoted values may contain spaces and '='.
    attribute = re.compile(r'''([^\s=]+)\s*=\s*("[^"]*"|'[^']*'|\S+)''')
    for name, func in registry.items():
        # The lookahead stops `<scatter-chart` from matching `<scatter-chart-x`.
        element = re.compile(rf'<{re.escape(name)}(?=[\s/])(.*?)/>', re.DOTALL)

        def render(match, func=func):
            params = {k: clean_value(v) for k, v in attribute.findall(match.group(1))}
            return func(datamander, **params)

        # A callable replacement is inserted verbatim, so backslashes in the
        # rendered output (e.g. inside JSON) are not interpreted by `re`.
        template = element.sub(render, template)
    return template

In [27]:
from IPython.display import display, HTML


# Expand the custom elements in `template`; the result is only assigned here.
# NOTE(review): `display` and `HTML` are imported in this cell but never used --
# presumably `display(HTML(final_template))` was intended; confirm.
final_template = insert_custom_ui(mander, template)

Index(['iter', 'n_resources', 'mean_fit_time', 'std_fit_time',
       'mean_score_time', 'std_score_time', 'param_bootstrap',
       'param_criterion', 'param_max_depth', 'param_max_features',
       'param_min_samples_split', 'params', 'split0_test_score',
       'split1_test_score', 'split2_test_score', 'split3_test_score',
       'split4_test_score', 'mean_test_score', 'std_test_score',
       'rank_test_score', 'split0_train_score', 'split1_train_score',
       'split2_train_score', 'split3_train_score', 'split4_train_score',
       'mean_train_score', 'std_train_score'],
      dtype='object')


In [88]:
# NOTE(review): scratch cell. `substr` is not defined at top level in any visible
# cell (it is only a local of insert_custom_ui), so this relies on leftover
# kernel state and will fail on a fresh run.
elems = [e.split('=') for e in substr.split(' ') if '=' in e]

In [91]:
def clean_value(val):
    """Strip ``/>`` and all quote characters from ``val``.

    Re-definition of the helper from an earlier cell; it shadows that one.
    """
    without_marker = val.replace('/>', '')
    return ''.join(ch for ch in without_marker if ch not in ('"', "'"))

# Preview of the parsed attributes; depends on `elems` from the previous scratch cell.
{k: clean_value(v) for k, v in elems}

{'title': 'foobar',
 'data': 'cv_results',
 'x': 'mean_test_scores',
 'y': 'std_test_scores'}