In [1]:
%pip install diskcache

Collecting diskcache
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: diskcache
Successfully installed diskcache-5.6.3
Note: you may need to restart the kernel to use updated packages.


In [102]:
from diskcache import Cache
from pathlib import Path

import time

class ArtifactStorage:
    """Versioned artifact store kept in a single ``diskcache.Cache``.

    Two kinds of keys live side by side in the cache:

    * ``name`` holds the latest version number handed out for that artifact.
    * ``f'{name}-{version}'`` holds the artifact object itself.

    NOTE(review): both key kinds share one namespace, so an artifact named
    e.g. ``'iris-1'`` uses the same key as version 1 of ``'iris'``. Avoid
    artifact names that end in ``-<number>``.
    """

    def __init__(self, path='artifacts'):
        """Open the cache located at ``path``."""
        self.cache = Cache(path)

    def write_artifact(self, name, artifact):
        """Store ``artifact`` as the next version of ``name``.

        Versions start at 1 and increase by one on every call for the same
        ``name``.

        Returns:
            dict with ``'reference'`` (the cache key the artifact was stored
            under), ``'name'`` and ``'version'``.
        """
        # Let the cache bump the counter in one step (a missing counter starts
        # from 0) instead of a separate check / read / write sequence that two
        # writers could interleave.
        version = self.cache.incr(name, default=0)
        reference = f'{name}-{version}'
        self.cache[reference] = artifact
        return {'reference': reference, 'name': name, 'version': version}

    def get_artifact(self, name, version):
        """Return the artifact stored for ``name`` at ``version``.

        A lookup of a name/version pair that was never written propagates the
        cache's own missing-key error.
        """
        return self.cache[f'{name}-{version}']

class StatStorage:
    """Key/value statistics for one session, kept in a ``diskcache.Cache``.

    The cache is located at ``path / project_name / module_name / session_name``.
    Next to the caller's own keys it carries two bookkeeping keys,
    ``'created_at'`` and ``'updated_at'``, both integer Unix timestamps.
    """

    def __init__(self, path, project_name, module_name, session_name=None):
        """Open the stats cache for a session.

        Args:
            path: root directory under which all stats caches are placed.
            project_name: first sub-directory level.
            module_name: second sub-directory level.
            session_name: last sub-directory level; when empty or ``None`` the
                current Unix time (whole seconds) is used as the name.
        """
        if not session_name:
            session_name = f'{int(time.time())}'
        self.cache = Cache(Path(path) / project_name / module_name / session_name)
        # Re-opening an existing session must keep its original creation time.
        if 'created_at' not in self.cache:
            self.cache['created_at'] = int(time.time())
        self.session_name = session_name

    def write_stat(self, key, value, method='overwrite'):
        """Record ``value`` under ``key`` and refresh ``'updated_at'``.

        Args:
            key: cache key to write.
            value: value to store; with ``method='append'`` a list contributes
                all of its items, anything else contributes a single item.
            method: ``'overwrite'`` replaces whatever is stored under ``key``;
                ``'append'`` adds the new item(s) after the ones already
                stored, creating the list if ``key`` is new.

        Raises:
            ValueError: if ``method`` is neither ``'overwrite'`` nor
                ``'append'``; nothing is written in that case.
        """
        if method == 'overwrite':
            self.cache[key] = value
        elif method == 'append':
            new_items = value if isinstance(value, list) else [value]
            if key in self.cache:
                existing = self.cache[key]
                # A value written earlier with 'overwrite' may not be a list.
                if not isinstance(existing, list):
                    existing = [existing]
                new_items = existing + new_items
            self.cache[key] = new_items
        else:
            raise ValueError(
                f"unknown method {method!r}; expected 'overwrite' or 'append'"
            )
        self.cache['updated_at'] = int(time.time())

    def give_stats(self):
        """Return every key currently in the cache with its value, as a dict."""
        return {k: self.cache[k] for k in self.cache.iterkeys()}

In [111]:
from datamander import StatStorage, ArtifactStorage

In [114]:
# Stats for this run, filed under the 'local' root by project
# ('arxiv-frontpage') and module ('iris'). No session name is passed.
# NOTE(review): these classes come from the `datamander` import above, which
# shadows the in-notebook definitions — presumably the same code; confirm.
stat_manager = StatStorage('local', 'arxiv-frontpage', 'iris')

# Artifact store rooted at the same 'local' directory as the stats.
artifact_manager = ArtifactStorage('local')

In [115]:
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Load iris as a plain (features, labels) pair.
X, y = datasets.load_iris(return_X_y=True)
# Seed the shuffle so the train/validation split, and every metric computed
# from it, is the same on each Restart & Run All.
Xt, Xv, yt, yv = train_test_split(X, y, random_state=42)

In [116]:
# Fit the classifier on the training split and store it as a new artifact
# version.
lr = LogisticRegression()
lr.fit(Xt, yt)
info = artifact_manager.write_artifact('iris', lr)

# Score the validation split and record the report together with the pointer
# to the artifact that produced it.
yv_pred = lr.predict(Xv)
report_valid = classification_report(yv, yv_pred, output_dict=True)
stat_manager.write_stat('report_valid', report_valid)
stat_manager.write_stat('artifact_info', info)

In [117]:
# Display everything recorded for this session so far.
stat_manager.give_stats()

{'artifact_info': {'reference': 'iris-4', 'name': 'iris', 'version': 4},
 'created_at': 1716825962,
 'report_valid': {'0': {'precision': 1.0,
   'recall': 1.0,
   'f1-score': 1.0,
   'support': 12.0},
  '1': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 15.0},
  '2': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 11.0},
  'accuracy': 1.0,
  'macro avg': {'precision': 1.0,
   'recall': 1.0,
   'f1-score': 1.0,
   'support': 38.0},
  'weighted avg': {'precision': 1.0,
   'recall': 1.0,
   'f1-score': 1.0,
   'support': 38.0}},
 'updated_at': 1716825963}

In [119]:
# Fetch the model that was just written. The version comes from the `info`
# returned by write_artifact rather than a hard-coded number, because every
# re-run of the training cell bumps the version.
artifact_manager.get_artifact(info['name'], info['version'])