# `mle-monitor`: Lightweight Resource Monitoring
### Author: [@RobertTLange](https://twitter.com/RobertTLange) [Last Update: October 2021] [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RobertTLange/mle-monitor/blob/main/examples/getting_started.ipynb)

In [1]:
%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'retina'

try:
    import mle_monitor
except:
    !pip install -q mle-monitor
    import mle_monitor

```python
from mle_monitor import MLEProtocol, MLEResource, MLEDashboard

# Instantiate protocol - load local db & get summary
protocol = MLEProtocol(protocol_fname="~/local_mle_protocol.db")
protocol.summary()

# Instantiate resource monitoring & get stats
resource = MLEResource(resource_name="slurm")
resource.monitor()

# Instantiate monitoring dashboard & run in while-loop
dashboard = MLEDashboard(protocol, resource)
dashboard.live()
```

# Pillar I: `MLEProtocol`

In [7]:
from mle_monitor import MLEProtocol

# File that backs the protocol database (relative to the working directory)
protocol_db_fname = "mle_protocol.db"

# Open the protocol stored in that file - a new one is created if it doesn't exist yet
protocol = MLEProtocol(protocol_fname=protocol_db_fname)

In [8]:
# Meta data describing the experiment that is registered in the protocol
meta_data = {
    "purpose": "Test MLEProtocol",
    "project_name": "MNIST",
    "exec_resource": "local",
    "experiment_dir": "log_dir",
    "experiment_type": "hyperparameter-search",
    "config_fname": "base_config.json",
    "num_seeds": 5,
    "num_total_jobs": 10,
    "num_job_batches": 2,
    "time_per_job": "00:05:00",  # dd:hh:mm (days:hours:minutes)
    "num_cpus": 2,
    "num_gpus": 1,
}

# Register the experiment; the returned id is used to look the entry up again
# (`save=True` presumably writes the protocol back to its file - TODO confirm)
e_id = protocol.add(meta_data, save=True)

# Display the stored entry for this experiment id
protocol.get(e_id)

{'purpose': 'Test MLEProtocol',
 'project_name': 'MNIST',
 'exec_resource': 'local',
 'experiment_dir': 'log_dir',
 'experiment_type': 'hyperparameter-search',
 'config_fname': 'base_config.json',
 'num_seeds': 5,
 'num_total_jobs': 10,
 'num_job_batches': 2,
 'time_per_job': '00:05:00',
 'num_cpus': 2,
 'num_gpus': 1,
 'git_hash': 'a0749efe640e23b96bcdacdb22c563b126b94548',
 'loaded_config': [{'train_config': {'lrate': 0.1},
   'model_config': {'num_layers': 5},
   'log_config': {'time_to_track': ['step_counter'],
    'what_to_track': ['loss'],
    'time_to_print': ['step_counter'],
    'what_to_print': ['loss'],
    'print_every_k_updates': 10,
    'overwrite_experiment_dir': 1}}],
 'e-hash': '300a7b5fe20486cb73985ff21a7d97c8',
 'retrieved_results': False,
 'stored_in_cloud': False,
 'report_generated': False,
 'job_status': 'running',
 'start_time': '10/28/2021 21:22:43',
 'duration': '00:10:00',
 'stop_time': '10/29/2021 07:22:43'}

In [6]:
# Print a summary of the last experiments; the value returned by
# `summary()` is kept in `sub_df` (presumably a DataFrame - TODO confirm)
sub_df = protocol.summary()

# Pillar II: `MLEResource`

In [None]:
from mle_monitor import MLEResource

# Instantiate the resource monitor with its default settings
resource = MLEResource()

# Query the resource once and keep the result. Only the last expression of a
# cell is displayed, so a bare `len(resource.monitor())` in the middle of the
# cell would show nothing and merely poll the resource a second time.
resource_stats = resource.monitor()
resource_stats

# Pillar III: `MLEDashboard`

In [None]:
from mle_monitor import MLEDashboard

# Build the dashboard from the `protocol` and `resource` objects created in the cells above
dashboard = MLEDashboard(protocol, resource)

In [None]:
# Render the dashboard once (presumably a static one-off view, as opposed to
# the continuously updating `dashboard.live()` - TODO confirm)
dashboard.snapshot()