# Collectl Dsk Log Analysis

## Functionalities
- Plot disk I/O utilization graphs.

## Input
Log files are read from the experiment directory in `../data` whose name is set in `EXPERIMENT_DIRNAME` (see "Notebook Configuration" below). This directory is assumed to have the following structure:
```
logs/
  [node-1]/
    collectl.tar.gz
  ...
  [node-n]/
    collectl.tar.gz
```

## Notebook Configuration

In [None]:
########## GENERAL
# Name of the experiment directory inside `../data` whose logs are analyzed.
# NOTE(review): "[TIMESTAMP]" looks like a placeholder to be replaced with the
# timestamp of the actual run -- confirm against the directory names in `../data`.
EXPERIMENT_DIRNAME = "BuzzBlogBenchmark_[TIMESTAMP]"

## Notebook Setup

In [None]:
import os
import sys

# Put the parent directory on the module search path before importing `utils`
# (presumably the `utils` package lives there -- confirm against the repo layout).
sys.path.append(os.path.abspath(os.path.join("..")))
from utils.utils import *
from utils.plot_experiment_graphs import CollectlDskLogAnalysis

# Path of the experiment directory:
# <current working directory>/../data/<EXPERIMENT_DIRNAME> (the ".." is not collapsed).
experiment_dirpath = os.path.join(os.path.abspath(""), "..", "data", EXPERIMENT_DIRNAME)

## Log Parsing & Processing

In [None]:
# Build the analysis object for the selected experiment directory.
# NOTE(review): presumably the collectl logs are parsed during construction --
# confirm in `utils.plot_experiment_graphs`.
collectl_dsk_log_analysis = CollectlDskLogAnalysis(experiment_dirpath)

## Disk Monitoring (1-sec window)

In [None]:
########## DISK I/O
# Analyzed metric (options: "name", "reads", "rmerge", "rkbytes", "waitr", "writes", "wmerge", "wkbytes", "waitw",
# "request", "quelen", "wait", "svctim", "util")
COLLECTL_DSK_METRIC = "writes"

# Plot the selected metric.
# NOTE(review): presumably values are averaged over 1-second windows, per this
# section's heading -- confirm in `plot_averaged_dsk_metric`.
collectl_dsk_log_analysis.plot_averaged_dsk_metric(dsk_metric=COLLECTL_DSK_METRIC)

## Disk Monitoring (millisecond window)

In [None]:
########## ZOOM IN
# Minimum and maximum time (in sec) of the plotted interval.
# NOTE(review): presumably a (min, max) pair, with None meaning "do not zoom in"
# -- confirm against `plot_dsk_metric`.
INTERVAL = None
########## DISK I/O
# Analyzed metric (options: "name", "reads", "rmerge", "rkbytes", "waitr", "writes", "wmerge", "wkbytes", "waitw",
# "request", "quelen", "wait", "svctim", "util")
COLLECTL_DSK_METRIC = "writes"

# Plot the selected metric, restricted to INTERVAL.
collectl_dsk_log_analysis.plot_dsk_metric(dsk_metric=COLLECTL_DSK_METRIC, interval=INTERVAL)

## Summary

In [None]:
# Print, for every node, summary statistics of a fixed set of disk metrics.
# `stats` is indexed by keys of the form
# "collectl_dsk_<node label>_<metric>_<statistic>".
stats = collectl_dsk_log_analysis.calculate_stats()

# One row per printed statistic, in output order: (line format, stats key format).
# The line formats are left-padded so that the colons line up in the output.
summary_rows = (
  ("  P99.9: %3.2f", "collectl_dsk_%s_%s_p999"),
  ("    P99: %3.2f", "collectl_dsk_%s_%s_p99"),
  ("    P95: %3.2f", "collectl_dsk_%s_%s_p95"),
  ("    P50: %3.2f", "collectl_dsk_%s_%s_p50"),
  ("    Avg: %3.2f", "collectl_dsk_%s_%s_avg"),
  ("    Std: %3.2f", "collectl_dsk_%s_%s_std"),
  ("    Max: %3.2f", "collectl_dsk_%s_%s_max"),
)

for node_name in get_node_names(experiment_dirpath):
  node_label = get_node_label(experiment_dirpath, node_name)
  # Node header line.
  print("%s: %s" % (node_name, node_label))
  for dsk_metric in ["reads", "writes", "rkbytes", "wkbytes", "quelen", "wait"]:
    # Metric sub-header, followed by one line per statistic.
    print("  %s" % dsk_metric)
    for line_format, key_format in summary_rows:
      print(line_format % (stats[key_format % (node_label, dsk_metric)]))