# Analysis of Generated DNF data
This notebook collects and analyses the results of experiments run on the generated DNF dataset.

In [None]:
# Collect imports
from typing import Dict
from pprint import pprint

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import mlflow
from mlflow.tracking import MlflowClient

import utils.analysis

# Array printing: fixed-point notation instead of scientific (suppress=True),
# 5 digits of precision, and lines wrapped at 120 characters.
np.set_printoptions(suppress=True, precision=5, linewidth=120)

In [None]:
# Setup mlflow client
tracking_uri = "http://localhost:8888"  # Empty string for local
mlclient = MlflowClient(tracking_uri=tracking_uri)
# Collect the names of all experiments known to the tracking server.
# NOTE(review): newer MLflow releases replace list_experiments() with
# search_experiments() — confirm against the installed version.
exp_list = []
for exp in mlclient.list_experiments():
    exp_list.append(exp.name)
print(exp_list)

## Aggregate Experiment Results
We will look at the aggregate metrics of the runs in a given experiment.

In [None]:
# Explore experiment and run data
# The experiment analysed is the last one listed by the tracking server.
# To pin a specific experiment instead, assign its name directly,
# e.g. experiment_name = "20200806-104823".
if not exp_list:
    # Fail with a clear message instead of an opaque IndexError on exp_list[-1].
    raise RuntimeError("No experiments found on the MLflow tracking server.")
experiment_name = exp_list[-1]
mlexp = mlclient.get_experiment_by_name(experiment_name)
print(mlexp)

In [None]:
# Gather the logged data of the selected experiment through the project helper.
# NOTE(review): presumably a DataFrame with one row per (run, epoch) and the run
# parameters and metrics as columns — confirm against utils.analysis.
exp_data = utils.analysis.collect_experiment_data(experiment_name)
# Show which columns are available for the analysis below.
exp_data.columns

In [None]:
# Plot training and test results
# Normalise the task list of every row into a sorted, comma-separated string so it
# can be used as a facet key.
# NOTE(review): eval() will execute whatever is stored in `relsgame_tasks`; if the values
# are plain list literals, ast.literal_eval would be the safer parser — confirm and switch.
df = exp_data.assign(
    relsgame_tasks=exp_data['relsgame_tasks'].map(
        lambda liststr: ','.join(sorted(eval(liststr)))
    )
)
# Keep only the rows logged with learning_rate 0.01 and tanh image activation.
df = df[(df['learning_rate'] == 0.01) & (df['dnf_image_activation'] == 'tanh')]
# Non-null counts per column for each run that survived the filter.
display(df.groupby(['run_id']).count())
# Candidate metric columns. Accuracy is what gets plotted; assign `loss_fields` to
# `fields` instead to plot the losses.
loss_fields = [k for k in df.columns if k.endswith('loss') and k != "converged_loss"]
acc_fields = [k for k in df.columns if k.endswith('acc')]
fields = acc_fields
# Long format: one row per metric value, tagged with epoch, tasks and train size.
df = df.melt(id_vars=['epoch', 'relsgame_tasks', 'relsgame_train_size'], value_vars=fields)
# One panel per (tasks, train size) pair, one line per metric; ci='sd' requests a
# standard-deviation band.
# NOTE(review): newer seaborn releases deprecate `ci` in favour of errorbar='sd' —
# confirm against the installed version.
sns.relplot(x='epoch', y='value', hue='variable', kind='line', row='relsgame_tasks', col='relsgame_train_size', ci='sd', data=df)

## Analyse Run Report
We can pick a single run and analyse its reports, such as the attention maps and the rules it has learned.

In [None]:
# Turn each stored task list into a sorted, comma-separated key.
# NOTE(review): eval() executes the stored string; consider ast.literal_eval if the
# values are plain list literals — confirm.
tasks_joined = exp_data['relsgame_tasks'].map(lambda liststr: ','.join(sorted(eval(liststr))))
df = exp_data.assign(relsgame_tasks=tasks_joined)
# Run ids whose task key is the empty string.
no_task_rows = df['relsgame_tasks'] == ''
df.loc[no_task_rows, 'run_id'].unique()

In [None]:
# Choose the run to inspect (hard-coded id).
# Alternative selection from the experiment data:
#   exp_data.groupby(by="run_id").min().index[-1]
run_id = "107801d186c54c6f8f8d431426b284c0"
print("Collecting artifacts for run:", run_id)
mlrun = mlclient.get_run(run_id)
# Dump the run record, then the list of artifacts stored for it.
run_details = mlrun.to_dictionary()
pprint(run_details)
run_artifacts = mlclient.list_artifacts(run_id)
pprint(run_artifacts)

In [None]:
def load_report(run_id: str, fpath: str):
    """Download an artifact of a run and open it with ``np.load``.

    Args:
        run_id: id of the MLflow run that owns the artifact.
        fpath: artifact path, relative to the run's artifact root.

    Returns:
        Whatever ``np.load`` yields for the downloaded local file.
    """
    # The module-level ``mlclient`` resolves the artifact to a local file path.
    return np.load(mlclient.download_artifacts(run_id, fpath))

# Load the "train_report.npz" artifact of the selected run and list the names of
# the arrays stored in it.
report = load_report(run_id, "train_report.npz")
print(report.files)

In [None]:
# Default selection: indices 0..7, i.e. the first eight examples.
idxs=np.arange(8)

In [None]:
# Pick some examples to analyse: up to eight where prediction and label disagree.
# 'out_label' is used as stored; if it held per-class scores, np.argmax(..., -1)
# would be needed here as well.
labels = report['out_label']
pred_scores = report['prediction_label']
predictions = np.argmax(pred_scores, -1)
mismatch = labels != predictions
idxs = np.flatnonzero(mismatch)[:8]  # use np.arange(8) to take the first eight instead
print(idxs)
# Row 0: labels, row 1: predicted classes for the selected examples.
print(np.stack((labels[idxs], predictions[idxs])))
# Raw prediction outputs behind those predicted classes.
print(pred_scores[idxs])

In [None]:
# Display the array stored under 'and_kernel' in the report.
# NOTE(review): presumably the learned weights of the conjunction (AND) layer — confirm.
report['and_kernel']

In [None]:
# Display the array stored under 'or_kernel' in the report.
# NOTE(review): presumably the learned weights of the disjunction (OR) layer — confirm.
report['or_kernel']

In [None]:
# Display the first two entries of the array stored under 'binary'.
# NOTE(review): meaning of 'binary' is not visible here — confirm against the model code.
report['binary'][:2]