In [None]:
import yaml
import dill
import numpy as np
import pandas as pd
import urllib.request
import altair as alt
from multiprocessing import Pool

# Load Data

In [None]:
# Read the two parameter collections used by the simulations below.
# SECURITY NOTE: the unsafe YAML loader can construct arbitrary Python
# objects (and therefore run arbitrary code) while parsing. Only point
# these paths at files you created or otherwise trust.
with open('./beta.yaml') as beta_file:
    Beta = yaml.unsafe_load(beta_file)
with open('./theta.yaml') as theta_file:
    Theta = yaml.unsafe_load(theta_file)

# Load Model

In [None]:
# Deserialize the model object; `score` below calls it as a module-level
# global. SECURITY NOTE: dill, like pickle, can execute arbitrary code
# while loading, so only load a model file from a trusted source.
with open('./model.pkl', 'rb') as model_file:
    BayesGP = dill.load(model_file)

def f(X, top_n=5):
    """Count the distinct "leading column" patterns across the rows of ``X``.

    For every row, entries equal to zero are discarded, the remaining values
    are sorted in descending order, and the column labels of the first
    ``top_n`` values are kept as an ordered tuple. The result is the number
    of different tuples observed over all rows.

    Parameters
    ----------
    X : pandas.DataFrame
        Table whose rows are compared. Column labels must be hashable since
        they are collected into tuples stored in a set.
    top_n : int, default 5
        How many of the largest non-zero entries of each row make up its
        pattern. The default reproduces the previously hard-coded value.

    Returns
    -------
    int
        Number of distinct patterns; ``0`` when ``X`` has no rows.

    Notes
    -----
    * A row made only of zeros contributes the empty tuple ``()`` as its
      pattern, so it still counts as one pattern.
    * The tuple is order-sensitive: the same labels ranked differently are
      different patterns. Rows with tied values may therefore be ordered
      according to the sort's tie-breaking.
    """
    # Only the number of distinct patterns is needed, so a set is enough;
    # per-pattern occurrence counts were never used by the caller.
    patterns = set()
    for _, row in X.iterrows():
        leading = row[row != 0].sort_values(ascending=False).head(top_n)
        patterns.add(tuple(leading.index))
    return len(patterns)


def score(args):
    """Run one simulation and score it with ``f``.

    Parameters
    ----------
    args : tuple
        A ``(i, k)`` pair, where ``i`` is a key into the global ``Theta``
        and ``k`` is a key into the global ``Beta``. The pair arrives as a
        single argument because the pool's ``imap_unordered`` passes exactly
        one object per task.

    Returns
    -------
    dict
        A single-entry mapping ``{(i, k): f(X)}``, where ``X`` is whatever
        ``BayesGP(Beta[k], Theta[i]['theta'])`` returns (presumably a
        DataFrame, since ``f`` iterates it with ``iterrows`` - confirm
        against the model).
    """
    theta_id, beta_id = args
    simulated = BayesGP(Beta[beta_id], Theta[theta_id]['theta'])
    return {(theta_id, beta_id): f(simulated)}

# Training

In [None]:
results = {}

# One task per (Theta key, Beta key) combination, generated lazily.
tasks = ((i, k) for i in Theta for k in Beta)

with Pool(processes=16) as pool:
    # Results arrive in completion order, not submission order; each one is
    # a single-entry dict, so the simulation id travels with its score.
    for outcome in pool.imap_unordered(score, tasks):
        sim_id, value = next(iter(outcome.items()))
        print(f'Sim. Id: {sim_id}, score={value}')
        results.update(outcome)

# Persist the scores so the results section can be re-run without
# repeating the simulations.
with open('./results.yaml', 'w') as file:
    yaml.dump(results, file)

# Results

In [None]:
# Reload the saved scores. The keys are Python tuples, which the safe YAML
# loader cannot reconstruct, hence the unsafe loader here.
# SECURITY NOTE: only load a results file produced by this notebook.
with open('./results.yaml') as results_file:
    results = yaml.unsafe_load(results_file)

In [None]:
# Flatten {(theta_id, k): score} into one row per simulation.
score_table = pd.DataFrame(
    [(*sim_id, value) for sim_id, value in results.items()],
    columns=['Theta', 'k', 'score'],
)

# Scatter of score against k, one colour per Theta id (treated as nominal).
alt.Chart(score_table).mark_point().encode(
    x='k',
    y='score',
    color='Theta:N',
    tooltip=['score'],
)