In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from scipy.interpolate import UnivariateSpline as uvspline
from datetime import datetime
from exploration.config import mongo_inst
from mlpp.data_collection.sample_func import sampleFuncGenerator as sfg

In [None]:
 NUM_BINS = 200
MAX_PP = 7000
DATE_LIMIT = datetime(2019,1,1)

osu_random_db = mongo_inst['osu_random_db']

osu_dump = (osu_random_db['osu_scores_high'], osu_random_db['osu_user_stats'])
osu_scores_high, osu_user_stats = osu_dump

val_random_db = mongo_inst['val_random_db']
val_dump = (val_random_db['osu_scores_high'], val_random_db['osu_user_stats'])

generator = sfg(date_limit = DATE_LIMIT, max_pp = MAX_PP, n_bins = NUM_BINS)

In [None]:
# Build a NUM_BINS x NUM_BINS matrix of score counts. Row i is filled from the
# histogram pipeline built for the i-th pp range; the column is the bin index
# reported in each aggregation result's '_id'.
bin_width = MAX_PP // NUM_BINS  # 35 with the current config (7000 / 200)
cache = np.zeros((NUM_BINS, NUM_BINS))

for i in tqdm(range(NUM_BINS)):
    j = i * bin_width
    pipeline = generator.range_score_hist_pipeline(val_dump[1], j, j + bin_width)

    # Entries the aggregation does not return stay at zero.
    for bin_cnt in val_dump[0].aggregate(pipeline):
        cache[i, int(bin_cnt['_id'])] = bin_cnt['count']

In [None]:
# Linear system for the fit: find x such that A @ x is close to b.
A = cache.T

# Target vector: the same value for every bin, namely 1% of the score
# collection's document count divided evenly across NUM_BINS bins.
# NOTE(review): if val_dump[0] is a PyMongo collection, `.count()` is
# deprecated since PyMongo 3.7 and removed in 4.0; `estimated_document_count()`
# is the replacement - confirm the installed driver version before switching.
b = np.full(NUM_BINS, val_dump[0].count() / NUM_BINS * .01)
n = len(b)

In [None]:
# `pdf_fit` is otherwise only loaded near the end of the notebook, so on a
# fresh kernel this cell used to fail with a NameError. Load it here so the
# notebook survives Restart & Run All.
pdf_fit = np.load("exploration/skill_biased_sampling_function/pdf_sample_func.npy")

# Per-bin (lower, upper) bounds for the optimiser: the lower bound is a quarter
# of pdf_fit sampled once per bin, the upper bound is a flat 0.5.
bin_width = MAX_PP // NUM_BINS  # 35 with the current config (7000 / 200)
bounds = np.column_stack((pdf_fit[::bin_width] * .25, np.full(NUM_BINS, .5)))

In [None]:
def fun_reg(x):
    """Return the Euclidean norm of the residual ``A @ x - b``."""
    return np.linalg.norm(np.dot(A, x) - b)


def fun_mse(x):
    """Return the mean squared residual of ``A @ x - b``."""
    # The mean of squares is already a non-negative scalar, so the
    # np.linalg.norm wrapper that used to surround it was a no-op.
    return ((np.dot(A, x) - b) ** 2).mean()


# Minimise both objectives from an all-zero starting point, subject to the
# per-bin box constraints in `bounds`.
sol_reg = minimize(fun_reg, np.zeros(n), method='L-BFGS-B', bounds=bounds)
sol_mse = minimize(fun_mse, np.zeros(n), method='L-BFGS-B', bounds=bounds)

In [None]:
# Pull the optimised parameter vector out of each solver result.
y_reg = sol_reg.x
y_mse = sol_mse.x

In [None]:
# Overlay the reconstructions A @ y for the norm-based and MSE-based solutions.
plt.plot(np.matmul(A, y_reg))
plt.plot(np.matmul(A, y_mse))

In [None]:
# Fit a smoothing spline through the sorted norm-objective weights.
# The abscissa is sized from y_reg itself, because UnivariateSpline requires
# x and y to have the same length.
x = np.arange(len(y_reg))
ys = uvspline(x, sorted(y_reg), s=.02)

In [None]:
# Two-column array built from every 35th pdf_fit value:
# column 0 is 0.5x that value, column 1 is 1.25x.
pdf_strided = pdf_fit[::35]
borders = np.column_stack((pdf_strided * .5, pdf_strided * 1.25))

In [None]:
# Display the first row of `borders` as a quick sanity check.
borders[0]

In [None]:
# Plot the weights from the MSE-objective solution.
plt.plot(y_mse)
# Alternative views, currently disabled:
# plt.plot(sorted(y_reg))
# plt.plot(ys(np.arange(200)))

In [None]:
# Repeat each per-bin weight once per pp value in the bin, then run one
# simulation on the `val_dump` collections with that expanded weight vector.
bin_width = MAX_PP // NUM_BINS  # 35 with the current config (7000 / 200)
sample_pp, sample_users = generator.simulate(*val_dump, np.repeat(y_reg, bin_width))
# Alternative, currently disabled: evaluate the spline `ys` instead.
# sample_pp, sample_users = generator.simulate(*val_dump, ys(np.arange(MAX_PP) / bin_width))

In [None]:
def test_fit(fit, dump = osu_dump, trials = 10):
    """Score a sampling function by averaging over repeated simulations.

    Args:
        fit: sampling-function values passed as the last argument to
            ``generator.simulate``.
        dump: collections unpacked as the leading arguments to
            ``generator.simulate``; defaults to ``osu_dump``.
        trials: number of simulations to run; must be non-zero because the
            accumulated score is divided by it.

    Returns:
        The mean of ``sfg.prop_displaced`` over the simulated pp samples.
    """
    total = 0
    for _ in tqdm(range(trials)):
        # simulate() returns a pair; only the first element is scored.
        simulated_pp, _simulated_users = generator.simulate(*dump, fit)
        total += sfg.prop_displaced(simulated_pp)
    return total / trials

In [None]:
# Load the two previously saved sampling functions from disk
# (paths are relative to the notebook's working directory).
greedy_fit, pdf_fit = (
    np.load("exploration/skill_biased_sampling_function/greedy_sample_func.npy"),
    np.load("exploration/skill_biased_sampling_function/pdf_sample_func.npy"),
)

In [None]:
# Score the greedy sampling function on the val_dump collections (50 runs).
test_fit(greedy_fit, dump=val_dump, trials=50)

In [None]:
# Compare the two loaded sampling functions with the spline through the
# optimised weights, evaluated once per pp value on the bin-index scale.
bin_width = MAX_PP // NUM_BINS  # 35 with the current config (7000 / 200)
plt.plot(greedy_fit, label="greedy_fit")
plt.plot(pdf_fit, label="pdf_fit")
plt.plot(ys(np.arange(MAX_PP) / bin_width), label="spline of sorted y_reg")
plt.legend()

In [None]:
# Score the pdf-based sampling function on the val_dump collections (50 runs).
test_fit(pdf_fit, dump=val_dump, trials=50)

In [None]:
# Score the MSE-objective weights on the val_dump collections (50 runs).
# Each per-bin weight is repeated MAX_PP // NUM_BINS times (35 with the
# current config) instead of using a hard-coded repeat factor.
test_fit(np.repeat(y_mse, MAX_PP // NUM_BINS), dump=val_dump, trials=50)

In [None]:
# Histogram of `sample_pp` using 50 bins.
plt.hist(sample_pp, bins = 50)