In [1]:
import sys
sys.path.append('../..')

import choix
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

from config import client
from mlpp.data_modeling.bradley_terry import prepare_comparisons, beatmap_frequency

In [75]:
# How many entries of the frequency table are kept; the same value is passed
# to the solver as the number of items further down.
MATRIX_SIZE = 100

# Handle to the database that stores the beatmap documents.
osu_db = client["osu_random_db"]

# Comparison data loaded from a .npy file
# (array layout is defined by the code that wrote it — TODO confirm).
comps = np.load("comparisons_3k.npy")

In [76]:
# Frequency table for the beatmaps appearing in `comps`, transposed so that
# row 0 can be sliced for ids and row 1 is available for colouring plots later.
# NOTE(review): presumably row 0 = beatmap ids and row 1 = occurrence counts,
# sorted most-frequent first — confirm against beatmap_frequency.
freqs = beatmap_frequency(comps).T
# Keep only the first MATRIX_SIZE ids of that table.
top_ids = freqs[0][:MATRIX_SIZE]

# Restrict the comparisons to the selected beatmaps. `index` is used below as
# a lookup from beatmap `_id` to a sortable position.
index_comps, index = prepare_comparisons(comps, beatmaps=top_ids)

In [77]:
# Fetch the beatmap documents for the selected ids. An `$in` query returns its
# matches in no guaranteed order and silently leaves out ids that are absent
# from the collection.
requested_ids = top_ids.tolist()
found = list(osu_db['osu_beatmaps'].find({'_id': {'$in': requested_ids}}))

# Later cells pair bms[i] with the i-th fitted parameter. A missing document
# would shift that pairing, so fail loudly here instead of misaligning.
missing_ids = set(requested_ids) - {bm['_id'] for bm in found}
if missing_ids:
    raise ValueError(
        f"{len(missing_ids)} of {len(requested_ids)} requested beatmaps were not "
        f"found in osu_beatmaps, e.g. {sorted(missing_ids)[:5]}"
    )

# Order the documents by their position in `index` so that list position i
# corresponds to item i of the comparison data.
bms = sorted(found, key=lambda bm: index[bm['_id']])

# Star ratings of the beatmaps, in the same order as `bms`.
star_diffs = np.fromiter(
    (bm['difficultyrating'] for bm in bms),
    dtype='float64',
    count=len(bms),
)

In [78]:
# Estimate one strength parameter per item from the pairwise comparisons
# using choix's Luce Spectral Ranking solver; `alpha` is its regularisation
# strength, kept small here.
params = choix.lsr_pairwise(
    n_items=MATRIX_SIZE,
    data=index_comps,
    alpha=1e-4,
)

In [79]:
# Shift the fitted parameters by a constant so that their minimum coincides
# with the minimum star rating; only the offset changes, not the spread.
min_delta = star_diffs.min() - params.min()
bt_diffs = min_delta + params

In [96]:
%matplotlib widget
# Overlay the two difficulty distributions on one explicit Axes. Each series
# is labelled and a legend is drawn so the histograms can be told apart.
fig, ax = plt.subplots()
_ = ax.hist(star_diffs, bins=50, alpha=.8, label="star_diff")
_ = ax.hist(bt_diffs, bins=50, alpha=.8, label="bt_diff")
ax.set_xlabel("difficulty")
ax.set_ylabel("number of beatmaps")
ax.set_title("Distribution of star_diff and bt_diff")
# Assign the result so the cell does not echo a repr.
_ = ax.legend()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [69]:
# Attach each fitted difficulty to its beatmap document, then tabulate.
# Check the lengths explicitly: a mismatch means the documents and the fitted
# values are no longer aligned, and should not surface as a bare IndexError.
if len(bms) != len(bt_diffs):
    raise ValueError(
        f"cannot align {len(bms)} beatmaps with {len(bt_diffs)} fitted difficulties"
    )
# The documents are still updated in place so that `bms` keeps carrying the
# 'bt_diff' key for any later use.
for bm, bt_diff in zip(bms, bt_diffs):
    bm['bt_diff'] = bt_diff

# Build the frame and rename in one chained expression (no inplace mutation),
# so re-running the cell always starts from the raw documents.
df = pd.DataFrame(bms).rename(columns={'difficultyrating': 'star_diff'})

In [70]:
# Narrow the frame to the identifying columns plus the two difficulty measures.
df_slice = df.loc[:, ["_id", "filename", "star_diff", "bt_diff"]]

In [71]:
# Bare expression: the notebook renders the frame as this cell's output
# (long tables are shown truncated).
df_slice

Unnamed: 0,_id,filename,star_diff,bt_diff
0,1892353,Koda Kumi - Guess Who Is Back (TV Size) (Sotar...,3.52259,3.783101
1,737284,Stonebank - The Pressure (Asserin) [Light Insa...,3.89490,3.683881
2,872455,toby fox - Hopes and Dreams (pkk) [Hard].osu,2.99997,2.280757
3,872456,toby fox - Hopes and Dreams (pkk) [Normal].osu,2.16526,1.962206
4,1572874,Helblinde - Putin's Boner (Sotarks) [Kin's Har...,3.55288,2.841478
...,...,...,...,...
995,1118196,Porter Robinson & Madeon - Shelter (Monstrata)...,4.37942,4.669754
996,847863,Feint - Tower Of Heaven (You Are Slaves) (eLy)...,3.30733,1.827098
997,172024,Avicii - Levels (Nightcore Mix) (Kiyro) [Riki'...,1.83799,1.759087
998,1992698,LiSA - Gurenge (TV Size) (xChippy) [Hard].osu,3.04237,5.191350


In [72]:
%matplotlib widget
# Scatter of star rating against fitted difficulty with seaborn's regression
# fit drawn on top; small markers keep dense regions readable.
sns.regplot(
    data=df_slice,
    x="star_diff",
    y="bt_diff",
    scatter_kws=dict(s=5),
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='star_diff', ylabel='bt_diff'>

In [73]:
# Pearson correlation matrix between the star ratings and the fitted
# difficulties; the off-diagonal entry is the correlation of interest.
np.corrcoef(x=star_diffs, y=bt_diffs)

array([[1.        , 0.71831267],
       [0.71831267, 1.        ]])

In [74]:
%matplotlib widget
# Degree-1 least-squares fit of bt_diff as a function of star_diff.
m, b = np.polyfit(star_diffs, bt_diffs, 1)

fig, ax = plt.subplots()
# NOTE(review): the colours come from freqs[1] in frequency-table order, while
# the points follow the order of `bms` (sorted by `index`) — confirm that the
# two orderings agree, otherwise the colours are attached to the wrong points.
points = ax.scatter(
    star_diffs,
    bt_diffs,
    c=freqs[1][:MATRIX_SIZE],
    s=5,
    cmap="viridis_r",
)
ax.plot(star_diffs, m * star_diffs + b)
# Derive the count from MATRIX_SIZE so the title cannot go stale when the
# constant is changed.
ax.set_title(f"Diff for top {MATRIX_SIZE} Beatmaps")
ax.set_xlabel("star_diff")
ax.set_ylabel("bt_diff")
# Tie the colorbar explicitly to the scatter rather than to pyplot's notion of
# the "current" mappable.
fig.colorbar(points, ax=ax)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.colorbar.Colorbar at 0x7fae7bf489a0>