In [8]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import offsetbox
from sklearn import (manifold, datasets, decomposition, ensemble,
                     discriminant_analysis, random_projection)
from MulticoreTSNE import MulticoreTSNE as TSNE
from sklearn.preprocessing import StandardScaler

In [9]:
from mangaki.utils.data import Dataset
from mangaki.utils.values import rating_values
# Build the anonymized rating dataset from the database and persist it to 'ratings.pkl'.
# NOTE(review): `Rating` is not imported in any visible cell — presumably injected by the
# Django shell the notebook runs in (e.g. shell_plus); confirm before a fresh-kernel run.
dataset = Dataset()
# One (user_id, work_id, choice) tuple per rating row.
triplets = Rating.objects.values_list('user_id', 'work_id', 'choice')
# Presumably fills `dataset.anonymized`, which a later cell reads — confirm in mangaki.utils.data.
dataset.make_anonymous_data(triplets)
# NOTE(review): the load right after the save looks like a round-trip check or a leftover;
# on later runs the load alone may be enough to skip the database query — confirm.
dataset.save('ratings.pkl')
dataset.load('ratings.pkl')

In [15]:
import scipy.sparse as sp
def _dense_ratings(anonymized):
    """Return the anonymized ratings as a dense float64 matrix.

    Rows are indexed by the first column of ``anonymized.X`` and columns by the
    second one; the cell values come from ``anonymized.y``.
    """
    row_idx, col_idx = anonymized.X[:, 0], anonymized.X[:, 1]
    sparse_ratings = sp.coo_matrix((anonymized.y, (row_idx, col_idx)))
    return sparse_ratings.toarray().astype(np.float64)


# Standardize the dense rating matrix feature-wise (i.e. per column).
scaler = StandardScaler()
X = scaler.fit_transform(_dense_ratings(dataset.anonymized))
print(X.shape)

(2071, 10086)


In [16]:
# Flip the matrix so that the former columns of X become the rows (samples) of M.
M = np.transpose(X)
print(M.shape)

(10086, 2071)


In [17]:
# Embed the rows of M into 2-D with multicore t-SNE.
# t-SNE is stochastic: the seed is pinned so that Restart & Run All yields the same layout.
# NOTE(review): some MulticoreTSNE releases only accept init='random' or an explicit array —
# confirm that 'pca' is honoured by the installed version.
model = TSNE(n_components=2, init='pca', verbose=2, n_jobs=4, random_state=42)
X_2d = model.fit_transform(M)

In [23]:
# Embed the rows of M into 2-D with multidimensional scaling.
# MDS starts from a random configuration, so the seed is pinned to keep the
# resulting coordinates reproducible across kernel restarts.
mds = manifold.MDS(n_components=2, random_state=42)
X_2d_mds = mds.fit_transform(M)

In [26]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource,LabelSet

class MangakiViz:
    """Labelled Bokeh scatter plot of selected works in a 2-D embedding.

    Creating an instance builds the figure and displays it immediately.
    The module-level ``dataset`` is used to translate work ids into row
    indices of ``coords``.

    Parameters
    ----------
    titles : mapping
        Looked up as ``titles[work_id]`` for every id in ``drawn`` to obtain
        the label text of each point.
    coords : 2-D array
        Embedding coordinates; column 0 is plotted as x and column 1 as y.
    drawn : sequence
        Ids of the works to plot.
    title : str, optional
        Figure title. Defaults to the MDS caption that used to be hard-coded,
        so existing calls are unaffected; pass another caption when plotting
        a different embedding (e.g. the t-SNE coordinates).

    Attributes
    ----------
    source : ColumnDataSource holding the ``x``, ``y`` and ``titles`` columns.
    p : the Bokeh figure.
    labels : the LabelSet attached to the figure.
    """

    def __init__(self, titles, coords, drawn, title='MDS embedding of Mangaki'):
        # Row indices of the drawn works inside `coords`.
        work_ids = dataset.encode_works(drawn)
        self.source = ColumnDataSource(
            data={
                'x': coords[work_ids, 0],
                'y': coords[work_ids, 1],
                'titles': [titles[work_id] for work_id in drawn]
            }
        )
        self.p = figure(title=title)
        self.p.scatter(x='x', y='y', size=8, source=self.source)
        # `render_mode` is intentionally not passed: 'canvas' was the default
        # where the option existed, and Bokeh 3 no longer accepts it.
        self.labels = LabelSet(x='x', y='y', text='titles', level='glyph',
                               x_offset=5, y_offset=5, source=self.source)
        self.p.add_layout(self.labels)
        show(self.p)

# Render Bokeh plots inline in the notebook.
output_notebook()

# Title of every work, keyed by work id.
titles = {work_id: work_title
          for work_id, work_title in Work.objects.values_list('id', 'title')}

# Ids of the first 200 works returned by Work.objects.popular().
popular = [work_id
           for work_id in Work.objects.popular()[:200].values_list('id', flat=True)]
sampled_items = []

# Plot those works at their MDS coordinates.
viz = MangakiViz(titles, X_2d_mds, popular)