# Movie Embedding Visualization

In [None]:
!pip install -r notebook-requirements.txt

In [1]:
import pandas as pd
import numpy as np
from sklearn.manifold import TSNE

In [2]:
# Movie metadata; its `movie_title` column supplies the text labels in the plot.
movies = pd.read_parquet('../data/movies.parquet')

# Pre-computed movie embeddings stored as a NumPy array on disk; these are
# projected to 2-D by t-SNE in the next step.
# NOTE(review): the plot pairs embedding rows with `movies` rows positionally,
# so both files are assumed to share the same row order — confirm.
movie_embs = np.load('../data/model/movie_embeddings.npy')

In [3]:
# Project the embeddings down to 2-D for plotting. t-SNE is stochastic, so the
# seed is fixed (same value as the KMeans seed) to make the layout — and the
# clusters computed from it — reproducible across kernel restarts.
movie_embs_2d = TSNE(n_components=2, random_state=0).fit_transform(movie_embs)

In [23]:
from bokeh.plotting import figure, show
from bokeh.models import LabelSet, ColumnDataSource, CategoricalColorMapper
from bokeh.palettes import d3
from sklearn.cluster import KMeans

In [15]:
# Cluster the 2-D t-SNE coordinates (not the original embeddings) into 15
# groups, with a fixed seed for the centroid initialisation.
kmeans = KMeans(
    random_state=0,
    n_clusters=15,
).fit(movie_embs_2d)

In [39]:
# The colour mapper is built from string factors, so each point's integer
# cluster id is converted to its string form.
km_labels = list(map(str, kmeans.labels_))

In [42]:
# One colour per cluster. The cluster count is read from the fitted model
# instead of being repeated as a literal, so the palette and the factor list
# cannot drift out of sync with the `n_clusters` given to KMeans.
n_clusters = kmeans.n_clusters
# d3['Category20'] is keyed by palette size and offers between 3 and 20 colours.
palette = d3['Category20'][n_clusters]
# Factors are the string cluster ids, matching the per-point string labels.
string_range = [str(x) for x in range(n_clusters)]
color_map = CategoricalColorMapper(factors=string_range,
                                   palette=palette)

In [43]:
# Scatter plot of the 2-D t-SNE projection: one point per movie, coloured by
# its KMeans cluster and annotated with the movie title.
# `height` is used instead of `plot_height`, which was removed in Bokeh 3.
p = figure(sizing_mode="stretch_width", max_width=1000, height=1000)

# Single data source shared by the scatter glyphs and the text labels.
source = ColumnDataSource(data=dict(x=movie_embs_2d[:, 0],
                                    y=movie_embs_2d[:, 1],
                                    titles=movies.movie_title,
                                    label=km_labels))

# Colour each point by looking up its string cluster label in the colour mapper.
p.scatter(x='x', y='y', size=8, source=source,
          color={'field': 'label', 'transform': color_map})

# Title next to every point, nudged 5px right/up so it does not cover the marker.
# `render_mode` is not passed: 'canvas' was already its default value, and the
# argument is no longer accepted by Bokeh 3.
labels = LabelSet(x='x', y='y', text='titles', x_offset=5, y_offset=5,
                  source=source, text_font_size={'value': '8px'})
p.add_layout(labels)

In [44]:
# Display the finished figure. No output target is configured in the visible
# cells (e.g. there is no output_notebook() call), so Bokeh's default applies.
show(p)