In [None]:
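# Notebook for inspecting the videos stored in the DB and the model predictions for them.
# (The original header read "for adding the videos to DB"; no cell visible here adds videos.)
# Do not run it while the server is running.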
# https://stackoverflow.com/questions/59119396/how-to-use-django-3-0-orm-in-a-jupyter-notebook-without-triggering-the-async-con
import os
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
#from backend.ml_models import DatabasePreferenceLearner
import numpy as np

from django.contrib.auth.models import User as DjangoUser
from backend.models import ExpertRating, UserPreferences, Video
from backend.ml_models import DatabasePreferenceLearner
from backend.rating_fields import VIDEO_FIELDS

#%pylab
%matplotlib inline
#%matplotlib widget

from matplotlib import pyplot as plt
import mplcursors

import sys
import requests
from PIL import Image
import seaborn as sns
import pandas as pd

In [None]:
# Instantiate the preference learner. Its `aggregator` and `user_to_model`
# attributes are read further down in this notebook.
# NOTE(review): presumably the constructor loads the trained models -- confirm.
learner = DatabasePreferenceLearner()

# Queryset over all UserPreferences rows (used for the per-user rating counts).
users = UserPreferences.objects.all()

def get_videos_and_embeddings(videos):
    """Keep the videos that have an embedding and stack their embeddings.

    Args:
        videos: iterable of objects exposing an ``embedding_np`` attribute.

    Returns:
        A pair ``(kept, vectors)``: ``kept`` is the list of videos whose
        ``embedding_np`` is not None (input order preserved) and ``vectors``
        is ``np.array`` built from their embeddings, in the same order.
    """
    kept = []
    for video in videos:
        if video.embedding_np is not None:
            kept.append(video)

    vectors = np.array([video.embedding_np for video in kept])
    return kept, vectors

# Videos that have an embedding, together with the array of those embeddings
videos, embeddings = get_videos_and_embeddings(Video.objects.all())

# Default predictor used by the plots below
predictor = learner.aggregator

# Show predictions distribution (c.d.f.)

In [None]:
def plot_predictions_distribution(videos, predictor):
    """Plot the cumulative distribution of predictions, one curve per field.

    Args:
        videos: iterable of videos; those without an embedding are ignored.
        predictor: callable applied to the embeddings array; its result is
            indexed as ``[:, i]`` with ``i`` following the order of
            VIDEO_FIELDS.
    """
    _, vectors = get_videos_and_embeddings(videos)
    scores = predictor(vectors)

    plt.figure()
    plt.title('Distribution of predictions')
    for column, field_name in enumerate(VIDEO_FIELDS):
        field_scores = scores[:, column]
        # Sorted values against their rank give an (unnormalised) c.d.f.
        ranks = range(len(field_scores))
        plt.plot(sorted(field_scores), ranks, label=field_name)
    plt.xlabel('Predicted value')
    plt.ylabel('Number of videos')
    plt.legend()
    plt.show()


plot_predictions_distribution(videos, predictor)

# Show videos from the database

In [None]:
def get_video_picture(video_id, video_index=0, timeout=10):
    """Download a YouTube thumbnail and return it as an np.array.

    Args:
        video_id: YouTube video id (must be a ``str``).
        video_index: number of the thumbnail to fetch (``<index>.jpg``).
        timeout: seconds passed to ``requests.get`` so that a stalled
            connection cannot block the notebook forever.

    Returns:
        The decoded image as a numpy array.

    Raises:
        AssertionError: if ``video_id`` is not a string.
        requests.exceptions.RequestException: on network errors or timeout.
    """
    assert isinstance(video_id, str)
    url = "https://img.youtube.com/vi/%s/%d.jpg" % (video_id, video_index)

    # The context manager releases the streamed connection once the image
    # has been read. The HTTP status is deliberately not checked: whatever
    # image body the server sends is decoded, as before.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raw.decode_content = True  # honour Content-Encoding
        image = Image.open(response.raw)  # NOTE: it requires pillow 2.8+
        # Convert while the connection is still open: Image.open is lazy.
        return np.array(image)

In [None]:
def plot_videos(videos, num_vids_x=4, num_vids_y=4):
    """Plot thumbnails of randomly chosen videos on a grid.

    Args:
        videos: collection of videos exposing ``name`` and ``video_id``.
        num_vids_x: number of thumbnails per row (grid columns).
        num_vids_y: number of thumbnails per column (grid rows).

    At most ``num_vids_x * num_vids_y`` videos are shown; if fewer videos are
    available, all of them are shown. Nothing is plotted for an empty input.
    """
    videos = list(videos)

    # Sampling without replacement cannot return more items than there are.
    num_vids = min(num_vids_x * num_vids_y, len(videos))
    if num_vids == 0:
        print('No videos to plot')
        return

    plt.figure()
    plt.suptitle('Videos in the database')

    chosen = np.random.choice(len(videos), num_vids, replace=False)
    for idx, video_idx in enumerate(chosen, start=1):
        v = videos[video_idx]
        # plt.subplot takes (nrows, ncols, index): rows run along y.
        plt.subplot(num_vids_y, num_vids_x, idx)
        plt.title(v.name[:10] + "...")
        plt.imshow(get_video_picture(v.video_id))
        plt.axis('off')
    plt.show()

In [None]:
# Show a random sample of video thumbnails (default 4x4 grid).
plot_videos(videos)

# Plot characteristics summary

In [None]:
def plot_characteristic(videos, all_predictions, field):
    """Show a summary figure for one field.

    Every video is drawn as a point at its predicted value for ``field``.
    Thumbnails and (truncated) names of a few videos spread over the range of
    predictions are overlaid, and a density plot is added on a twin y axis.

    Args:
        videos: sequence of videos, indexable by position, exposing ``name``
            and ``video_id``; position ``k`` must correspond to row ``k`` of
            ``all_predictions``.
        all_predictions: array indexed as ``[:, i]`` where ``i`` is the
            position of the field in VIDEO_FIELDS.
        field: one of the entries of VIDEO_FIELDS.
    """
    field_idx = VIDEO_FIELDS.index(field)
    predictions = all_predictions[:, field_idx]

    _, ax = plt.subplots(constrained_layout=True, figsize=(11, 3))
    f_top = 2  # upper limit of the y axis
    height = 0.3  # thumbnail height (data units)
    width = 0.3  # unit used for margins and text offsets
    thumb_width = 0.1  # thumbnail width (data units)
    num_videos_to_show = 10
    bottom = 0.2  # image bottom

    plt.title("Videos by %s. Hover over the points to see the details" % field)
    scatter = plt.scatter(predictions, np.zeros(len(predictions)), alpha=0.1, label='Videos')
    plt.xlabel('Predicted %s' % field)
    plt.ylabel('Videos')

    # Hover annotations. The names live in their own variable: the callback
    # looks the list up lazily, so it must not share a name with the legend
    # labels computed further down. The cursor is bound to the scatter only,
    # so thumbnails and guide lines never trigger the callback.
    video_names = [v.name for v in videos]
    cursor = mplcursors.cursor(scatter, hover=True)
    cursor.connect("add", lambda sel: sel.annotation.set_text(video_names[sel.target.index]))

    def show_image_idx(video_idx):
        """Show video picture on the chart, centred on its prediction."""
        v = videos[video_idx]
        x_coord = predictions[video_idx]

        left = x_coord - thumb_width / 2
        right = x_coord + thumb_width / 2
        top = height + bottom

        plt.imshow(get_video_picture(v.video_id), extent=(left, right, bottom, top), aspect='auto')

    def find_closest_video_with_prediction(val):
        """Find the index of a video with prediction closest to val."""
        return np.argmin(np.abs(predictions - val))

    pred_min = min(predictions)
    pred_max = max(predictions)

    # Pick videos at evenly spaced prediction values (may contain repeats).
    vals = np.linspace(pred_min, pred_max, num_videos_to_show)
    shown_idxes = [find_closest_video_with_prediction(v) for v in vals]

    # Vertical band in which the text labels are staggered.
    text_top = f_top - width / 2
    text_bot = bottom + width * 1.5
    text_delta = text_top - text_bot

    for rank, idx in enumerate(shown_idxes[::-1]):
        show_image_idx(idx)
        y = text_bot + rank * text_delta / num_videos_to_show
        plt.text(predictions[idx], y, videos[idx].name[:50])
        # Dashed guide from the thumbnail up to its label.
        plt.plot([predictions[idx]] * 2, [bottom + width, y], '--', color='gray', alpha=0.5)

    plt.ylim((-width / 5, f_top))
    plt.xlim((pred_min - width / 2, pred_max + width * 3))
    frame1 = plt.gca()
    frame1.axes.yaxis.set_ticklabels([])
    frame1.axes.xaxis.set_ticklabels([])

    # Density of the predictions on a secondary y axis.
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases.
    ax2 = ax.twinx()
    sns.distplot(predictions, ax=ax2,
                 hist_kws={"alpha": 0.2},
                 kde_kws={"alpha": 0.2},
                 label='Video density')

    # One legend combining the entries of both axes.
    main_handles, main_labels = ax.get_legend_handles_labels()
    twin_handles, twin_labels = ax2.get_legend_handles_labels()
    plt.legend(main_handles + twin_handles, main_labels + twin_labels)

    plt.show()

In [None]:
def plot_predictions(videos, predictor):
    """Draw the per-field summary figure for every field in VIDEO_FIELDS.

    Args:
        videos: iterable of videos; those without an embedding are ignored.
        predictor: callable applied to the embeddings array, returning the
            predictions passed on to ``plot_characteristic``.
    """
    videos_with_embedding, vectors = get_videos_and_embeddings(videos)
    scores = predictor(vectors)

    for field_name in VIDEO_FIELDS:
        plot_characteristic(videos_with_embedding, scores, field_name)

In [None]:
# Per-field summary figures using the learner's aggregator as predictor.
plot_predictions(videos, predictor)

# Ratings for a particular user

In [None]:
# Number of ExpertRating rows per user; .count() lets the database do the
# counting instead of loading every rating just to take len().
user_to_nratings = [
    {'username': u.user.username,
     'ratings': ExpertRating.objects.filter(user=u).count()}
    for u in users
]

# Explicit columns keep the frame well-formed even when there are no users.
df = pd.DataFrame(user_to_nratings, columns=['username', 'ratings'])
df = df.sort_values('ratings', ascending=False)
df = df[df.ratings > 0]  # only users that rated something
print(df)
sns.barplot(x='username', y='ratings', data=df, palette='Spectral')
plt.show()

In [None]:
def username_to_model(username='sergei'):
    """Return the learner's model for the user with the given username.

    The Django user is looked up by ``username``, then the matching
    UserPreferences row; the id of that row is the key into
    ``learner.user_to_model``.
    """
    django_user = DjangoUser.objects.get(username=username)
    preferences = UserPreferences.objects.get(user=django_user)
    return learner.user_to_model[preferences.id]

In [None]:
# Same per-field summary figures, with the model of user 'srlt' as predictor.
plot_predictions(videos, username_to_model(username='srlt'))