In [None]:
# For adding the videos to the DB.
# Do not use this notebook while the server is running.
#
# Jupyter runs cells inside an event loop, and Django's ORM refuses to run
# there unless DJANGO_ALLOW_ASYNC_UNSAFE is set, so set it before any ORM
# query is issued. See:
# https://stackoverflow.com/questions/59119396/how-to-use-django-3-0-orm-in-a-jupyter-notebook-without-triggering-the-async-con
import os
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

from backend.models import Video, ExpertRating, UserPreferences
from django.contrib.auth.models import User as DjangoUser
import base64
import numpy as np
from sklearn.decomposition import PCA
%matplotlib inline
from matplotlib import pyplot as plt
from django.db.models import Q

In [None]:
# Obtaining all videos. list() evaluates the queryset once, so the cells
# below all work on the same in-memory list instead of re-querying the DB.
videos = list(Video.objects.all())

In [None]:
# Size of the `videos` list loaded from the DB.
print("Number of videos in db: %d" % len(videos))

In [None]:
# A video counts as "having a name" when the name is longer than 5 characters.
# A missing (None/empty) name counts as "no name" instead of raising in len(),
# matching how the description and caption checks treat missing values.
print("Percentage of videos with a name: %.1f" % (
    100 * np.mean([len(v.name) > 5 if v.name else 0 for v in videos])
))

In [None]:
# A video counts as "having a description" when the description is present
# and longer than 5 characters.
print("Percentage of videos with a description: %.1f" % (
    100 * np.mean([bool(v.description) and len(v.description) > 5 for v in videos])
))

In [None]:
# A video counts as "having text" when caption_text is present and longer
# than 5 characters.
print("Percentage of videos with text: %.1f" % (
    100 * np.mean([bool(v.caption_text) and len(v.caption_text) > 5 for v in videos])
))

In [None]:
# Share of videos for which get_embedding_np_array() returns a value.
print("Percentage of videos with an embedding: %.1f" % (
    100 * np.mean([not (v.get_embedding_np_array() is None) for v in videos])
))

# Expert Ratings

In [None]:
# One row per ExpertRating, as returned by features_as_vector_centered().
comparisons = np.array(
    [rating.features_as_vector_centered() for rating in ExpertRating.objects.all()]
)

In [None]:
# Number of rows in `comparisons`, i.e. one per ExpertRating loaded above.
print("Number of comparisons: %d" % len(comparisons))

In [None]:
# One histogram per column of the comparisons matrix.
# With no ratings in the DB, np.array([]) is 1-D and has no shape[1];
# plot nothing in that case instead of raising IndexError.
n_components = comparisons.shape[1] if comparisons.ndim == 2 else 0
for comp in range(n_components):
    plt.hist(comparisons[:, comp], alpha=0.5)
    # Label each figure so it can be told apart from the others when skimming.
    plt.title("Expert ratings: component %d" % comp)
    plt.xlabel("Value")
    plt.ylabel("Count")
    plt.show()

### Plotting all the videos with embeddings

In [None]:
# Keep only the videos for which get_embedding_np_array() returns a value;
# the order of `videos` is preserved.
videos_with_emb = [v for v in videos if v.get_embedding_np_array() is not None]

In [None]:
# All embeddings, in the same order as videos_with_emb
# (embeddings[i] belongs to videos_with_emb[i]).
embeddings = [v.get_embedding_np_array() for v in videos_with_emb]

In [None]:
# PCA of embeddings: project every embedding onto the first 2 principal
# components, giving one 2-D point per entry of `embeddings`.
# NOTE(review): PCA(n_components=2) needs at least 2 embeddings - confirm
# the DB always has that many.
pca_emb = PCA(n_components=2).fit_transform(embeddings)

In [None]:
# Scatter plot of the 2-D PCA projection, with every point labelled by the
# name of the video it belongs to.
plt.figure(figsize=(10, 10))
plt.title("All videos: embedding")
xs, ys = pca_emb[:, 0], pca_emb[:, 1]
plt.scatter(xs, ys)
# pca_emb rows and videos_with_emb share the same order, so pair them up.
for (x, y), v in zip(pca_emb, videos_with_emb):
    plt.text(x, y, v.name)
plt.xlabel("PCA component 1")
plt.ylabel("PCA component 2")
plt.show()

### Number of ratings for a video
Developing a function to select the best videos to rate.

In [None]:
# User on whose behalf the video pairs below are selected.
username = 'sergei'
# Both lookups use .get(), which raises DoesNotExist when no matching row
# exists (and MultipleObjectsReturned when more than one does).
django_user = DjangoUser.objects.get(username=username)
user_preferences = UserPreferences.objects.get(user=django_user)

In [None]:
def num_ratings_to_prob_selection(x, T=1):
    """Number of ratings -> probability of selection to rate.

    Computes a softmax over ``-T * x``: with ``T > 0``, entries with fewer
    ratings get a higher probability.

    Args:
        x: array-like of rating counts, one entry per candidate.
        T: factor the counts are multiplied by before exponentiation.

    Returns:
        np.ndarray of probabilities with the same shape as ``x``, summing
        to 1. An empty ``x`` gives an empty array.
    """
    logits = (-T) * np.asarray(x, dtype=float)
    if logits.size == 0:
        return logits
    # Shift so the largest logit is 0. The ratios are unchanged, but this
    # keeps exp() from underflowing to 0 for every entry (which would give
    # 0/0 = nan) when all counts are large, or from overflowing when T < 0.
    weights = np.exp(logits - np.max(logits))
    return weights / np.sum(weights)
# Quick check: selection probabilities for the videos loaded at the top of
# the notebook (the resulting array is displayed as the cell output).
num_ratings_to_prob_selection([v.n_ratings() for v in videos])

In [None]:
def sample_video_to_rate():
    """Sample one video to rate.

    Every video in the DB is a candidate. Its selection probability comes
    from num_ratings_to_prob_selection applied to the n_ratings() of all
    candidates.

    Returns:
        One Video instance.

    Raises:
        ValueError: raised by np.random.choice when there are no videos.
    """
    # Evaluate the queryset exactly once so that candidates and
    # probabilities are guaranteed to line up index by index.
    candidates = list(Video.objects.all())
    probs = num_ratings_to_prob_selection([v.n_ratings() for v in candidates])
    # Draw an index instead of handing the model instances to numpy, which
    # would first have to coerce them into an object array.
    idx = np.random.choice(len(candidates), p=probs)
    return candidates[idx]

In [None]:
def selection_loop(user, max_trials=50):
    """Select two videos which were not rated yet.

    Repeatedly samples two videos with sample_video_to_rate() and returns
    the first pair that consists of two different videos and for which
    ``user`` has no ExpertRating in either order.

    Args:
        user: value matched against the ``user`` field of ExpertRating.
        max_trials: maximum number of pairs to sample before giving up.

    Returns:
        ``[video_1, video_2]``, or None when no suitable pair was found
        within ``max_trials`` attempts.
    """
    for _ in range(max_trials):
        video_1 = sample_video_to_rate()
        video_2 = sample_video_to_rate()

        # The two draws are independent, so the same video can come up
        # twice; a video cannot be compared against itself.
        if video_1 == video_2:
            continue

        # A pair counts as rated regardless of which video was stored first.
        # exists() asks the DB for a yes/no instead of loading the rows.
        already_rated = ExpertRating.objects.filter(
            Q(video_1=video_1, video_2=video_2) | Q(video_1=video_2, video_2=video_1),
            user=user,
        ).exists()
        if already_rated:
            continue

        return [video_1, video_2]
    return None

In [None]:
# Calling the inline function defined in this notebook.
# The global numpy RNG is seeded first so the sampled pair is repeatable.
# NOTE(review): this call passes user_preferences, while the model call in
# the next cell passes django_user - confirm both refer to the same user
# in the form each function expects.
np.random.seed(42)
selection_loop(user_preferences)

In [None]:
# Calling the same logic via the model: two_videos_to_rate lives on
# ExpertRating (its code is not part of this notebook).
# The RNG is re-seeded with the same value as for the inline call so the
# two results can be compared.
np.random.seed(42)
ExpertRating.two_videos_to_rate(django_user)