In [None]:
import os

# Needed for adding the videos to the DB from this notebook.
# Do not run this notebook while the server is running.
# https://stackoverflow.com/questions/59119396/how-to-use-django-3-0-orm-in-a-jupyter-notebook-without-triggering-the-async-con
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

from backend.models import ExpertRating, Video, VideoRating
import numpy as np
from backend.rating_fields import VIDEO_FIELDS, VIDEO_FIELDS_DICT
from tqdm import tqdm
import pandas as pd
from backend.models import VideoRating
import seaborn as sns
from matplotlib import pyplot as plt

# Rating of channels

In [None]:
# Materialize every Video row once so the following cells iterate over a plain list
videos = [video for video in Video.objects.all()]

In [None]:
# Build one row per video: the median of every rating field across all of the
# video's VideoRating rows, plus the video itself and its uploader (channel).
rating_rows = []
for video in videos:
    # Evaluate the queryset once; it is reused for every field below
    video_ratings = list(VideoRating.objects.filter(video=video))

    row = {}
    for field in VIDEO_FIELDS:
        scores = [getattr(rating, field) for rating in video_ratings]
        # A video without ratings gets NaN. np.median([]) would also return
        # NaN but emits a RuntimeWarning for every field of every such video.
        row[field] = np.median(scores) if scores else np.nan

    row['video'] = video
    row['channel'] = video.uploader
    rating_rows.append(row)

videos_with_rating = pd.DataFrame(rating_rows)

In [None]:
# Median score per channel for every rating field.
# Only the rating columns are aggregated: the 'video' column holds model
# instances, and taking the median of an object column raises TypeError on
# pandas >= 2.0 (older versions silently dropped such columns).
df_by_channel = videos_with_rating.groupby('channel')[list(VIDEO_FIELDS)].median()

In [None]:
# Draw one histogram per column of the per-channel median ratings
df_by_channel.hist()

In [None]:
# For every rating field, list the channels from highest to lowest median
# score, formatted as "<channel> (<score rounded to 2 decimals>)".
# Keys are the display names taken from VIDEO_FIELDS_DICT.
sorted_by = {}
for field in VIDEO_FIELDS:
    # Column lookup by key: attribute access would break for a field name
    # that collides with a DataFrame attribute.
    channel_scores = list(df_by_channel[field].items())

    # NaN never compares as smaller or larger, so sorting it together with
    # real scores gives an arbitrary order. Rank the real scores and append
    # the channels without a score at the end (every list keeps the same
    # length, which the DataFrame built from this dict requires).
    scored = [(channel, score) for channel, score in channel_scores if not pd.isna(score)]
    unscored = [(channel, score) for channel, score in channel_scores if pd.isna(score)]
    ranked = sorted(scored, key=lambda item: -item[1]) + unscored

    sorted_by[VIDEO_FIELDS_DICT[field]] = [
        f"{channel} ({round(score, 2)})" for channel, score in ranked
    ]

In [None]:
# Show the per-field channel rankings side by side, one column per field
pd.DataFrame(data=sorted_by)

# Comparing experts

In [None]:
from sklearn.manifold import Isomap

In [None]:
# UserPreferences is not imported by the import cells of this notebook, so
# this cell fails with NameError on a fresh kernel unless the environment
# pre-imports the models.
# NOTE(review): assumes UserPreferences lives in backend.models next to the
# other models used here — confirm.
from backend.models import UserPreferences

# Load all users and all videos; get_user_vector iterates over `videos`
users = list(UserPreferences.objects.all())
videos = list(Video.objects.all())

In [None]:
def get_user_vector(u):
    """Get vector of ratings for a user.

    The vector is laid out field-major: for every field in ``VIDEO_FIELDS``
    (outer order) it holds one entry per video in the module-level ``videos``
    list (inner order), so its length is ``len(VIDEO_FIELDS) * len(videos)``.

    Args:
        u: the user whose ``VideoRating`` rows are looked up (passed to the
            ORM as ``user=u``).

    Returns:
        list: the user's score for each (field, video) pair, with ``0`` for
        every video the user has no rating for.
    """
    # Fetch all of the user's ratings with a single query instead of one
    # query per (field, video) pair, and index them by the rated video's key.
    # NOTE(review): assumes at most one VideoRating per (user, video); with
    # duplicates the last row returned wins — confirm the uniqueness constraint.
    ratings_by_video = {
        rating.video_id: rating for rating in VideoRating.objects.filter(user=u)
    }

    result = []
    for f in VIDEO_FIELDS:
        for v in videos:
            rating = ratings_by_video.get(v.pk)
            # Videos the user did not rate contribute 0 for every field
            result.append(getattr(rating, f) if rating is not None else 0)

    return result

In [None]:
# Stack the per-user rating vectors into a 2-D array with one row per user
user_vectors = np.array([get_user_vector(user) for user in users])

In [None]:
# Heatmap of the rating matrix: one row per user, one column per
# (field, video) rating. Every column is plotted: get_user_vector does not
# prepend a rating count, so a `[:, 1:]` slice would drop a real rating column.
ax = sns.heatmap(user_vectors)
ax.set(
    title='Experts feature matrix',
    xlabel='rating feature (field x video)',
    ylabel='user index',
)
plt.show()

In [None]:
# Embed the user rating vectors into 2 dimensions for plotting
isomap = Isomap(n_components=2)
user_vectors_mapped = isomap.fit_transform(user_vectors)

In [None]:
# Scatter plot of the 2-D embedding with one text label per user.
# A fixed seed keeps the label jitter identical on every run.
jitter_rng = np.random.RandomState(0)

plt.title('Experts isomap')
plt.scatter(user_vectors_mapped[:, 0], user_vectors_mapped[:, 1])
for i, u in enumerate(users):
    # Random offset in [0, 3) on each axis so labels of nearby points overlap less
    offset = jitter_rng.rand(2) * 3
    label_x, label_y = user_vectors_mapped[i] + offset
    # alpha must lie in [0, 1]; matplotlib rejects values outside that range.
    # NOTE(review): 0.5 is the presumed intent of the previous value 5 — confirm.
    plt.text(label_x, label_y, str(u), alpha=0.5)
plt.show()