In [112]:
# Do not run this notebook while the server is running.
# https://stackoverflow.com/questions/59119396/how-to-use-django-3-0-orm-in-a-jupyter-notebook-without-triggering-the-async-con
import os
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

import json
import pickle
import numpy as np
from django.contrib.auth.models import User as DjangoUser
from backend.models import UserPreferences, Video, VideoRating
from backend.rating_fields import VIDEO_FIELDS
from tqdm.auto import tqdm
from IPython.display import HTML, display
import pandas as pd

In [113]:
# Directory that is scanned for `experiment_*` sub-directories.
results_dir = '/root/ray_results_sinh_loss/featureless_tournesol_lambda'

# Usernames whose scores are loaded and written to the HTML reports.
usernames_to_export = [
    '__aggregate_expert__',
    'le_science4all',
    'aidjango',
    'sergei',
]

In [114]:
def _first_index_map(items):
    """Map every item to the position of its first occurrence in ``items``.

    Returns the same position as ``items.index(item)`` but with a
    constant-time lookup. Items must be hashable.
    """
    positions = {}
    for pos, item in enumerate(items):
        positions.setdefault(item, pos)
    return positions


def load_results(results_dir, usernames_set=None, checkpoint_name='checkpoint_50000'):
    """Load the results from all experiments in a directory.

    Every sub-directory of ``results_dir`` whose name starts with
    ``experiment_`` is inspected. Experiments without a ``params.json`` or
    without ``<checkpoint_name>/learner_ckpt.pkl`` are skipped silently.

    Args:
        results_dir: directory containing the ``experiment_*`` sub-directories.
        usernames_set: iterable of usernames whose scores should be exported.
            ``None`` means no named user. The last entry of the checkpoint's
            expert list is always exported.
        checkpoint_name: name of the checkpoint sub-directory to read from
            each experiment.

    Returns:
        dict mapping the experiment directory name to a dict with the keys
        ``'params'``, ``'objects'``, ``'features'``, ``'experts'``,
        ``'experts_export'`` (names of the exported experts, as strings) and
        one ``'scores:<name>'`` entry per exported expert. Each scores entry
        is a float32 array with one row per object and one column per
        feature; missing scores are NaN.
    """
    exps = sorted(x for x in os.listdir(results_dir) if x.startswith('experiment_'))
    result = {}

    usernames_set = set() if usernames_set is None else set(usernames_set)

    id_to_username = {x.id: x.user.username for x in UserPreferences.objects.all()
                      if x.user.username in usernames_set}

    for exp in tqdm(exps):
        params_path = os.path.join(results_dir, exp, 'params.json')

        if not os.path.isfile(params_path):
            continue

        with open(params_path, 'r') as f:
            params = json.load(f)

        ckpt_path = os.path.join(results_dir, exp, checkpoint_name, 'learner_ckpt.pkl')

        if not os.path.isfile(ckpt_path):
            continue

        # NOTE: unpickling can execute arbitrary code -- only load checkpoints
        # that come from a trusted source.
        with open(ckpt_path, 'rb') as f:
            ckpt = pickle.load(f)

        all_ratings = ckpt['aggregator']['ratings']
        idx = all_ratings['layer']['idx']
        data = all_ratings['data']
        objects = all_ratings['objects']
        features = all_ratings['features']
        experts = all_ratings['experts']

        # The last expert is always exported (the original code calls it the
        # "common" expert). Guard against an empty expert list.
        common_expert = experts[-1] if experts else None

        # Precompute positions once instead of calling list.index() for every
        # (expert, video) pair, which made the loop below quadratic.
        object_pos = _first_index_map(objects)
        expert_pos = _first_index_map(experts)
        n_features = len(features)

        result[exp] = {'params': params,
                       'objects': objects,
                       'features': features,
                       'experts': experts}

        experts_export = []
        for expert in tqdm(experts, leave=False):
            if (expert not in id_to_username) and (expert != common_expert):
                continue
            # Experts without a matching username keep their raw identifier.
            expert_name = id_to_username.get(expert, expert)
            expert_idx = expert_pos[expert]

            rows = []
            for vid in objects:
                video_idx = object_pos[vid]
                row = []
                for feature_id in range(n_features):
                    try:
                        row.append(data[idx.get_key((expert_idx, video_idx, feature_id))])
                    except KeyError:
                        # No score stored for this (expert, video, feature).
                        row.append(np.nan)
                rows.append(np.array(row, dtype=np.float32))

            result[exp]['scores:' + str(expert_name)] = np.array(rows)
            experts_export.append(str(expert_name))

        result[exp]['experts_export'] = experts_export

    return result

In [115]:
# Load the scores of the selected users from every experiment in `results_dir`.
results = load_results(results_dir, usernames_set=usernames_to_export)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2104.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2104.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2104.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2104.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2104.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2104.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2104.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2104.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2104.0), HTML(value='')))




In [116]:
def _render_ranked_rows(rows, feature, color):
    """Render the rows of one top/bottom slice as HTML ``<tr>`` elements."""
    from html import escape

    def text(value):
        # Videos missing from the database have no name/uploader (None).
        return '' if value is None else str(value)

    out = ''
    cells = zip(rows['video_id'], rows['uploader'], rows['name'], rows[feature])
    for video_id, uploader, name, score in cells:
        video_id = text(video_id)
        out += '<tr>'
        out += (f'<td><a href="https://www.youtube.com/watch?v={escape(video_id)}">'
                f'{escape(video_id[:5])}</a></td>')
        out += f'<td>{escape(text(uploader))}</td>'
        # Truncate before escaping so an HTML entity is never cut in half.
        out += f'<td>{escape(text(name)[:20])}</td>'
        out += f'<td><font color="{color}">{score:.3e}</font></td>'
        out += '</tr>'
    return out


def write_result_to_html(result, top_bot_n=5):
    """Get processed results into .html for lambda and mu.

    For every field in ``VIDEO_FIELDS`` and every username listed in the
    first run's ``'experts_export'``, writes ``results_<field>_<username>.html``
    into the current working directory. Each file holds a lambda-by-mu grid
    whose cells list the ``top_bot_n`` highest (green) and ``top_bot_n``
    lowest (red) scored videos of the run with those parameters. Grid cells
    without a run, or whose run has no scores for the username, show ``--``.

    The page header also prints the notebook-level ``results_dir`` variable.

    Args:
        result: dict as returned by ``load_results``.
        top_bot_n: number of videos to show at each end of the ranking.

    Returns:
        list of the file names written, in the order they were written
        (empty when ``result`` is empty).
    """
    from html import escape

    if not result:
        return []

    runs = list(result.values())
    objects = runs[0]['objects']
    features = runs[0]['features']
    # All runs must describe the same videos and features.
    assert all([objects == run['objects'] for run in runs])
    assert all([features == run['features'] for run in runs])

    video_to_info = {v.video_id: {'name': v.name, 'uploader': v.uploader}
                     for v in Video.objects.all()}

    # Title/uploader per video depend only on `objects`, so look them up once.
    video_ids = list(objects)
    names = []
    uploaders = []
    for vid in video_ids:
        info = video_to_info.get(vid)
        if info is None or info['uploader'] is None:
            names.append(None)
            uploaders.append(None)
        else:
            names.append(info['name'])
            uploaders.append(info['uploader'])

    first_run_name = next(iter(result))
    out_files = []

    for f in tqdm(VIDEO_FIELDS):

        for username in runs[0]['experts_export']:

            mus = set()
            lambdas = set()

            # (lam, mu) -> (top rows, bottom rows)
            ranked_by_param = {}

            for res in runs:
                params = res['params']
                lam = params['_gin__lam__grid_search']
                mu = params['_gin__mu__grid_search']

                mus.add(mu)
                lambdas.add(lam)

                scores_key = f'scores:{username}'
                if scores_key not in res:
                    # This run did not export the user; its cell shows "--".
                    continue

                df = pd.DataFrame(res[scores_key], columns=features, index=video_ids)
                df['name'] = names
                df['uploader'] = uploaders
                df['video_id'] = video_ids

                # Drop videos without a score for this field, best score first.
                ranked = df[~pd.isna(df[f])].sort_values(f, ascending=False)

                # head()/tail() also behave for top_bot_n == 0 and for frames
                # with fewer than top_bot_n rows.
                ranked_by_param[(lam, mu)] = (ranked.head(top_bot_n),
                                              ranked.tail(top_bot_n))

            page = '<meta charset="utf-8">'
            page += f'<h3>username: {escape(str(username))} feature={escape(str(f))}</h3><hr />'
            page += f"<p>runs: {len(result)} first run: {escape(str(first_run_name))}</p>"
            page += f"<p>directory: {escape(str(results_dir))}</p>"
            page += '<table>'

            page += '<tr><th></th>'
            for mu in sorted(mus):
                page += f'<th>mu={mu}</th>'
            page += '</tr>'

            for lam in sorted(lambdas):
                page += "<tr>"

                page += f"<th>lam={lam}</th>"

                for mu in sorted(mus):
                    key = (lam, mu)
                    if key not in ranked_by_param:
                        page += "<td>--</td>"
                        continue

                    df_top, df_bot = ranked_by_param[key]

                    cell = ('<table border="1"><tr><th>vid id</th><th>Uploader</th>'
                            f'<th>Video title</th><th>{escape(str(f))}</th></tr>')
                    cell += _render_ranked_rows(df_top, f, 'green')
                    # Blank separator row between the top and bottom videos.
                    cell += '<tr><td></td><td></td><td></td><td></td></tr>'
                    cell += _render_ranked_rows(df_bot, f, 'red')
                    cell += '</table>'

                    page += f"<td>{cell}</td>"

                page += "</tr>"

            page += '</table>'

            fn = f'results_{f}_{username}.html'

            # Video titles may contain non-ASCII characters; do not depend on
            # the locale's default encoding.
            with open(fn, 'w', encoding='utf-8') as f_out:
                f_out.write(page)

            out_files.append(fn)

    return out_files

In [117]:
# Write one HTML report per (field, username) pair; the cell output lists the files written.
write_result_to_html(results)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




['results_largely_recommended_sergei.html',
 'results_largely_recommended_le_science4all.html',
 'results_largely_recommended_aidjango.html',
 'results_largely_recommended___aggregate_expert__.html',
 'results_reliability_sergei.html',
 'results_reliability_le_science4all.html',
 'results_reliability_aidjango.html',
 'results_reliability___aggregate_expert__.html',
 'results_importance_sergei.html',
 'results_importance_le_science4all.html',
 'results_importance_aidjango.html',
 'results_importance___aggregate_expert__.html',
 'results_engaging_sergei.html',
 'results_engaging_le_science4all.html',
 'results_engaging_aidjango.html',
 'results_engaging___aggregate_expert__.html',
 'results_pedagogy_sergei.html',
 'results_pedagogy_le_science4all.html',
 'results_pedagogy_aidjango.html',
 'results_pedagogy___aggregate_expert__.html',
 'results_layman_friendly_sergei.html',
 'results_layman_friendly_le_science4all.html',
 'results_layman_friendly_aidjango.html',
 'results_layman_friendly_