# Volunteer model loss calculation

This notebook takes a `subject_id` and calculates the MSE between each volunteer's model and the galaxy data. These losses are then glued to the notebook (using `scrapbook`) for use elsewhere.

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to local packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from gzbuilder_analysis import parsing, rendering, fitting
import lib.galaxy_utilities as gu
import scrapbook as sb

  data = yaml.load(f.read()) or {}


In [3]:
# Default subject to analyse. The "# Parameters" cell that follows reassigns
# this name (presumably injected by papermill — TODO confirm).
subject_id = 20902040

In [4]:
# Parameters
# Overrides the default subject_id assigned in the previous cell; this is the
# value used by the rest of the notebook.
subject_id = 21686544


In [5]:
# Per-subject fitting metadata, looked up by subject id with `.loc`.
# get_losses_by_user reads the 'psf', 'pixel_mask', 'galaxy_data' and
# 'size_diff' entries of the selected row.
# NOTE(review): unpickling can execute arbitrary code — only load a trusted file.
diff_data_df = pd.read_pickle('lib/fitting_metadata.pkl')

In [6]:
def get_losses_by_user(subject_id):
  """Calculate each volunteer's best model loss for one subject.

  Every classification made on `subject_id` is parsed into a model, rendered,
  and scored against the galaxy image with `fitting.loss`. A volunteer who
  classified the subject more than once is reported once, with the lowest
  loss among their models.

  Parameters
  ----------
  subject_id : int
    Label used to select a row of the module-level `diff_data_df` and the
    value matched against the `subject_ids` column of `gu.classifications`.

  Returns
  -------
  pandas.Series
    Loss values indexed by user name and named `subject_id`, with exactly one
    entry per user name, in order of first appearance. Models whose loss is
    NaN are dropped.
  """
  diff_data = diff_data_df.loc[subject_id]
  psf = diff_data['psf']
  # Mask and image rows are reversed before use — presumably to match the
  # orientation of the rendered models; TODO confirm.
  pixel_mask = np.array(diff_data['pixel_mask'])[::-1]
  galaxy_data = np.array(diff_data['galaxy_data'])[::-1]
  image_size = galaxy_data.shape[0]
  size_diff = diff_data['size_diff']

  classifications = gu.classifications.query(
    'subject_ids == {}'.format(subject_id)
  )
  names = classifications.user_name
  annotations = classifications['annotations'].apply(json.loads)

  models = annotations.apply(
    parsing.parse_annotation,
    size_diff=size_diff
  )

  # Register `progress_apply` on pandas objects with a transient progress bar.
  tqdm.pandas(
    desc='Rendering models',
    leave=False
  )
  rendered = models.progress_apply(
    rendering.calculate_model,
    image_size=image_size,
    psf=psf
  ).rename('rendered')

  # Scoring uses a plain `apply` (no progress bar), as in the original code.
  losses = rendered.apply(
    fitting.loss,
    args=(galaxy_data, pixel_mask)
  ).rename('loss')

  # Re-index the losses by user name (positional alignment with `names`).
  all_losses = pd.Series(
    losses.values,
    index=names.values
  ).rename(subject_id)

  # Keep the lowest loss per user name. Grouping on the index is required
  # here: `Series.duplicated` compares *values*, so using it to find repeat
  # volunteers would leave a user with several different losses in the result
  # more than once. NaN losses are dropped first, matching the previous
  # behaviour of discarding them.
  best_losses = (
    all_losses
    .dropna()
    .groupby(level=0, sort=False)
    .min()
  )
  return best_losses

In [7]:
# Compute the per-user losses for the selected subject and display them as the
# cell output.
losses = get_losses_by_user(subject_id)
losses

Rendering models:   0%|          | 0/30 [00:00<?, ?it/s]

Rendering models:  10%|█         | 3/30 [00:05<00:49,  1.84s/it]

Rendering models:  17%|█▋        | 5/30 [00:05<00:32,  1.31s/it]

Rendering models:  20%|██        | 6/30 [00:05<00:23,  1.04it/s]

Rendering models:  27%|██▋       | 8/30 [00:06<00:15,  1.43it/s]

Rendering models:  33%|███▎      | 10/30 [00:07<00:15,  1.33it/s]

Rendering models:  37%|███▋      | 11/30 [00:07<00:10,  1.77it/s]

Rendering models:  40%|████      | 12/30 [00:08<00:07,  2.27it/s]

Rendering models:  50%|█████     | 15/30 [00:08<00:05,  3.00it/s]

Rendering models:  57%|█████▋    | 17/30 [00:08<00:03,  3.43it/s]

Rendering models:  60%|██████    | 18/30 [00:08<00:03,  3.71it/s]

Rendering models:  63%|██████▎   | 19/30 [00:09<00:02,  3.67it/s]

Rendering models:  67%|██████▋   | 20/30 [00:09<00:02,  4.04it/s]

Rendering models:  77%|███████▋  | 23/30 [00:09<00:01,  5.35it/s]

Rendering models:  83%|████████▎ | 25/30 [00:09<00:00,  5.67it/s]

Rendering models:  87%|████████▋ | 26/30 [00:09<00:00,  6.13it/s]

Rendering models:  90%|█████████ | 27/30 [00:10<00:00,  4.84it/s]

Rendering models:  93%|█████████▎| 28/30 [00:10<00:00,  5.07it/s]

Rendering models:  97%|█████████▋| 29/30 [00:10<00:00,  5.15it/s]

Rendering models: 100%|██████████| 30/30 [00:10<00:00,  4.64it/s]

                                                                 



blazar.                               0.002416
cschwefl                              0.000713
Camelron                              0.000921
djswanso                              0.001996
rnevils                               0.000510
jmfranci                              0.002356
ElisabethB                            0.123786
tosnyder                              0.001027
xc                                    0.264628
fyzxfan                               0.000554
Saharisunshine                        0.001246
pangeli5                              0.000860
aidenr                                0.000500
aisha3540                             0.007546
Lavadude                              0.044463
Bambura                               0.006718
jzeiszler                             0.000873
barrowwright                          0.000854
ktayl47                               0.000387
not-logged-in-6cdc6c36435b7c7ca90e    0.153740
kgenere                               0.069516
not-logged-in

In [8]:
# Record the subject id and the losses (converted to a plain dict keyed by
# user name) in the notebook with scrapbook, so they can be read elsewhere.
sb.glue('subject_id', subject_id)
sb.glue('losses', losses.to_dict())