# Model calculation

This notebook obtains an aggregate model and the best individual classification for each of Galaxy Builder's original 296 subjects.

In [2]:
# Enable IPython's autoreload extension; mode 2 re-imports all loaded modules
# before each cell executes, so edits to the local packages imported below
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import os
import json
import numpy as np
import pandas as pd
from copy import deepcopy
import matplotlib.pyplot as plt
from tqdm import tqdm
import lib.galaxy_utilities as gu
import gzbuilder_analysis.parsing as parsing
import gzbuilder_analysis.rendering as rendering
import gzbuilder_analysis.aggregation as aggregation
import gzbuilder_analysis.fitting as fitting
from gzbuilder_analysis.fitting.jupyter import UpdatableDisplay
from IPython.display import HTML

Could not find some files, some functions may not work


Parameters of input (or output) file locations, and some operational flags

In [4]:
# Pickle file the aggregate-model table is written to.
AGGREGATE_LOCATION = 'lib/aggregation_results.pickle'
# Pickle file the best-individual-classification table is written to.
BEST_INDIVIDUAL_LOCATION = 'lib/best_individual.pickle'
# Not referenced in the visible part of this notebook; presumably the
# location of fitted models used by a later step -- TODO confirm.
FITTED_MODEL_LOCATION = 'lib/fitted_models.pickle'

Load in the list of subject ids to use

In [23]:
# Subject ids to process, parsed as unsigned 64-bit integers.
sid_list = np.loadtxt('lib/subject-id-list.csv', dtype='u8')
# Galaxy metadata, indexed by the first CSV column and looked up by subject id
# below (the 'angle' field is read from it).
gal_angle_df = pd.read_csv('lib/gal-metadata.csv', index_col=0)
# Per-subject image data, looked up by subject id below (fields such as
# 'width' and 'imageWidth' are read from it).
# NOTE(review): unpickling can execute arbitrary code -- only load a file
# that comes from a trusted source.
diff_data_df = pd.read_pickle('lib/diff-data.pkl')

## Preparation

Define functions to obtain the best individual classification for a galaxy, and its aggregate model

In [6]:
def get_agg_model(subject_id):
  """Build the aggregate model for one subject and rescale it.

  The subject's classifications are selected from `gu.classifications`,
  passed to `aggregation.make_model` together with the subject's metadata
  row and its 'angle' value, and the resulting model and errors are rescaled
  by the ratio of the subject's 'width' to its 'imageWidth'.

  Args:
    subject_id: label used to look the subject up in `gal_angle_df`,
      `diff_data_df` and the 'subject_ids' column of `gu.classifications`.

  Returns:
    tuple: (scaled model, scaled errors, masks, arms); the masks and arms are
    returned exactly as produced by `aggregation.make_model`.
  """
  metadata_row = gal_angle_df.loc[subject_id]
  rotation = metadata_row['angle']
  subject_diff = diff_data_df.loc[subject_id]
  scale_ratio = subject_diff['width'] / subject_diff['imageWidth']

  selector = 'subject_ids == {}'.format(subject_id)
  subject_cls = gu.classifications.query(selector)

  agg_model, agg_error, masks, arms = aggregation.make_model(
    subject_cls, metadata_row, rotation
  )
  rescaled_model = parsing.scale_aggregate_model(
    agg_model, size_diff=scale_ratio
  )
  rescaled_error = parsing.scale_model_errors(
    agg_error, size_diff=scale_ratio
  )
  return rescaled_model, rescaled_error, masks, arms
  
def get_best_cls(subject_id):
  """Find the individual classification whose rendered model has the lowest loss.

  Every classification of the subject is parsed into a model, rendered with
  the subject's PSF, and scored against the galaxy image with `fitting.loss`.

  Args:
    subject_id: label used to look the subject up in `diff_data_df` and the
      'subject_ids' column of `gu.classifications`.

  Returns:
    tuple: (index label of the lowest-loss classification, its parsed model).

  Raises:
    ValueError: propagated from `Series.idxmin` if the subject has no
      classifications to score.
  """
  diff_data = diff_data_df.loc[subject_id]

  psf = diff_data['psf']
  # The mask and the image are both reversed along their first axis so they
  # stay aligned with each other. The mask is also inverted (1 - mask),
  # presumably so that 1 marks usable pixels -- TODO confirm.
  pixel_mask = 1 - np.array(diff_data['mask'])[::-1]
  galaxy_data = np.array(diff_data['imageData'])[::-1]
  size_diff = diff_data['width'] / diff_data['imageWidth']

  cls = gu.classifications.query(
    'subject_ids == {}'.format(subject_id)
  )
  annotations = cls['annotations'].apply(json.loads)

  models = annotations.apply(parsing.parse_annotation, size_diff=size_diff)
  rendered = models.apply(
    rendering.calculate_model,
    # Taken from the converted array so it matches what is scored below.
    image_size=galaxy_data.shape[0],
    psf=psf
  )
  losses = rendered.apply(fitting.loss, args=(galaxy_data,), pixel_mask=pixel_mask)

  # Locate the minimum once and reuse the label for both return values.
  best_index = losses.idxmin()
  return best_index, models.loc[best_index]

Calculate the list of aggregate models

In [7]:
# Register tqdm's progress_apply on pandas objects for this calculation.
tqdm.pandas(desc='Calculating aggregate', leave=True)

# One get_agg_model call per subject id; each returned tuple is expanded
# into one column per element.
aggregation_results = (
  pd.Series(sid_list, index=sid_list)
  .progress_apply(get_agg_model)
  .apply(pd.Series)
)
aggregation_results.columns = ('Model', 'Errors', 'Masks', 'Arms')

# Persist the table so it can be reused without recalculating.
aggregation_results.to_pickle(AGGREGATE_LOCATION)

Calculate (or load) the list of best individual models

In [8]:
# Scoring every classification of every subject takes on the order of an
# hour, so reuse the cached table when it exists. Delete the file at
# BEST_INDIVIDUAL_LOCATION to force a recalculation (e.g. after changing
# the subject list or the scoring code).
if os.path.isfile(BEST_INDIVIDUAL_LOCATION):
  # NOTE(review): unpickling can execute arbitrary code -- only load a file
  # that was written by this notebook.
  best_indiv = pd.read_pickle(BEST_INDIVIDUAL_LOCATION)
else:
  tqdm.pandas(desc='Finding best individual', leave=True)
  # One get_best_cls call per subject id; each returned (index, model) tuple
  # is expanded into two columns.
  best_indiv = (
    pd.Series(sid_list, index=sid_list)
    .progress_apply(get_best_cls)
    .apply(pd.Series)
  )
  best_indiv.columns = ('Index', 'Model')
  best_indiv.to_pickle(BEST_INDIVIDUAL_LOCATION)

Finding best individual:   5%|▌         | 15/296 [02:14<57:13, 12.22s/it]  Finding best individual:   7%|▋         | 22/296 [02:58<28:33,  6.25s/it]

KeyboardInterrupt: 