# Model calculation

This notebook obtains an aggregate model and the best individual classification for each galaxy among Galaxy Builder's original 296 subjects.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os

# The rest of the notebook reads and writes paths such as 'lib/...' relative
# to the current directory, so step up one level when the notebook was not
# started from `gzbuilder_results`.
# os.path.basename is used instead of splitting on '/' so the check does not
# depend on the platform's path separator.
if os.path.basename(os.getcwd()) != 'gzbuilder_results':
    os.chdir('..')
print(os.getcwd())

/Users/tlingard/PhD/gzbuilder_collection/gzbuilder_results


In [3]:
import os
import json
import numpy as np
import pandas as pd
from copy import deepcopy
import matplotlib.pyplot as plt
from tqdm import tqdm
import lib.galaxy_utilities as gu
import gzbuilder_analysis.parsing as parsing
import gzbuilder_analysis.rendering as rendering
import gzbuilder_analysis.aggregation as aggregation
import gzbuilder_analysis.fitting as fitting
from gzbuilder_analysis.fitting.jupyter import UpdatableDisplay
from IPython.display import HTML

Parameters of input (or output) file locations, and some operational flags

In [4]:
# Pickle file that the aggregate-model results are written to.
AGGREGATE_LOCATION = 'lib/aggregation_results.pickle'
# Pickle file that the best-individual-classification results are written to.
BEST_INDIVIDUAL_LOCATION = 'lib/best_individual.pickle'

Load in the list of subject ids to use

In [5]:
# Subject ids to process, parsed as unsigned 64-bit integers ('u8').
sid_list = np.loadtxt('lib/subject-id-list.csv', dtype='u8')
# Per-galaxy metadata table; the first CSV column becomes the index, and rows
# are later looked up by subject id.
gal_angle_df = pd.read_csv('lib/gal-metadata.csv', index_col=0)
# Per-subject fitting inputs, also looked up by subject id.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
fitting_metadata = pd.read_pickle('lib/fitting_metadata.pkl')

## Preparation

Define functions to obtain the best individual classification for a galaxy, and its aggregate model

In [6]:
def get_agg_model(subject_id):
    """Build the aggregate model for one subject, rescaled by its `size_diff`.

    The subject's rows are looked up in `gal_angle_df` and `fitting_metadata`,
    its classifications are selected from `gu.classifications`, and these are
    handed to `aggregation.make_model`. The model and its errors are then
    rescaled with `parsing.scale_aggregate_model` and
    `parsing.scale_model_errors`.

    Returns:
        A 4-tuple `(scaled_model, scaled_errors, masks, arms)`; `masks` and
        `arms` are passed through unchanged from `aggregation.make_model`.
    """
    galaxy_row = gal_angle_df.loc[subject_id]
    rotation = galaxy_row['angle']
    size_diff = fitting_metadata.loc[subject_id]['size_diff']

    # Keep only the classifications made for this subject.
    subject_cls = gu.classifications.query(
        'subject_ids == {}'.format(subject_id)
    )
    model, error, masks, arms = aggregation.make_model(
        subject_cls, galaxy_row, rotation
    )
    return (
        parsing.scale_aggregate_model(model, size_diff=size_diff),
        parsing.scale_model_errors(error, size_diff=size_diff),
        masks,
        arms,
    )
  
def get_best_cls(subject_id):
    """Pick the individual classification whose rendered model has the lowest loss.

    Each classification of the subject is parsed into a model
    (`parsing.parse_annotation`), rendered (`rendering.calculate_model`) and
    scored against the galaxy image with `fitting.loss`.

    Args:
        subject_id: key used to look the subject up in `fitting_metadata` and
            to filter `gu.classifications`.

    Returns:
        A 2-tuple `(label, model)`: the index label of the lowest-loss
        classification in `gu.classifications`, and its parsed model.
    """
    diff_data = fitting_metadata.loc[subject_id]

    psf = diff_data['psf']
    # The stored arrays are reversed along their first axis before use.
    # NOTE(review): presumably this matches the orientation the renderer
    # works in - confirm.
    pixel_mask = np.array(diff_data['pixel_mask'])[::-1]
    galaxy_data = np.array(diff_data['galaxy_data'])[::-1]
    sigma_image = np.array(diff_data['sigma_image'])[::-1]
    size_diff = diff_data['size_diff']

    cls = gu.classifications.query(
        'subject_ids == {}'.format(subject_id)
    )
    annotations = cls['annotations'].apply(json.loads)

    models = annotations.apply(parsing.parse_annotation, size_diff=size_diff)
    rendered = models.apply(
        rendering.calculate_model,
        image_size=galaxy_data.shape[0],
        psf=psf
    )
    losses = rendered.apply(
        fitting.loss,
        args=(galaxy_data,),
        pixel_mask=pixel_mask,
        sigma_image=sigma_image
    )
    # Find the minimum once and reuse the label for both return values.
    best_label = losses.idxmin()
    return best_label, models.loc[best_label]

Calculate the list of best individual models

In [7]:
# Score every subject and collect the (classification index, model) pairs
# returned by get_best_cls. The pairs are gathered in a list and turned into a
# DataFrame in one step, rather than growing an empty, dtype-less Series one
# row at a time and expanding it afterwards.
best_records = [
    get_best_cls(subject_id)
    for subject_id in tqdm(sid_list, desc='Finding best individual', leave=True)
]
best_indiv = pd.DataFrame(
    best_records, index=sid_list, columns=['Index', 'Model']
)
best_indiv.to_pickle(BEST_INDIVIDUAL_LOCATION)

Finding best individual: 100%|██████████| 296/296 [38:26<00:00,  3.64s/it] 


In [10]:
# Show five randomly chosen rows as a quick sanity check of the result.
best_indiv.sample(n=5)

Unnamed: 0,Index,Model
21096847,8566,"{'disk': {'mu': [117.01575428247452, 118.30514..."
21096891,4904,"{'disk': {'mu': [104.9871826171875, 103.609313..."
20902061,2337,"{'disk': {'mu': [43.23891520500183, 42.9580732..."
21096996,7546,"{'disk': {'mu': [45.5218505859375, 45.45867919..."
20902055,7094,"{'disk': {'mu': [116.64576721191406, 115.97914..."


Calculate the list of aggregate models

In [9]:
# Register `progress_apply` on pandas objects so the per-subject aggregation
# below reports its progress.
tqdm.pandas(desc='Calculating aggregate', leave=True)

# Map every subject id to the 4-tuple returned by get_agg_model, then expand
# each tuple into its own columns.
aggregation_results = (
    pd.Series(sid_list, index=sid_list)
    .progress_apply(get_agg_model)
    .apply(pd.Series)
)
aggregation_results.columns = ('Model', 'Errors', 'Masks', 'Arms')
aggregation_results.to_pickle(AGGREGATE_LOCATION)

Calculating aggregate: 100%|██████████| 296/296 [1:03:45<00:00, 11.65s/it]
