# Model optimization

This notebook obtains an aggregate model and a best individual classification for each of the 296 subjects in Galaxy Builder's original sample, and then optimizes the parameters of those models (leaving position and roll fixed).

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import json
import numpy as np
import pandas as pd
from copy import deepcopy
import matplotlib.pyplot as plt
from tqdm import tqdm
import lib.galaxy_utilities as gu
import gzbuilder_analysis.parsing as parsing
import gzbuilder_analysis.rendering as rendering
import gzbuilder_analysis.aggregation as aggregation
import gzbuilder_analysis.fitting as fitting
from gzbuilder_analysis.fitting.jupyter import UpdatableDisplay
from IPython.display import HTML

Input and output file locations, and some operational flags

In [3]:
# When True the aggregate models are rebuilt and written to AGGREGATE_LOCATION;
# when False they are read back from that pickle.
RECALCULATE_AGGREGATE = False
AGGREGATE_LOCATION = 'lib/aggregation_results.pickle'
# When True the best individual classifications are re-selected and written to
# BEST_INDIVIDUAL_LOCATION; when False they are read back from that pickle.
RECALCULATE_BEST_INDIVIDUAL = False
BEST_INDIVIDUAL_LOCATION = 'lib/best_individual.pickle'
# Path of the pickle holding the fitted models produced by this notebook.
FITTED_MODEL_LOCATION = 'lib/fitted_models.pickle'

Load in the list of subject ids to use

In [4]:
# ndmin=1 keeps the result one-dimensional even when the file holds a single
# id; without it np.loadtxt returns a 0-d array, which has no len() and cannot
# be used as an index further down.
sid_list = np.loadtxt('lib/subject-id-list.csv', dtype='u8', ndmin=1)

## Preparation

Define functions to obtain the best individual classification for a galaxy, and its aggregate model

In [5]:
def get_agg_model(subject_id):
    """Build the aggregate model for one subject and rescale it.

    All classifications of `subject_id` are passed to
    `aggregation.make_model`; the resulting model and its errors are then
    rescaled by the ratio ``width / imageWidth`` taken from the subject's
    diff data.

    Returns
    -------
    tuple
        ``(scaled_model, scaled_errors, masks, arms)``, where `masks` and
        `arms` are returned exactly as produced by `aggregation.make_model`.
    """
    galaxy, angle = gu.get_galaxy_and_angle(subject_id)
    subject_diff = gu.get_diff_data(subject_id)
    rescale = subject_diff['width'] / subject_diff['imageWidth']

    query_string = 'subject_ids == {}'.format(subject_id)
    subject_cls = gu.classifications.query(query_string)

    aggregate, errors, masks, arms = aggregation.make_model(
        subject_cls, galaxy, angle
    )
    return (
        parsing.scale_aggregate_model(aggregate, size_diff=rescale),
        parsing.scale_model_errors(errors, size_diff=rescale),
        masks,
        arms,
    )
  
def get_best_cls(subject_id):
    """Find the individual classification with the lowest loss for a subject.

    Every classification of `subject_id` is parsed into a model, rendered
    with the subject's PSF, and scored against the galaxy image with
    `fitting.loss` (restricted by the pixel mask).

    Parameters
    ----------
    subject_id
        Subject identifier, matched against the ``subject_ids`` column of
        `gu.classifications`.

    Returns
    -------
    tuple
        ``(index, model)``: the index label of the lowest-loss classification
        in `gu.classifications`, and the parsed model for that classification.
    """
    psf = gu.get_psf(subject_id)
    diff_data = gu.get_diff_data(subject_id)
    # Both arrays are reversed along their first axis; the mask is inverted
    # (1 - mask) before being handed to the loss function.
    pixel_mask = 1 - np.array(diff_data['mask'])[::-1]
    galaxy_data = np.array(diff_data['imageData'])[::-1]
    size_diff = diff_data['width'] / diff_data['imageWidth']

    query_string = 'subject_ids == {}'.format(subject_id)
    cls = gu.classifications.query(query_string)
    annotations = cls['annotations'].apply(json.loads)

    models = annotations.apply(parsing.parse_annotation, size_diff=size_diff)
    rendered = models.apply(
        rendering.calculate_model,
        # Read the size from the ndarray built above: diff_data['imageData']
        # is wrapped in np.array() here, so it may be a plain list with no
        # .shape attribute. Reversing rows does not change shape[0].
        image_size=galaxy_data.shape[0],
        psf=psf
    )
    losses = rendered.apply(
        fitting.loss, args=(galaxy_data,), pixel_mask=pixel_mask
    )
    # Look the minimum up once and reuse it for both return values
    best_index = losses.idxmin()
    return best_index, models.loc[best_index]

Calculate (or load) the list of aggregate models

In [6]:
# Either reuse the cached aggregate models or rebuild them for every subject.
if not RECALCULATE_AGGREGATE:
    aggregation_results = pd.read_pickle(AGGREGATE_LOCATION)
else:
    # Registers `progress_apply` on pandas objects
    tqdm.pandas(desc='Running on all', leave=True)
    aggregation_results = (
        pd.Series(sid_list, index=sid_list)
        .progress_apply(get_agg_model)
        # expand each returned tuple into one column per element
        .apply(pd.Series)
    )
    aggregation_results.columns = ('Model', 'Errors', 'Masks', 'Arms')
    aggregation_results.to_pickle(AGGREGATE_LOCATION)

Calculate (or load) the list of best individual models

In [7]:
# Either reuse the cached best individual classifications or re-select them
# for every subject.
if not RECALCULATE_BEST_INDIVIDUAL:
    best_indiv = pd.read_pickle(BEST_INDIVIDUAL_LOCATION)
else:
    # Registers `progress_apply` on pandas objects
    tqdm.pandas(desc='Running on all', leave=True)
    best_indiv = (
        pd.Series(sid_list, index=sid_list)
        .progress_apply(get_best_cls)
        # expand each returned (index, model) tuple into two columns
        .apply(pd.Series)
    )
    best_indiv.columns = ('Index', 'Model')
    best_indiv.to_pickle(BEST_INDIVIDUAL_LOCATION)

We make use of `gzbuilder_analysis.fitting.Model` to perform fitting. Convert all of the models obtained above into instances of this class:

In [8]:
def make_model(subject_id, m):
    """Wrap a model in a `fitting.Model` bound to the subject's image data.

    Parameters
    ----------
    subject_id
        Subject whose PSF and diff data are looked up through `gu`.
    m
        The model to wrap; passed to `fitting.Model` unchanged.

    Returns
    -------
    fitting.Model
        Constructed from `m`, the subject's image, its PSF and its pixel mask.
    """
    subject_psf = gu.get_psf(subject_id)
    subject_diff = gu.get_diff_data(subject_id)
    # Image and mask are both reversed along the first axis; the mask is
    # inverted (1 - mask) before use.
    flipped_mask = 1 - np.array(subject_diff['mask'])[::-1]
    flipped_image = np.array(subject_diff['imageData'])[::-1]
    return fitting.Model(
        m,
        flipped_image,
        psf=subject_psf,
        pixel_mask=flipped_mask,
    )

In [9]:
# One fitting.Model per subject, keyed by subject id.
# dtype=object is given explicitly: the default dtype of an empty Series is
# version-dependent (and deprecated), and the values stored here are objects.
agg_models = pd.Series([], dtype=object, name='agg_models')
with tqdm(
    aggregation_results['Model'].items(),
    total=len(aggregation_results)
) as bar:
    for i, model in bar:
        agg_models[i] = make_model(i, model)

100%|██████████| 296/296 [01:35<00:00,  4.89it/s]


In [10]:
# One fitting.Model per subject's best individual classification, keyed by
# subject id. dtype=object is given explicitly: the default dtype of an empty
# Series is version-dependent (and deprecated), and the values are objects.
bi_models = pd.Series([], dtype=object, name='models')
with tqdm(best_indiv['Model'].items(), total=len(best_indiv)) as bar:
    for i, model in bar:
        bi_models[i] = make_model(i, model)

100%|██████████| 296/296 [01:34<00:00,  4.87it/s]


## Optimization

We now fit the models created earlier. The parameters fitted for each component are:

|         | $i_0$ | $R_e$ | $e$ | $n$ | $c$ | $W$ | $F$ |
|:--------|:------|:------|:----|:----|:----|:----|:----|
| Disk    | x     | x     |     |     |     |     |     |
| Bulge   | x     | x     |     | x   |     |     |     |
| Bar     | x     | x     |     | x   | x   |     |     |
| Spirals | x     |       |     |     |     | x   | x   |

Where $W$ is the spiral spread and $F$ is spiral falloff (others are parameters of a boxy Sérsic profile).

For the aggregate model we first fit without spiral arms, then introduce low-brightness arms and fit fully.

In [23]:
# Display the per-component parameters that are fitted.
# NOTE(review): this import sits mid-notebook; consider moving it to the
# imports cell at the top so all dependencies are visible in one place.
from gzbuilder_analysis.config import FIT_PARAMS
FIT_PARAMS

{'disk': ('i0', 'rEff'),
 'bulge': ('i0', 'rEff', 'n'),
 'bar': ('i0', 'rEff', 'n', 'c'),
 'spiral': ('i0', 'spread', 'falloff')}

In [11]:
def reset_spiral_intensity(s, i0=0.01):
    """Return a copy of a spiral arm with its intensity reset.

    Parameters
    ----------
    s : sequence of length 2
        ``(points, params)`` pair describing one spiral arm, where `params`
        is a mapping of arm parameters.
    i0 : float, optional
        Value stored under ``'i0'`` in the returned parameters. Defaults to
        0.01, the value previously hard-coded here.

    Returns
    -------
    list
        ``[points, new_params]``. `points` is the same object that was passed
        in; `new_params` is a deep copy of `params`, so the input mapping is
        left unmodified.
    """
    points, params = s
    new_params = deepcopy(params)
    new_params['i0'] = i0
    return [points, new_params]

In [12]:
# Fitted aggregate models keyed by subject id. Kept in its own cell so the
# fitting loop can be re-run and resume without discarding finished fits.
# dtype=object is explicit because the default dtype of an empty Series is
# version-dependent (and deprecated).
fitted_agg_models = pd.Series([], dtype=object, name='fitted_agg_models')

In [22]:
# Two-stage fit of every aggregate model: first without spiral arms, then
# (if the model had arms) with the arms re-introduced at low intensity.
# Subjects already present in `fitted_agg_models` are skipped, so the cell can
# be interrupted and re-run to resume.
# NOTE(review): '</br>' is not a valid HTML tag; '<b>...</b>' was presumably
# intended. The string is left unchanged here.
display(HTML('<br>Fitting models</br>'))
# v formats one progress line: "<position> / <total>: <subject id> - <stage>"
v = lambda *a: HTML('{} / {}: {} - {}'.format(*a))
# Use the collection actually being iterated as the progress denominator
n_agg_models = len(agg_models)
d = UpdatableDisplay(v(0, n_agg_models, '', ''))
# Subjects that did not yield a fitted model this run: subject id -> reason.
# Previously these were dropped silently.
agg_fit_failures = {}
for i, (s, model) in enumerate(agg_models.items()):
    if s in fitted_agg_models.index:
        continue
    d(v(i, n_agg_models, s, '(1/2) Without spirals'))
    try:
        spirals_removed = deepcopy(model._model['spiral'])
        agg_model_nosp = model.copy_with_new_model({
            **deepcopy(model._model),
            'spiral': np.array([])
        })
        new_model, res = fitting.fit(agg_model_nosp, progress=True)
        if not res['success']:
            agg_fit_failures[s] = 'fit without spirals did not report success'
            continue
        if len(spirals_removed) == 0:
            # Nothing to re-introduce: the spiral-free fit is the final one
            fitted_agg_models[s] = new_model
            continue
        d(v(i, n_agg_models, s, '(2/2) With spirals'))
        agg_model_sp = model.copy_with_new_model({
            **deepcopy(new_model),
            'spiral': [
                reset_spiral_intensity(arm) for arm in spirals_removed
            ],
        })
        new_model, res = fitting.fit(agg_model_sp, progress=True)
        if res['success']:
            fitted_agg_models[s] = new_model
        else:
            agg_fit_failures[s] = 'fit with spirals did not report success'
    except ZeroDivisionError as err:
        # Best-effort: skip this subject, but keep a record of why
        agg_fit_failures[s] = 'ZeroDivisionError: {}'.format(err)
    except KeyboardInterrupt:
        d(v(i, n_agg_models, s, 'Interrupted'))
        break
else:
    # Only reached when the loop was not interrupted; also safe when
    # `agg_models` is empty (the loop variable is never read here).
    d(v(n_agg_models, n_agg_models, 'All done', ''))

                                      

We then fit the best individual classifications. These are fitted in a single pass, with any spiral arms included from the start.

In [14]:
# Fitted best-individual models keyed by subject id. Kept in its own cell so
# the fitting loop can be re-run and resume without discarding finished fits.
# dtype=object is explicit because the default dtype of an empty Series is
# version-dependent (and deprecated).
fitted_bi_models = pd.Series([], dtype=object, name='fitted_bi_models')

In [17]:
# Single-pass fit of every best individual model. Subjects already present in
# `fitted_bi_models` are skipped, so the cell can be interrupted and re-run to
# resume.
# NOTE(review): '</br>' is not a valid HTML tag; '<b>...</b>' was presumably
# intended. The string is left unchanged here.
display(HTML('<br>Fitting models</br>'))
# v formats one progress line: "<position> / <total>: <message>"
v = lambda *a: HTML('{} / {}: {}'.format(*a))
# Use the collection actually being iterated as the progress denominator
n_bi_models = len(bi_models)
d = UpdatableDisplay(v(0, n_bi_models, ''))
# Subjects that did not yield a fitted model this run: subject id -> reason.
# Previously these were dropped silently.
bi_fit_failures = {}
for i, (s, model) in enumerate(bi_models.items()):
    if s in fitted_bi_models.index:
        continue
    d(v(i, n_bi_models, s))
    try:
        new_model, res = fitting.fit(model, progress=True)
        if res['success']:
            fitted_bi_models[s] = new_model
        else:
            bi_fit_failures[s] = 'fit did not report success'
    except ZeroDivisionError as err:
        # Best-effort: skip this subject, but keep a record of why
        bi_fit_failures[s] = 'ZeroDivisionError: {}'.format(err)
    except KeyboardInterrupt:
        d(v(i, n_bi_models, 'Interrupted at {}'.format(s)))
        break
else:
    # Only reached when the loop was not interrupted; also safe when
    # `bi_models` is empty (the loop variable is never read here).
    d(v(n_bi_models, n_bi_models, 'All done'))

                                                                           

In [19]:
# Combine both sets of fitted models into one frame indexed by subject id,
# with one column per input Series, and save it to the location declared in
# the configuration cell rather than a duplicated string literal.
fitted_models = pd.concat((
    fitted_agg_models,
    fitted_bi_models,
), axis=1)
fitted_models.to_pickle(FITTED_MODEL_LOCATION)