# Run prediction pipeline for radio-detectable AGN

Jupyter Notebook to run the prediction pipeline presented in Carvajal et al., 2023.

The example code uses data from IR-detected sources in either the HETDEX Spring field or Stripe 82.

In [None]:
%matplotlib inline
# Static plots
import numpy as np
from pycaret import classification as pyc
from pycaret import regression as pyr
from joblib import load
import pandas as pd
import global_variables as gv
import global_functions as gf
import os
import subprocess

### Reading data

Select the field whose sources will be predicted.

In [None]:
# Field whose catalogue is processed by the rest of the notebook. The value is
# used as a key of the field -> file-name dictionary, so it must be one of the
# two options listed in the trailing comment.
used_field = 'HETDEX'  # 'HETDEX' or 'S82'

In [None]:
# Catalogue file name for every supported field, keyed by the field label.
file_name_dict = {
    'S82': gv.file_S82,
    'HETDEX': gv.file_HETDEX,
}

# Full path of the catalogue for the selected field.
# NOTE(review): this uses gv.model_path while the model cells use
# gv.models_path -- confirm both attributes exist and that this one is intended.
file_name = gv.model_path + file_name_dict[used_field]

Check that the data file has been downloaded; fetch it if it is missing.

In [None]:
# Fetch the catalogue for the selected field only when it is not on disk yet.
if not os.path.exists(file_name):
    try:
        # check=True raises CalledProcessError when wget exits with an error.
        # NOTE(review): --no-check-certificate disables TLS verification of
        # the download server -- confirm this is really required.
        subprocess.run(['wget', '--no-check-certificate',
                        'https://zenodo.org/' + file_name_dict[used_field],
                        '-O', file_name], check=True, text=True)
    except subprocess.CalledProcessError:
        # wget -O creates/truncates the output file before the transfer
        # starts, so a failed download leaves an empty or partial file behind.
        # Remove it; otherwise the existence check above would skip the
        # download on the next run and the parquet read would fail.
        if os.path.exists(file_name):
            os.remove(file_name)
        raise

For this notebook, not all columns will be needed. Select those that will be used.

In [None]:
# Columns read from the catalogue file; every other column is skipped.
used_cols = [
    'Z', 'band_num', 'class',
    'W1mproPM', 'W2mproPM',
    'gmag', 'rmag', 'imag', 'zmag', 'ymag',
    'W3mag', 'W4mag',
    'Jmag', 'Hmag', 'Kmag',
    'LOFAR_detect', 'radio_AGN',
]

Load file

In [None]:
# Read only the columns listed in used_cols from the parquet catalogue,
# using the fastparquet engine.
data_df = pd.read_parquet(file_name, columns=used_cols, engine='fastparquet')

Create new columns with colours. Only create colours used by models in pipeline. They are listed in the article.

In [None]:
# Colours used by each group of models; names follow the '<band a>_<band b>' pattern.
colours_AGN = [
    'g_r', 'r_i', 'r_J', 'i_z', 'i_y', 'z_y', 'z_W2', 'y_J',
    'y_W1', 'y_W2', 'J_H', 'H_K', 'H_W3', 'W1_W2', 'W1_W3', 'W3_W4',
]
colours_radio = [
    'g_r', 'g_i', 'r_i', 'r_z', 'i_z', 'z_y', 'z_W1', 'y_J',
    'y_W1', 'J_H', 'H_K', 'K_W3', 'K_W4', 'W1_W2', 'W2_W3',
]
colours_z = [
    'g_r', 'g_W3', 'r_i', 'r_z', 'i_z', 'i_y', 'z_y', 'y_J',
    'y_W1', 'J_H', 'H_K', 'K_W3', 'K_W4', 'W1_W2', 'W2_W3',
]

# Sorted list holding every distinct colour of the three lists above exactly once.
new_colours = list(np.unique([*colours_AGN, *colours_radio, *colours_z]))

In [None]:
# Short band label (as used in the colour names) -> magnitude column name.
mag_names = {
    'g': 'gmag',
    'r': 'rmag',
    'i': 'imag',
    'z': 'zmag',
    'y': 'ymag',
    'J': 'Jmag',
    'H': 'Hmag',
    'K': 'Kmag',
    'W1': 'W1mproPM',
    'W2': 'W2mproPM',
    'W3': 'W3mag',
    'W4': 'W4mag',
}

In [None]:
# Add one column per colour: the magnitude of the first band minus the
# magnitude of the second band named in the colour label.
for colour in new_colours:
    first_band, second_band = colour.split('_')
    data_df[colour] = data_df[mag_names[first_band]] - data_df[mag_names[second_band]]

Load models

Check that the model files have been downloaded; fetch any that are missing.

In [None]:
# Download every model file that is not yet present in the models directory.
# Each file is written to the same path that is tested for existence here and
# that the model-loading cell reads from (gv.models_path + <model file name>).
for model_file in (gv.AGN_gal_model,
                   gv.cal_AGN_gal_model,
                   gv.radio_model,
                   gv.cal_radio_model,
                   gv.full_z_model):
    model_file_path = gv.models_path + model_file
    if os.path.exists(model_file_path):
        continue
    try:
        # check=True raises CalledProcessError when wget exits with an error.
        # NOTE(review): --no-check-certificate disables TLS verification of
        # the download server; the downloaded files are later unpickled, so
        # confirm this flag is really required.
        subprocess.run(['wget', '--no-check-certificate',
                        'https://zenodo.org/' + model_file,
                        '-O', model_file_path], check=True, text=True)
    except subprocess.CalledProcessError:
        # wget -O creates/truncates the output file before the transfer
        # starts, so a failed download leaves an empty or partial file behind.
        # Remove it; otherwise the existence check would skip the download on
        # the next run and loading the model would fail.
        if os.path.exists(model_file_path):
            os.remove(model_file_path)
        raise

In [None]:
# AGN/galaxy step: base classifier (loaded through pycaret) and its calibrated model.
# joblib.load unpickles the file, so only load model files from a trusted source.
AGN_gal_clf = pyc.load_model(f'{gv.models_path}{gv.AGN_gal_model}')
cal_AGN_gal_clf = load(f'{gv.models_path}{gv.cal_AGN_gal_model}')

# Radio-detection step: base classifier (loaded through pycaret) and its calibrated model.
radio_det_AGN_clf = pyc.load_model(f'{gv.models_path}{gv.radio_model}')
cal_radio_det_AGN_clf = load(f'{gv.models_path}{gv.cal_radio_model}')

# Redshift step: regression model loaded through pycaret.
redshift_reg_rAGN = pyr.load_model(f'{gv.models_path}{gv.full_z_model}')

#### Run prediction models

Run the models over all sources in the dataset. Afterwards, the user can select the sources predicted to be radio-detectable AGN (or any other combination of predictions).

Classify between AGN and galaxies.

In [None]:
# AGN/galaxy step: the helper receives the frame, the base classifier, its
# calibrated model and the two thresholds; the frame it returns replaces data_df.
data_df = gf.predict_AGN_gal(
    data_df,
    AGN_gal_clf,
    cal_AGN_gal_clf,
    gv.AGN_thresh,
    gv.cal_AGN_thresh,
)

Classify between radio-detectable and non radio-detectable sources.

In [None]:
# Radio-detection step: the helper receives the frame, the base classifier, its
# calibrated model and the two thresholds; the frame it returns replaces data_df.
data_df = gf.predict_radio_det(
    data_df,
    radio_det_AGN_clf,
    cal_radio_det_AGN_clf,
    gv.radio_thresh,
    gv.cal_radio_thresh,
)

Predict photometric redshifts.

In [None]:
# Redshift step: the helper receives the frame and the regression model;
# the frame it returns replaces data_df.
data_df = gf.predict_z(data_df, redshift_reg_rAGN)

Display the first ten predictions in the data frame.

In [None]:
# Show the first ten rows by position. .loc[:10] slices by index label and
# includes the end label, so it returns eleven rows on a default integer index
# and depends on what the index of the catalogue happens to contain.
display(data_df.head(10))