## SDSS Astroquery Batch

Runs in batches through the images used in the GZ1 (SDSS DR7) dataset and queries SDSS for each object's r-band magnitude and Petrosian half-light radius (with their errors), for use in data cuts.

In [36]:
import numpy as np
import pandas as pd
import time
import math
import os
import random
import astropy.units as u
from astroquery.sdss import SDSS
from astropy.coordinates import SkyCoord

In [37]:
# Search radius passed to the SDSS region query around each catalogue position.
RADIUS = "1 arcsec"

# Input cross-matched catalogue and the file the queried results are written to.
CATALOG_PATH = '../Data/gz1_desi_cross_cat.csv'
OUTPUT_PATH = '../Data/gz1_desi_cross_cat_queried.csv'

# Load the whole catalogue into memory; it is split into batches further down.
catalog = pd.read_csv(CATALOG_PATH)

In [38]:
def split_dataframe(data, no_of_batches):
    """Split ``data`` row-wise into at most ``no_of_batches`` consecutive chunks.

    Every chunk holds ``ceil(n_rows / no_of_batches)`` rows except possibly the
    last one, which holds the remainder. When ``data`` has fewer rows than
    ``no_of_batches``, fewer (single-row) chunks are returned.

    Args:
        data: Object exposing ``shape[0]`` and slice indexing (a DataFrame here).
        no_of_batches: Upper bound on the number of chunks; must be >= 1.

    Returns:
        A list of consecutive slices of ``data`` in original row order; an empty
        list when ``data`` has no rows.

    Raises:
        ValueError: If ``no_of_batches`` is smaller than 1.
    """
    if no_of_batches < 1:
        raise ValueError(f"no_of_batches must be >= 1, got {no_of_batches}")

    n_rows = data.shape[0]
    if n_rows == 0:
        # A zero batch size would make range() fail on a zero step.
        return []

    batch_size = math.ceil(n_rows / no_of_batches)
    return [data[start:start + batch_size] for start in range(0, n_rows, batch_size)]

# Query SDSS in 10 chunks rather than one request for the whole catalogue.
batched_df = split_dataframe(catalog, 10)

# Start from a clean output file: every batch below is appended to it, and the
# header is only written while the file does not exist yet.
if os.path.exists(OUTPUT_PATH):
    os.remove(OUTPUT_PATH)

# Photometric columns requested from SDSS for every matched object.
PHOTO_FIELDS = ["objID", "r", "err_r", "petroR50_r", "petroR50Err_r"]

for i, batch in enumerate(batched_df):
    print(f"Processing batch {i}")

    # RA is parsed as hour angle and DEC as degrees.
    coords = SkyCoord(batch["RA"], batch["DEC"], unit=(u.hourangle, u.deg))

    results = SDSS.query_region(
        coords,
        data_release=7,
        radius=RADIUS,
        photoobj_fields=PHOTO_FIELDS,
        spectro=True,
    )

    if results is None:
        # astroquery returns None (not an empty table) when nothing matches.
        # Substitute an empty frame with the requested columns so the left
        # merge still emits every catalogue row with the same CSV layout.
        # NOTE(review): assumes a successful query returns exactly
        # PHOTO_FIELDS as its columns -- confirm against a real response.
        results = pd.DataFrame(columns=PHOTO_FIELDS).astype(
            {"objID": batch["OBJID"].dtype}
        )
    else:
        results = results.to_pandas()

    # Left merge keeps every catalogue row; unmatched rows get empty SDSS values.
    merged_batch = pd.merge(batch, results, how='left', left_on='OBJID', right_on='objID')

    # Pause between requests -- presumably to avoid overloading the SDSS server.
    time.sleep(1)

    merged_batch.to_csv(OUTPUT_PATH, mode='a', header=not os.path.exists(OUTPUT_PATH), index=False)


Processing batch 0
