## SDSS Astroquery Batch

Batch runs through the images used in the GZ1 (SDSS DR7) dataset and returns half-light radii etc for data cuts

In [1]:
import numpy as np
import pandas as pd
import time
import math
import os
import astropy.units as u
from astroquery.sdss import SDSS
from astropy.coordinates import SkyCoord

In [2]:
CATALOG_PATH = '../Data/gz1_desi_cross_cat.csv'
OUTPUT_PATH = '../Data/gz1_desi_cross_cat_queried.csv'
RADIUS = "1 arcsec"
catalog = pd.read_csv(CATALOG_PATH)

In [3]:
def split_dataframe(data, no_of_batches):
    batch_size = math.ceil(data.shape[0] / no_of_batches)
    batched_df = [data[i:i+batch_size] for i in range(0,data.shape[0], batch_size)]
    return batched_df

def get_SDSS_info_batch():
    batched_df = split_dataframe(catalog,200) #30s per batch, more than this seems to fail

    if os.path.exists(OUTPUT_PATH):
        os.remove(OUTPUT_PATH)

    for i, batch in enumerate(batched_df):
        #print(f"Processing batch {i}")
        
        coords = SkyCoord(batch["RA"],batch["DEC"],unit=(u.hourangle, u.deg))
        results = pd.DataFrame(SDSS.query_region(coords,data_release=7,radius=RADIUS,photoobj_fields=["objID","ra","dec","err_r","petroR50_r","petroR50Err_r"]).to_pandas())
    
        #Clean up OBJID fields
        batch.loc[:,'OBJID'] = batch['OBJID'].astype(str).str.strip()
        results.loc[:,'objID'] = results['objID'].astype(str).str.strip()
        
        k=0
        j=0
        rows_list = []
        while k < len(batch)-1: #Run through each item in batch
            batch_row = batch.iloc[k]
            results_row = results.iloc[j]
            
            if batch_row['OBJID'] == results_row['objID']: #If OBJIDs match
                #print(f"Match at row {k}")
                if batch.iloc[k+1]['OBJID'] == results.iloc[j+1]['objID']: #If next object OBJIDs match
                    #print(f"Adding row {k} as next row matches")
                    batch_dict = batch_row.to_dict()
                    results_dict = results_row.to_dict()
                    batch_dict.update(results_dict)# Add matching rows from batch and results
                    rows_list.append(batch_dict)
                else:
                    #print(f"Skipping row {k} as next row does not match")
                    while batch.iloc[k+1]['OBJID'] != results.iloc[j+1]['objID']:
                        j += 1 # Move through results until match found
            k += 1 #Move on to next i
            j += 1 # Move on to next j

        final_columns = batch.columns.to_list()+results.columns.to_list()
        final = pd.DataFrame(rows_list,columns= final_columns)
        reduced = final.drop(["Unnamed: 0","objID","ra","dec"],axis=1)
        #print(f"Length of final: {len(final)}, with {len(pd.unique(final['OBJID']))} unique")
        
        time.sleep(1)
        reduced.to_csv(OUTPUT_PATH, mode='a', header=not os.path.exists(OUTPUT_PATH),index=False)
        print(f"Processing batch {i} ({len(batch)} items, {len(results)} results found, cut to {len(reduced)}))")

get_SDSS_info_batch()



Processing batch 0 (3240 items, 6071 results found, cut to 1342))




Processing batch 1 (3240 items, 6130 results found, cut to 1351))




Processing batch 2 (3240 items, 6132 results found, cut to 1346))




Processing batch 3 (3240 items, 6246 results found, cut to 1339))




Processing batch 4 (3240 items, 6594 results found, cut to 1276))




Processing batch 5 (3240 items, 6696 results found, cut to 1222))




Processing batch 6 (3240 items, 5823 results found, cut to 1493))




Processing batch 7 (3240 items, 6803 results found, cut to 1153))




Processing batch 8 (3240 items, 6676 results found, cut to 1209))




Processing batch 9 (3240 items, 6494 results found, cut to 1269))




Processing batch 10 (3240 items, 7262 results found, cut to 1082))




Processing batch 11 (3240 items, 7844 results found, cut to 0))




Processing batch 12 (3240 items, 8048 results found, cut to 906))




Processing batch 13 (3240 items, 7025 results found, cut to 1104))




Processing batch 14 (3240 items, 6681 results found, cut to 1134))




Processing batch 15 (3240 items, 6951 results found, cut to 1114))




Processing batch 16 (3240 items, 6981 results found, cut to 1114))




Processing batch 17 (3240 items, 6959 results found, cut to 1103))




Processing batch 18 (3240 items, 6803 results found, cut to 1174))




Processing batch 19 (3240 items, 7290 results found, cut to 0))




Processing batch 20 (3240 items, 7880 results found, cut to 964))




Processing batch 21 (3240 items, 8589 results found, cut to 847))




Processing batch 22 (3240 items, 7460 results found, cut to 1054))




Processing batch 23 (3240 items, 6647 results found, cut to 0))




Processing batch 24 (3240 items, 6427 results found, cut to 1281))




Processing batch 25 (3240 items, 7048 results found, cut to 1104))




Processing batch 26 (3240 items, 7933 results found, cut to 0))




Processing batch 27 (3240 items, 7750 results found, cut to 992))




Processing batch 28 (3240 items, 8013 results found, cut to 901))




Processing batch 29 (3240 items, 7879 results found, cut to 0))




Processing batch 30 (3240 items, 7089 results found, cut to 1083))




Processing batch 31 (3240 items, 7317 results found, cut to 1060))




Processing batch 32 (3240 items, 7564 results found, cut to 1053))




Processing batch 33 (3240 items, 8359 results found, cut to 882))




Processing batch 34 (3240 items, 7577 results found, cut to 1015))




Processing batch 35 (3240 items, 7506 results found, cut to 1018))




Processing batch 36 (3240 items, 7631 results found, cut to 969))




Processing batch 37 (3240 items, 7856 results found, cut to 939))




Processing batch 38 (3240 items, 8176 results found, cut to 911))




Processing batch 39 (3240 items, 7428 results found, cut to 1068))




Processing batch 40 (3240 items, 7673 results found, cut to 949))




Processing batch 41 (3240 items, 8133 results found, cut to 0))




Processing batch 42 (3240 items, 8482 results found, cut to 838))




Processing batch 43 (3240 items, 7869 results found, cut to 889))




Processing batch 44 (3240 items, 7894 results found, cut to 0))




Processing batch 45 (3240 items, 7905 results found, cut to 949))




Processing batch 46 (3240 items, 7762 results found, cut to 0))




Processing batch 47 (3240 items, 7744 results found, cut to 0))




Processing batch 48 (3240 items, 8059 results found, cut to 913))




Processing batch 49 (3240 items, 7616 results found, cut to 1005))




Processing batch 50 (3240 items, 7265 results found, cut to 0))




Processing batch 51 (3240 items, 7370 results found, cut to 0))




Processing batch 52 (3240 items, 7521 results found, cut to 1014))




Processing batch 53 (3240 items, 7453 results found, cut to 1075))




Processing batch 54 (3240 items, 7705 results found, cut to 1016))




Processing batch 55 (3240 items, 7610 results found, cut to 1027))




Processing batch 56 (3240 items, 7543 results found, cut to 1054))




Processing batch 57 (3240 items, 7276 results found, cut to 1115))




Processing batch 58 (3240 items, 7090 results found, cut to 1099))




Processing batch 59 (3240 items, 7172 results found, cut to 1109))




Processing batch 60 (3240 items, 7326 results found, cut to 1130))




Processing batch 61 (3240 items, 7358 results found, cut to 1049))




Processing batch 62 (3240 items, 7327 results found, cut to 1049))




Processing batch 63 (3240 items, 7513 results found, cut to 0))




Processing batch 64 (3240 items, 7661 results found, cut to 1016))




Processing batch 65 (3240 items, 7568 results found, cut to 1046))




Processing batch 66 (3240 items, 7188 results found, cut to 0))




Processing batch 67 (3240 items, 7491 results found, cut to 994))




Processing batch 68 (3240 items, 7390 results found, cut to 1002))




Processing batch 69 (3240 items, 7309 results found, cut to 0))




Processing batch 70 (3240 items, 7730 results found, cut to 943))




Processing batch 71 (3240 items, 7651 results found, cut to 1002))




Processing batch 72 (3240 items, 7883 results found, cut to 926))




Processing batch 73 (3240 items, 7692 results found, cut to 992))




Processing batch 74 (3240 items, 7818 results found, cut to 920))




Processing batch 75 (3240 items, 7655 results found, cut to 970))




Processing batch 76 (3240 items, 7528 results found, cut to 0))




Processing batch 77 (3240 items, 7583 results found, cut to 994))




Processing batch 78 (3240 items, 8254 results found, cut to 0))




Processing batch 79 (3240 items, 8181 results found, cut to 0))




Processing batch 80 (3240 items, 7856 results found, cut to 957))




Processing batch 81 (3240 items, 7923 results found, cut to 931))




Processing batch 82 (3240 items, 7780 results found, cut to 941))




Processing batch 83 (3240 items, 8093 results found, cut to 892))




Processing batch 84 (3240 items, 8614 results found, cut to 822))




Processing batch 85 (3240 items, 7669 results found, cut to 930))




Processing batch 86 (3240 items, 7835 results found, cut to 892))




Processing batch 87 (3240 items, 7647 results found, cut to 909))




Processing batch 88 (3240 items, 7678 results found, cut to 913))




Processing batch 89 (3240 items, 8233 results found, cut to 0))




Processing batch 90 (3240 items, 7761 results found, cut to 0))




Processing batch 91 (3240 items, 7737 results found, cut to 963))




Processing batch 92 (3240 items, 7683 results found, cut to 978))




Processing batch 93 (3240 items, 8133 results found, cut to 0))




Processing batch 94 (3240 items, 8141 results found, cut to 878))




Processing batch 95 (3240 items, 7410 results found, cut to 982))




Processing batch 96 (3240 items, 7452 results found, cut to 972))




Processing batch 97 (3240 items, 8198 results found, cut to 0))




Processing batch 98 (3240 items, 7739 results found, cut to 960))




Processing batch 99 (3240 items, 7369 results found, cut to 1039))




Processing batch 100 (3240 items, 7767 results found, cut to 976))




Processing batch 101 (3240 items, 8159 results found, cut to 879))




Processing batch 102 (3240 items, 7583 results found, cut to 964))




Processing batch 103 (3240 items, 7416 results found, cut to 0))




Processing batch 104 (3240 items, 7454 results found, cut to 1008))




Processing batch 105 (3240 items, 7761 results found, cut to 0))




Processing batch 106 (3240 items, 7709 results found, cut to 962))




Processing batch 107 (3240 items, 7442 results found, cut to 0))




Processing batch 108 (3240 items, 7417 results found, cut to 1005))




ReadTimeout: HTTPSConnectionPool(host='skyserver.sdss.org', port=443): Read timed out. (read timeout=60)

In [None]:
# #reduced =  catalog.drop_duplicates(subset=['OBJID'])
# reduced = catalog.drop(["Unnamed: 0","Unnamed: 0.1"],axis=1)
# print(reduced.shape[0])
# print(f"Number of galaxies in GZ1 catalogue: {catalog.shape[0]}")
# print(f"Columns: {catalog.columns.values}")
# reduced.head(10)
# #reduced.to_csv(OUTPUT_PATH,index=False)