## Load the CSV data

In [1]:
import pandas as pd

In [2]:
# Location of the input catalogue CSV that is loaded into `df` below.
# NOTE(review): this is an absolute path on one specific machine; it has to be
# adjusted before the notebook can be re-run anywhere else.
ZOO_DATA = '/Users/julia/Desktop/Master Astro/Dades/Python Bootcamp/ZooSpecPhotoDR19_torradeflot.csv'

In [3]:
# Read the whole catalogue into memory; every later cell filters this frame
# without modifying it.
df = pd.read_csv(filepath_or_buffer=ZOO_DATA)

## Filter out incorrect data

Following the SDSS documentation on magnitudes (https://www.sdss4.org/dr12/algorithms/magnitudes/), we will use the model magnitudes (`modelMag`). We filter out the objects that do not have a valid magnitude in every band.

We will apply a restrictive threshold to the zooSpec classification columns to decide whether an object was successfully classified: we require `p_cs_debiased >= 0.9` or `p_el_debiased >= 0.9`.

We will only keep the objects that are reasonably big but still fit comfortably in the cutouts.

In [4]:
# Per-band upper limits on the model-magnitude error ("reasonable errors").
max_mag_err = {'u': 0.5, 'g': 0.05, 'r': 0.05, 'i': 0.05, 'z': 0.1}

# Build the row mask one criterion at a time, starting from "keep everything".
m = pd.Series(True, index=df.index)

# Photometry: in every band the model magnitude must be above -30 and its
# error below the band's limit.
# NOTE(review): -30 presumably rejects sentinel values stored for failed
# measurements; the error cut has no lower bound, so a negative sentinel in an
# error column would pass it -- confirm against the catalogue documentation.
for band, max_err in max_mag_err.items():
    m &= (df[f'modelMag_{band}'] > -30) & (df[f'modelMagErr_{band}'] < max_err)

# Classification: at least 0.9 in one of the two debiased columns.
m &= (df['p_cs_debiased'] >= 0.9) | (df['p_el_debiased'] >= 0.9)

# Size ("medium sized"): twice petroR90_r, divided by 0.4, must exceed 20, and
# 1.5 times that must stay below 64.
# NOTE(review): 0.4 is presumably the pixel scale and 64 the cutout side in
# pixels -- confirm. The operation order matches the original expression so
# rows at the boundary are classified identically.
diameter = df['petroR90_r'] * 2
m &= (diameter * 1.5 / 0.4 < 64) & (diameter / 0.4 > 20)

In [5]:
# (rows in the full catalogue, rows selected by the mask); summing the boolean
# mask counts the selected rows without building the filtered frame.
len(df), int(m.sum())

(659272, 69352)

In [6]:
# Columns carried over to the filtered catalogue: the object identifiers,
# ra/dec, the debiased classification columns and the spiral/elliptical
# columns, the two r-band Petrosian radii, and the model magnitude and
# extinction for each of the u, g, r, i, z bands.
cols_to_keep = [
    'specobjid', 'objid', 'dr7objid', 'ra', 'dec',
    'p_el_debiased', 'p_cs_debiased', 'spiral', 'elliptical',
    'petroR50_r', 'petroR90_r',
    *(f'modelMag_{f}' for f in "ugriz"),
    *(f'extinction_{f}' for f in "ugriz"),
]

# Select rows and columns in a single .loc call. The previous chained form
# df[m][cols_to_keep] first copied every column of the selected rows and then
# sliced that copy again; .loc yields the same frame without the intermediate.
df_filtered = df.loc[m, cols_to_keep]

In [7]:
# Destination of the filtered catalogue.
# NOTE(review): absolute, machine-specific path -- adjust before re-running
# elsewhere.
FILTERED_CSV = '/Users/julia/Desktop/Master Astro/Dades/Python Bootcamp/ZooSpecPhotoDR19_filtered.csv'

# to_csv is called with its defaults, so the frame's index (the row labels
# inherited from `df`) is written as the first, unnamed column of the file.
df_filtered.to_csv(FILTERED_CSV)