In [1]:
# import libraries

import pandas as pd
import numpy as np

In [2]:
# set display settings

pd.set_option("display.max_rows", None)
pd.set_option('display.max_columns', None)

In [5]:
# specify columns, datatypes, and load full consolidated sdss dataset

# load column names from file into a list
sdss_headers = pd.read_csv('../data/processed/sdss_full.csv.gz', compression='gzip', header=0, index_col=0, nrows=0).columns.tolist()

# specify groups of columns
object_identifiers = ['dr7objid', 'dr8objid', 'specobjid', 'gz2_filenumber']
object_locations = ['ra', 'dec']
object_class_labels = ['sdss_clean_class_name', 'sdss_clean_subclass_name', 'gz2_subclass_name', 'elodie_spectral_type']
object_binary_labels = [label for label in sdss_headers if 'flag' in label]
object_filters = ['score', 'petroR90_r']
object_features = ['redshift_final', 'redshift_noqso', 'redshift_elodie',
                   'u_s', 'g_s', 'r_s', 'i_s', 'z_s',
                   'u_p', 'g_p', 'r_p', 'i_p', 'z_p',
                   'elodie_color_index', 'elodie_temperature', 'elodie_metallicity']

# create a datatype dictionary for object_identifiers
object_dtypes = {col: 'string' for col in object_identifiers}

# load full consolidated sdss dataset
sdss_full = pd.read_csv('../data/processed/sdss_full.csv.gz', compression='gzip', header=0, dtype=object_dtypes)

"object_dtypes.update({col: 'float64' for col in object_locations})\nobject_dtypes.update({col: 'string' for col in object_class_labels})\nobject_dtypes.update({col: 'string' for col in object_binary_labels}) # <------------------------------------ need to figure out how to load the bools (investigate display below)\nobject_dtypes.update({col: 'float64' for col in object_filters})\nobject_dtypes.update({col: 'float64' for col in object_features})"

In [7]:
# inspect the imported data
 
 # display some data
display(sdss_full.head(3))
display(sdss_full.tail(3))
display(sdss_full.sample(10))
 
# inspect dataframe
print(sdss_full.shape)
print(sdss_full.info())
for identifier in object_identifiers:
    print(f"\nTop Value Counts:  {sdss_full[identifier].value_counts().nlargest(5)}")

Unnamed: 0,dr7objid,dr8objid,specobjid,gz2_filenumber,ra,dec,sdss_clean_class_name,sdss_clean_subclass_name,gz2_subclass_name,elodie_spectral_type,gz1_flag_spiral,gz1_flag_elliptical,gz1_flag_uncertain,gz2_flag_smooth,gz2_flag_features_or_disk,gz2_flag_star_or_artifact,gz2_flag_edgeon_yes,gz2_flag_edgeon_no,gz2_flag_bar_yes,gz2_flag_bar_no,gz2_flag_spiral_yes,gz2_flag_spiral_no,gz2_flag_bulge_none1,gz2_flag_bulge_small,gz2_flag_bulge_medium,gz2_flag_bulge_large,gz2_flag_odd_yes,gz2_flag_odd_no,gz2_flag_round_circular,gz2_flag_round_medium,gz2_flag_round_cigar,gz2_flag_feature_ring,gz2_flag_feature_lens_or_arc,gz2_flag_feature_disturbed,gz2_flag_feature_irregular,gz2_flag_feature_other,gz2_flag_feature_merger,gz2_flag_feature_dust_lane,gz2_flag_bulge_round,gz2_flag_bulge_boxy,gz2_flag_bulge_none2,gz2_flag_arms_tight,gz2_flag_arms_medium,gz2_flag_arms_loose,gz2_flag_arms_1,gz2_flag_arms_2,gz2_flag_arms_3,gz2_flag_arms_4,gz2_flag_arms_many,gz2_flag_arms_unsure,score,petroR90_r,redshift_final,redshift_noqso,redshift_elodie,u_s,g_s,r_s,i_s,z_s,u_p,g_p,r_p,i_p,z_p,elodie_color_index,elodie_temperature,elodie_metallicity
0,588007006336254064,1237651252584448100,693716010653476864,192933.0,236.35553,54.820911,galaxy,STARFORMING,SBc2l,,True,False,False,False,True,False,False,True,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,0.850861,10.06485,0.080127,0.0,0.0,6.013997,19.66714,43.23034,61.21994,73.57887,18.91253,17.53665,16.81467,16.43992,16.18453,,,
1,588007006336254083,1237651252584448112,693718759432546304,,236.342,54.80283,galaxy,,,,False,False,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.850861,6.995347,0.193371,0.0,0.0,4.234163,11.95987,31.28475,46.43085,56.40972,19.66911,18.16584,17.27259,16.84296,16.5585,,,
2,587729226614112406,1237651252584251555,693718209676732416,32264.0,235.891464,55.133472,galaxy,AGN,SBc?t,,True,False,False,False,True,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.86336,10.95925,0.040399,0.0,0.0,18.20395,60.73151,129.9764,179.2148,215.1861,17.46699,15.83618,15.09113,14.70903,14.41906,,,


Unnamed: 0,dr7objid,dr8objid,specobjid,gz2_filenumber,ra,dec,sdss_clean_class_name,sdss_clean_subclass_name,gz2_subclass_name,elodie_spectral_type,gz1_flag_spiral,gz1_flag_elliptical,gz1_flag_uncertain,gz2_flag_smooth,gz2_flag_features_or_disk,gz2_flag_star_or_artifact,gz2_flag_edgeon_yes,gz2_flag_edgeon_no,gz2_flag_bar_yes,gz2_flag_bar_no,gz2_flag_spiral_yes,gz2_flag_spiral_no,gz2_flag_bulge_none1,gz2_flag_bulge_small,gz2_flag_bulge_medium,gz2_flag_bulge_large,gz2_flag_odd_yes,gz2_flag_odd_no,gz2_flag_round_circular,gz2_flag_round_medium,gz2_flag_round_cigar,gz2_flag_feature_ring,gz2_flag_feature_lens_or_arc,gz2_flag_feature_disturbed,gz2_flag_feature_irregular,gz2_flag_feature_other,gz2_flag_feature_merger,gz2_flag_feature_dust_lane,gz2_flag_bulge_round,gz2_flag_bulge_boxy,gz2_flag_bulge_none2,gz2_flag_arms_tight,gz2_flag_arms_medium,gz2_flag_arms_loose,gz2_flag_arms_1,gz2_flag_arms_2,gz2_flag_arms_3,gz2_flag_arms_4,gz2_flag_arms_many,gz2_flag_arms_unsure,score,petroR90_r,redshift_final,redshift_noqso,redshift_elodie,u_s,g_s,r_s,i_s,z_s,u_p,g_p,r_p,i_p,z_p,elodie_color_index,elodie_temperature,elodie_metallicity
1654165,,1237658613593997533,9259471051523641344,,177.50007,50.211641,quasar,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.807279,,1.076416,0.231599,0.0,0.765539,1.052253,1.874948,1.917074,2.357079,21.84895,21.91097,21.59686,21.60826,21.09701,0.0,0.0,0.0
1654166,,1237658613594063054,9259459231773642752,,177.79391,50.208232,unclassified,unclassified,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.819832,,2.060357,-0.004214,0.0,7.819283,5.829438,6.240019,6.629135,8.703038,20.83662,20.683,20.50494,20.39749,20.39611,0.0,0.0,0.0
1654167,,1237658613594063058,9259460331285270528,,177.79678,50.375821,quasar,BROADLINE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.819832,,1.856644,0.123349,0.0,0.191132,1.780057,1.906872,2.832188,3.034446,22.0247,21.56175,21.59132,21.17899,21.09313,0.0,0.0,0.0


Unnamed: 0,dr7objid,dr8objid,specobjid,gz2_filenumber,ra,dec,sdss_clean_class_name,sdss_clean_subclass_name,gz2_subclass_name,elodie_spectral_type,gz1_flag_spiral,gz1_flag_elliptical,gz1_flag_uncertain,gz2_flag_smooth,gz2_flag_features_or_disk,gz2_flag_star_or_artifact,gz2_flag_edgeon_yes,gz2_flag_edgeon_no,gz2_flag_bar_yes,gz2_flag_bar_no,gz2_flag_spiral_yes,gz2_flag_spiral_no,gz2_flag_bulge_none1,gz2_flag_bulge_small,gz2_flag_bulge_medium,gz2_flag_bulge_large,gz2_flag_odd_yes,gz2_flag_odd_no,gz2_flag_round_circular,gz2_flag_round_medium,gz2_flag_round_cigar,gz2_flag_feature_ring,gz2_flag_feature_lens_or_arc,gz2_flag_feature_disturbed,gz2_flag_feature_irregular,gz2_flag_feature_other,gz2_flag_feature_merger,gz2_flag_feature_dust_lane,gz2_flag_bulge_round,gz2_flag_bulge_boxy,gz2_flag_bulge_none2,gz2_flag_arms_tight,gz2_flag_arms_medium,gz2_flag_arms_loose,gz2_flag_arms_1,gz2_flag_arms_2,gz2_flag_arms_3,gz2_flag_arms_4,gz2_flag_arms_many,gz2_flag_arms_unsure,score,petroR90_r,redshift_final,redshift_noqso,redshift_elodie,u_s,g_s,r_s,i_s,z_s,u_p,g_p,r_p,i_p,z_p,elodie_color_index,elodie_temperature,elodie_metallicity
1214764,,1237667912198718139,2815982388159473664,,172.09666,24.929282,quasar,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.876677,,3.195375,0.0,0.0,0.131682,1.561267,6.967295,11.86696,18.09866,23.45998,21.41952,20.11442,19.95722,19.62216,0.0,0.0,0.0
153783,5.880093680098345e+17,1237653614258028686,622731533672802304,,137.3269,53.28003,galaxy,STARFORMING,,,False,False,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.832386,3.529115,0.100046,0.0,0.0,29.65409,58.27394,101.6454,151.0756,177.5662,18.57788,17.54083,16.98152,16.5281,16.32348,,,
152333,5.877229844438715e+17,1237655467523440817,348072698833823744,,224.4118,0.872972,unclassified,unclassified,,,True,False,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.886849,4.802246,0.096834,0.0,0.0,6.027494,15.59836,35.66804,54.18317,69.241,19.68049,18.33228,17.55601,17.14058,16.83957,,,
377055,5.877352358077402e+17,1237661088028819723,1424315654340634624,,121.1749,23.20703,galaxy,,,,False,False,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.879096,4.797465,0.180899,0.0,0.0,3.676123,12.36802,39.75155,61.68689,82.68925,20.76941,18.89691,17.67373,17.17623,16.85844,,,
1304280,,1237648705652850793,3259573215663515648,,187.99617,0.873443,star,G2,,F8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.801435,,0.000397,0.0,0.000415,28.46226,59.25239,90.75126,103.6807,113.0357,19.12071,17.98461,17.58187,17.4557,17.369,0.53,5873.0,-0.45
494758,5.877420621689655e+17,1237667783393870210,2838473730756208640,,238.2359,14.40542,galaxy,STARFORMING,,,False,False,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.883602,4.185265,0.149265,0.0,0.0,8.900145,23.16944,51.36643,75.07117,93.31281,19.73616,18.39584,17.54081,17.09195,16.7918,,,
486122,5.877353490861921e+17,1237671123219186141,1978318613110614016,255044.0,124.951157,8.153255,galaxy,STARFORMING,Ec,,True,False,False,True,False,False,False,False,False,True,True,False,True,False,False,False,False,True,False,False,True,False,False,False,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,True,0.942848,6.156392,0.046154,0.0,0.0,14.58573,34.56861,72.85876,111.1381,141.0144,18.61777,17.19462,16.46165,16.04338,15.71882,,,
1292787,,1237666407919649033,1208176131514066944,,32.287647,-0.051418,star,M1,,M2Vvar,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.922338,,-0.0001,0.0,-5.8e-05,0.101099,2.791651,12.25412,26.21255,36.49457,23.56246,21.40031,19.82408,18.92991,18.46614,1.491,3980.0,-0.04
1294573,,1237651252022411704,8221370715431260160,,129.62132,51.721043,star,F3/F5V (30743),,M1V,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.853567,,0.00017,0.00017,0.0,4.223725,9.048829,14.82179,17.90459,19.57119,21.03679,20.1663,19.85361,19.73291,19.68928,1.474,3705.0,0.6
1399643,,1237651496297759358,4159246966596063232,,125.5024,51.406144,quasar,BROADLINE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.77562,,0.720685,0.72087,0.0,0.262655,0.822673,1.866184,3.775192,7.029656,24.41074,22.84772,21.47988,20.76151,20.2367,0.0,0.0,0.0


(1654168, 68)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1654168 entries, 0 to 1654167
Data columns (total 68 columns):
 #   Column                        Non-Null Count    Dtype  
---  ------                        --------------    -----  
 0   dr7objid                      658334 non-null   string 
 1   dr8objid                      1654168 non-null  string 
 2   specobjid                     1654168 non-null  string 
 3   gz2_filenumber                236466 non-null   string 
 4   ra                            1654168 non-null  float64
 5   dec                           1654168 non-null  float64
 6   sdss_clean_class_name         1654168 non-null  object 
 7   sdss_clean_subclass_name      1274218 non-null  object 
 8   gz2_subclass_name             236466 non-null   object 
 9   elodie_spectral_type          568139 non-null   object 
 10  gz1_flag_spiral               658334 non-null   object 
 11  gz1_flag_elliptical           658334 non-null   object 
 12  gz1_flag_uncer