# What's in DP0.3? Which columns are empty?

A column is empty if:
 * both its min and max value are NaN, or
 * both its min and max value are zero, or
 * the first and last of sorted values are empty strings.

In [1]:
from lsst.rsp import get_tap_service
import matplotlib.pyplot as plt
import numpy as np

In [2]:
service = get_tap_service("ssotap")
print(service.baseurl)

https://data.lsst.cloud/api/ssotap


In [3]:
# query = "SELECT * FROM tap_schema.schemas"
# results = service.search(query).to_table()
# results

In [4]:
# query = "SELECT * FROM tap_schema.tables " \
#         "WHERE tap_schema.tables.schema_name = 'dp03_catalogs_10yr' "
# results = service.search(query).to_table()
# results

## MPCORB

In [5]:
results = service.search("SELECT column_name, datatype from TAP_SCHEMA.columns "
                         "WHERE table_name = 'dp03_catalogs_10yr.MPCORB'").to_table().to_pandas()
# results

In [6]:
colnames = results['column_name']
datatypes = results['datatype']
del results

In [33]:
results = service.search("SELECT MIN(ssObjectId) AS minval, "
                         "MAX(ssObjectId) AS maxval, "
                         "COUNT(ssObjectId) AS count "
                         "FROM dp03_catalogs_10yr.MPCORB ").to_table().to_pandas()
results

Unnamed: 0,minval,maxval,count
0,-9223370383071521539,9223370875126069107,14462388


In [34]:
print(int(results['maxval'][0] - (0.02 * results['maxval'][0])))

9038903457623547904


In [35]:
del results

In [37]:
results = service.search("SELECT * "
                         "FROM dp03_catalogs_10yr.MPCORB "
                         "WHERE ssObjectId > 9038903457623547904").to_table().to_pandas()
print(len(results))

144472


In [40]:
flag = np.zeros(len(colnames), dtype='int')
for i, (col, dtype) in enumerate(zip(colnames, datatypes)):
    if dtype != 'char':
        nanmin = np.nanmin(results[col])
        nanmax = np.nanmax(results[col])
        if np.isnan(nanmin) & np.isnan(nanmax):
            flag[i] = 1
        elif np.isfinite(nanmin) & np.isfinite(nanmax):
            if ((nanmin == 0.0) | (nanmax == 0.0)) & (nanmax - nanmin == 0.0):
                flag[i] = 1
        print('%2i  %2i  %-30s %-10s %20.4E %20.4E' % (i, flag[i], col, dtype, 
                                                       np.nanmin(results[col]), 
                                                       np.nanmax(results[col])))
    else:
        uvals = np.unique(results[col])
        if uvals[0] == '':
            flag[i] = 1
        print('%2i  %2i  %-30s %-10s %20s %20s' % (i, flag[i], col, dtype, uvals[0], uvals[-1]))

  nanmin = np.nanmin(results[col])
  nanmax = np.nanmax(results[col])
  np.nanmin(results[col]),
  np.nanmax(results[col])))


 0   1  arc                            float                       NAN                  NAN
 1   1  arcEnd                         char                                                
 2   1  arcStart                       char                                                
 3   1  computer                       char                                                
 4   0  e                              double               1.4632E-04           1.1363E+01
 5   0  epoch                          double               5.9443E+04           6.4675E+04
 6   1  flags                          int                  0.0000E+00           0.0000E+00
 7   0  fullDesignation                char              2011 1062 T-3      2011 iso0012110
 8   0  incl                           double               8.0000E-05           1.7897E+02
 9   1  lastIncludedObservation        float                       NAN                  NAN
10   0  mpcDesignation                 char                   1062 T-3          

### List of empty columns

In [41]:
tx = np.where(flag == 1)[0]
for x in tx:
    print(colnames[x])

arc
arcEnd
arcStart
computer
flags
lastIncludedObservation
mpcNumber
n
nobs
nopp
pertsLong
pertsShort
reference
rms
uncertaintyParameter


In [42]:
del results, flag, colnames, datatypes

## SSObject

In [43]:
results = service.search("SELECT column_name, datatype from TAP_SCHEMA.columns "
                         "WHERE table_name = 'dp03_catalogs_10yr.SSObject'").to_table().to_pandas()
# results

In [44]:
colnames = results['column_name']
datatypes = results['datatype']
del results

In [45]:
results = service.search("SELECT * "
                         "FROM dp03_catalogs_10yr.SSObject "
                         "WHERE ssObjectId > 9038903457623547904").to_table().to_pandas()

In [46]:
flag = np.zeros(len(colnames), dtype='int')
for i, (col, dtype) in enumerate(zip(colnames, datatypes)):
    if dtype != 'char':
        nanmin = np.nanmin(results[col])
        nanmax = np.nanmax(results[col])
        if np.isnan(nanmin) & np.isnan(nanmax):
            flag[i] = 1
        elif np.isfinite(nanmin) & np.isfinite(nanmax):
            if ((nanmin == 0.0) | (nanmax == 0.0)) & (nanmax - nanmin == 0.0):
                flag[i] = 1
        print('%2i  %2i  %-30s %-10s %20.4E %20.4E' % (i, flag[i], col, dtype, 
                                                       np.nanmin(results[col]), 
                                                       np.nanmax(results[col])))
    else:
        uvals = np.unique(results[col])
        if uvals[0] == '':
            flag[i] = 1
        print('%2i  %2i  %-30s %-10s %20s %20s' % (i, flag[i], col, dtype, uvals[0], uvals[-1]))

 0   0  arc                            float                0.0000E+00           3.6509E+03
 1   0  discoverySubmissionDate        double               6.0218E+04           6.3872E+04
 2   0  firstObservationDate           double               6.0218E+04           6.3865E+04
 3   0  flags                          long                 6.6000E+01           2.1740E+03
 4   0  g_Chi2                         float                2.9486E-03           1.1336E+03
 5   0  g_G12                          float               -1.2398E+05           2.3839E+06
 6   0  g_G12Err                       float                1.1028E-03           1.5701E+12
 7   0  g_H                            float                9.4873E+00           3.9079E+01
 8   0  g_H_gG12_Cov                   float               -8.1780E+16           1.1349E+18
 9   0  g_HErr                         float                1.6906E-04           9.7831E+05
10   0  g_Ndata                        int                  0.0000E+00          

  nanmin = np.nanmin(results[col])
  nanmax = np.nanmax(results[col])
  np.nanmin(results[col]),
  np.nanmax(results[col])))


### List of empty columns

In [47]:
tx = np.where(flag == 1)[0]
for x in tx:
    print(colnames[x])

maxExtendedness
medianExtendedness
minExtendedness
MOID
MOIDDeltaV
MOIDEclipticLongitude
MOIDTrueAnomaly
u_Chi2
u_G12
u_G12Err
u_H
u_H_uG12_Cov
u_HErr
u_Ndata
y_Chi2
y_G12
y_G12Err
y_H
y_H_yG12_Cov
y_HErr
y_Ndata


In [48]:
del results, flag

## DiaSource

In [49]:
results = service.search("SELECT column_name, datatype from TAP_SCHEMA.columns "
                         "WHERE table_name = 'dp03_catalogs_10yr.DiaSource'").to_table().to_pandas()
# results

In [50]:
colnames = results['column_name']
datatypes = results['datatype']
del results

In [56]:
results = service.search("SELECT MIN(diaSourceId) AS minval, "
                         "MAX(diaSourceId) AS maxval, "
                         "COUNT(diaSourceId) AS count "
                         "FROM dp03_catalogs_10yr.DiaSource ").to_table().to_pandas()
results

Unnamed: 0,minval,maxval,count
0,-9223372009544453425,9223372024865461180,653005444


In [57]:
print(int(results['maxval'][0] - (0.0005 * results['maxval'][0])))

9218760338853028864


In [58]:
del results

In [60]:
results = service.search("SELECT * "
                         "FROM dp03_catalogs_10yr.DiaSource "
                         "WHERE ssObjectId > 9218760338853028864").to_table().to_pandas()
print(len(results))

162269


In [61]:
flag = np.zeros(len(colnames), dtype='int')
for i, (col, dtype) in enumerate(zip(colnames, datatypes)):
    if dtype != 'char':
        nanmin = np.nanmin(results[col])
        nanmax = np.nanmax(results[col])
        if np.isnan(nanmin) & np.isnan(nanmax):
            flag[i] = 1
        elif np.isfinite(nanmin) & np.isfinite(nanmax):
            if ((nanmin == 0.0) | (nanmax == 0.0)) & (nanmax - nanmin == 0.0):
                flag[i] = 1
        print('%2i  %2i  %-30s %-10s %20.4E %20.4E' % (i, flag[i], col, dtype, 
                                                       np.nanmin(results[col]), 
                                                       np.nanmax(results[col])))
    else:
        uvals = np.unique(results[col])
        if uvals[0] == '':
            flag[i] = 1
        print('%2i  %2i  %-30s %-10s %20s %20s' % (i, flag[i], col, dtype, uvals[0], uvals[-1]))

 0   0  band                           char                          g                    z
 1   0  ccdVisitId                     long                 1.6830E+03           2.0857E+06
 2   0  dec                            double              -6.9948E+01           3.5642E+01
 3   0  decErr                         float                1.9799E-06           6.3192E-01
 4   0  decTrue                        double              -6.9948E+01           3.5642E+01
 5   0  diaObjectId                    long                -9.2232E+18           9.2233E+18
 6   0  diaSourceId                    long                -9.2233E+18           9.2234E+18
 7   0  mag                            float                1.5748E+01           5.0101E+01
 8   0  magErr                         float                1.0000E-03           9.1070E+00
 9   0  magTrueVband                   float                1.6232E+01           3.4004E+01
10   0  midPointMjdTai                 double               6.0220E+04          

  if ((nanmin == 0.0) | (nanmax == 0.0)) & (nanmax - nanmin == 0.0):


18   0  ssObjectReassocTime            char                60219.99373          63869.32741


### List of empty columns

In [62]:
tx = np.where(flag == 1)[0]
for x in tx:
    print(colnames[x])

ra_dec_Cov


In [63]:
del results, flag

## SSSource

In [64]:
results = service.search("SELECT column_name, datatype from TAP_SCHEMA.columns "
                         "WHERE table_name = 'dp03_catalogs_10yr.SSSource'").to_table().to_pandas()
# results

In [65]:
colnames = results['column_name']
datatypes = results['datatype']
del results

In [66]:
results = service.search("SELECT * "
                         "FROM dp03_catalogs_10yr.SSSource "
                         "WHERE ssObjectId > 9218760338853028864").to_table().to_pandas()
print(len(results))

162269


In [67]:
flag = np.zeros(len(colnames), dtype='int')
for i, (col, dtype) in enumerate(zip(colnames, datatypes)):
    if dtype != 'char':
        nanmin = np.nanmin(results[col])
        nanmax = np.nanmax(results[col])
        if np.isnan(nanmin) & np.isnan(nanmax):
            flag[i] = 1
        elif np.isfinite(nanmin) & np.isfinite(nanmax):
            if ((nanmin == 0.0) | (nanmax == 0.0)) & (nanmax - nanmin == 0.0):
                flag[i] = 1
        print('%2i  %2i  %-30s %-10s %20.4E %20.4E' % (i, flag[i], col, dtype, 
                                                       np.nanmin(results[col]), 
                                                       np.nanmax(results[col])))
    else:
        uvals = np.unique(results[col])
        if uvals[0] == '':
            flag[i] = 1
        print('%2i  %2i  %-30s %-10s %20s %20s' % (i, flag[i], col, dtype, uvals[0], uvals[-1]))

 0   0  diaSourceId                    long                -9.2233E+18           9.2234E+18
 1   0  eclipticBeta                   double              -5.6310E+01           4.0350E+01
 2   0  eclipticLambda                 double               9.4427E-03           3.5999E+02
 3   0  galacticB                      double              -8.9138E+01           8.3551E+01
 4   0  galacticL                      double               2.8331E-04           3.6000E+02
 5   0  heliocentricDist               float                9.8169E-01           6.1776E+01
 6   0  heliocentricVX                 float               -1.8945E-02           1.8605E-02
 7   0  heliocentricVY                 float               -1.3782E-02           1.5872E-02
 8   0  heliocentricVZ                 float               -9.8119E-03           1.0604E-02
 9   0  heliocentricX                  float               -3.9302E+01           4.1593E+01
10   0  heliocentricY                  float               -2.9044E+01          

  if ((nanmin == 0.0) | (nanmax == 0.0)) & (nanmax - nanmin == 0.0):
  nanmin = np.nanmin(results[col])
  nanmax = np.nanmax(results[col])
  np.nanmin(results[col]),
  np.nanmax(results[col])))


### List of empty columns

In [68]:
tx = np.where(flag == 1)[0]
for x in tx:
    print(colnames[x])

mpcUniqueId
predictedDecErr
predictedMagnitude
predictedMagnitudeErr
predictedRaDecCov
predictedRaErr
residualDec
residualRa


In [69]:
del results, flag