In [1]:
import os, sys
import numpy as np

from astropy.io import fits
from astropy.table import Table, join
import pandas as pd
import fnmatch

#import desispec
# library location will change ..
#sys.path.append("/global/homes/r/rtojeiro/prospect/prospect/py")
#from prospect import utils_specviewer,plotframes
import matplotlib.pyplot as plt 

In [2]:
pd.set_option('display.max_rows', 20)

In [3]:
#set to directory with all the VI files to merge
VI_dir = os.environ['HOME']+'/Dropbox/DESI/DESI-VI/SV0/LRG/'

In [4]:
#we will read all the *.csv files in this directory. Change as needed.

all_files = os.listdir(VI_dir)
vi_files=[]

pattern = "desi*.csv"
for entry in all_files:
    if fnmatch.fnmatch(entry, pattern):
            vi_files.append(entry)
            
vi_files

['desi-vi_SV0_LRG_tile68002_night20200315_3_KSD.csv',
 'desi-vi_SV0_LRG_tile68002_night20200315_2_KSD.csv',
 'desi-vi_SV0_LRG_tile68002_night20200315_9_KSD.csv',
 'desi-vi_SV0_LRG_tile68002_night20200315_1_KSD.csv',
 'desi-vi_SV0_LRG_tile68002_night20200315_8_KSD.csv',
 'desi-vi_SV0_LRG_tile68002_night20200315_1_RT.csv',
 'desi-vi_SV0_LRG_tile68002_night20200315_4_KSD.csv',
 'desi-vi_SV0_LRG_tile68002_night20200315_5_KSD.csv',
 'desi-vi_SV0_LRG_tile68002_night20200315_7_KSD.csv',
 'desi-vi_SV0_LRG_tile68002_night20200315_2_RT.csv',
 'desi-vi_SV0_LRG_tile68002_night20200315_6_KSD.csv']

In [5]:
vi = pd.read_csv(VI_dir + vi_files[0], delimiter = " , ", engine='python')

for i in range(1,len(vi_files)):
    print(vi_files[i])
    vi2 = pd.read_csv(VI_dir + vi_files[i], delimiter = " , ", engine='python')
    vi = vi.append(vi2, ignore_index=True)
    
#make groups of visual inspections, grouped by unique objects, and state number of single and multiple VIs
vi_gp = vi.groupby(['TargetID'])
print('There are ' + str(len(vi)) + ' visual inspections of a total of ' + str(len(vi_gp)) + ' unique objects')

desi-vi_SV0_LRG_tile68002_night20200315_2_KSD.csv
desi-vi_SV0_LRG_tile68002_night20200315_9_KSD.csv
desi-vi_SV0_LRG_tile68002_night20200315_1_KSD.csv
desi-vi_SV0_LRG_tile68002_night20200315_8_KSD.csv
desi-vi_SV0_LRG_tile68002_night20200315_1_RT.csv
desi-vi_SV0_LRG_tile68002_night20200315_4_KSD.csv
desi-vi_SV0_LRG_tile68002_night20200315_5_KSD.csv
desi-vi_SV0_LRG_tile68002_night20200315_7_KSD.csv
desi-vi_SV0_LRG_tile68002_night20200315_2_RT.csv
desi-vi_SV0_LRG_tile68002_night20200315_6_KSD.csv
There are 549 visual inspections of a total of 450 unique objects


In [6]:
#vi is a dataframe
vi

Unnamed: 0,TargetID,ExpID,Spec version,Redrock version,Redrock spectype,Redrock z,VI scanner,VI class,VI issue,VI z,VI spectype,VI comment
0,35185929941421294,-1,0.33.0.dev3890,0,GALAXY,0.9847,KSD,4,--,0.9847,GALAXY,--
1,35185929941421564,-1,0.33.0.dev3890,0,GALAXY,0.9417,KSD,4,--,0.9417,GALAXY,calibration residual at 5800 AA - dichroic region
2,35185929941422067,-1,0.33.0.dev3890,0,GALAXY,1.0303,KSD,4,--,1.0303,GALAXY,--
3,35185929945612409,-1,0.33.0.dev3890,0,GALAXY,0.9340,KSD,4,--,0.934,GALAXY,--
4,35185929945612482,-1,0.33.0.dev3890,0,GALAXY,1.0455,KSD,4,--,1.0455,GALAXY,--
...,...,...,...,...,...,...,...,...,...,...,...,...
544,35185935951859826,-1,0.33.0.dev3890,0,GALAXY,0.8454,KSD,2,--,0.8454,GALAXY,--
545,35185935951860142,-1,0.33.0.dev3890,0,GALAXY,1.0425,KSD,4,--,1.0425,GALAXY,--
546,35185935960244525,-1,0.33.0.dev3890,0,GALAXY,1.1907,KSD,4,--,1.1907,GALAXY,--
547,35185935960247271,-1,0.33.0.dev3890,0,GALAXY,0.8466,KSD,4,--,0.8466,GALAXY,--


In [7]:
vi.keys()

Index(['TargetID', 'ExpID', 'Spec version', 'Redrock version',
       'Redrock spectype', 'Redrock z', 'VI scanner', 'VI class', 'VI issue',
       'VI z', 'VI spectype', 'VI comment'],
      dtype='object')

In [8]:
#make new column with best redshift estimate for each VI - take VI redshift if available, else take Redrock redshift. 
#I am always assuming that the VI redshift, if provided, trumps over the Redrock redshift. 
vi['best redshift'] = vi['VI z']
vi.loc[vi['best redshift']=='--', 'best redshift'] = vi.loc[vi['best redshift']=='--', 'Redrock z']
vi['best redshift'] = vi['best redshift'].astype(float)

In [9]:
vi

Unnamed: 0,TargetID,ExpID,Spec version,Redrock version,Redrock spectype,Redrock z,VI scanner,VI class,VI issue,VI z,VI spectype,VI comment,best redshift
0,35185929941421294,-1,0.33.0.dev3890,0,GALAXY,0.9847,KSD,4,--,0.9847,GALAXY,--,0.9847
1,35185929941421564,-1,0.33.0.dev3890,0,GALAXY,0.9417,KSD,4,--,0.9417,GALAXY,calibration residual at 5800 AA - dichroic region,0.9417
2,35185929941422067,-1,0.33.0.dev3890,0,GALAXY,1.0303,KSD,4,--,1.0303,GALAXY,--,1.0303
3,35185929945612409,-1,0.33.0.dev3890,0,GALAXY,0.9340,KSD,4,--,0.934,GALAXY,--,0.9340
4,35185929945612482,-1,0.33.0.dev3890,0,GALAXY,1.0455,KSD,4,--,1.0455,GALAXY,--,1.0455
...,...,...,...,...,...,...,...,...,...,...,...,...,...
544,35185935951859826,-1,0.33.0.dev3890,0,GALAXY,0.8454,KSD,2,--,0.8454,GALAXY,--,0.8454
545,35185935951860142,-1,0.33.0.dev3890,0,GALAXY,1.0425,KSD,4,--,1.0425,GALAXY,--,1.0425
546,35185935960244525,-1,0.33.0.dev3890,0,GALAXY,1.1907,KSD,4,--,1.1907,GALAXY,--,1.1907
547,35185935960247271,-1,0.33.0.dev3890,0,GALAXY,0.8466,KSD,4,--,0.8466,GALAXY,--,0.8466


In [10]:
#add new columns, holding the mean of the flags and the maximum difference in flag classification
vi['vi_combined_flag'] = vi.groupby('TargetID')['VI class'].transform('mean')
vi['vi_diff'] = vi.groupby('TargetID')['VI class'].transform(lambda x: ( x.max()-x.min()) )

In [11]:
#add new column, with the mean redshift from all values of 'best redshift'
vi['vi_combined_z'] = vi.groupby('TargetID')['best redshift'].transform('mean')
vi['dz'] = vi.groupby('TargetID')['best redshift'].transform(lambda x: ( (x.max() - x.min()) / (1+x.min()) ))

Get a table that holds only the objects that have been inspected more than once, and for which the individual VI classifications differ by 2 or more, or delta z / (1 + z) > 0.0033 (these are the conflicts to resolve)

In [12]:
vi_conflict = vi_gp.filter(lambda x: ( ( (x['VI class'].max()-x['VI class'].min()) >= 2) 
                       | ( (x['best redshift'].max() - x['best redshift'].min()) / (1+x['best redshift'].min()) > 0.0033 ) )
                       & (len(x) >= 2)) #x is a group by TargetID

Get the target IDs of the problematic objects and display in table form for a quick summary:

In [13]:
unique_targets = np.unique(vi_conflict['TargetID'].tolist())
print('Targets with problematic VI: ', unique_targets)
print('Total number of conflicts to resolve: ', len(unique_targets))

Targets with problematic VI:  [35185923939371905 35185923939373860 35185923943564221 35185923943564344
 35185923951955494 35185929933031946 35185929937224636]
Total number of conflicts to resolve:  7


In [14]:
for i in range(len(unique_targets)): 
    display(vi[vi.TargetID==unique_targets[i]])

Unnamed: 0,TargetID,ExpID,Spec version,Redrock version,Redrock spectype,Redrock z,VI scanner,VI class,VI issue,VI z,VI spectype,VI comment,best redshift,vi_combined_flag,vi_diff,vi_combined_z,dz
153,35185923939371905,-1,0,0,GALAXY,1.0205,ksd,3,--,1.0205,GALAXY,--,1.0205,2.5,1,0.76265,0.342703
253,35185923939371905,-1,0.33.0.dev3890,0,GALAXY,1.0205,RT,2,S,0.5048,--,"fit to HK"","" possible issue with continuum in...",0.5048,2.5,1,0.76265,0.342703


Unnamed: 0,TargetID,ExpID,Spec version,Redrock version,Redrock spectype,Redrock z,VI scanner,VI class,VI issue,VI z,VI spectype,VI comment,best redshift,vi_combined_flag,vi_diff,vi_combined_z,dz
156,35185923939373860,-1,0,0,GALAXY,1.6997,ksd,3,RC,1.212,QSO,--,1.212,2.0,2,1.214,0.001808
256,35185923939373860,-1,0.33.0.dev3890,0,GALAXY,1.6997,RT,1,RC,1.216,--,QSO?,1.216,2.0,2,1.214,0.001808


Unnamed: 0,TargetID,ExpID,Spec version,Redrock version,Redrock spectype,Redrock z,VI scanner,VI class,VI issue,VI z,VI spectype,VI comment,best redshift,vi_combined_flag,vi_diff,vi_combined_z,dz
160,35185923943564221,-1,0,0,GALAXY,0.8969,ksd,4,--,0.8969,GALAXY,--,0.8969,3.0,2,0.8969,0.0
260,35185923943564221,-1,0.33.0.dev3890,0,GALAXY,0.8969,RT,2,--,--,--,--,0.8969,3.0,2,0.8969,0.0


Unnamed: 0,TargetID,ExpID,Spec version,Redrock version,Redrock spectype,Redrock z,VI scanner,VI class,VI issue,VI z,VI spectype,VI comment,best redshift,vi_combined_flag,vi_diff,vi_combined_z,dz
161,35185923943564344,-1,0,0,QSO,0.802,ksd,2,--,1.42,GALAXY,--,1.42,1.5,1,1.111,0.342952
261,35185923943564344,-1,0.33.0.dev3890,0,QSO,0.802,RT,1,R,--,--,--,0.802,1.5,1,1.111,0.342952


Unnamed: 0,TargetID,ExpID,Spec version,Redrock version,Redrock spectype,Redrock z,VI scanner,VI class,VI issue,VI z,VI spectype,VI comment,best redshift,vi_combined_flag,vi_diff,vi_combined_z,dz
185,35185923951955494,-1,0,0,GALAXY,0.8556,ksd,4,--,0.8556,GALAXY,--,0.8556,3.0,2,0.8556,0.0
284,35185923951955494,-1,0.33.0.dev3890,0,GALAXY,0.8556,RT,2,--,--,--,--,0.8556,3.0,2,0.8556,0.0


Unnamed: 0,TargetID,ExpID,Spec version,Redrock version,Redrock spectype,Redrock z,VI scanner,VI class,VI issue,VI z,VI spectype,VI comment,best redshift,vi_combined_flag,vi_diff,vi_combined_z,dz
79,35185929933031946,-1,0,0,GALAXY,0.7492,ksd,4,--,0.7492,GALAXY,--,0.7492,3.0,2,0.7492,0.0
478,35185929933031946,-1,0.33.0.dev3890,0,GALAXY,0.7492,RT,2,--,--,--,--,0.7492,3.0,2,0.7492,0.0


Unnamed: 0,TargetID,ExpID,Spec version,Redrock version,Redrock spectype,Redrock z,VI scanner,VI class,VI issue,VI z,VI spectype,VI comment,best redshift,vi_combined_flag,vi_diff,vi_combined_z,dz
91,35185929937224636,-1,0,0,QSO,1.0286,ksd,1,--,0.0086,STAR,"lots of signal"","" but I can't identify object",0.0086,1.5,1,0.5186,1.011303
490,35185929937224636,-1,0.33.0.dev3890,0,QSO,1.0286,RT,2,C,--,GALAXY,--,1.0286,1.5,1,0.5186,1.011303


## This is where I resolve things manually - with care!!
### I think it's better to keep it in a notebook, as typos can be backtracked rather than a single manual edit of a text file

Suggestion: we edit either 'dz' or 'vi diff' or 'vi_combined_flag' to resolve conflict. At the end, we look for conflicts again and we should find none.


In [15]:
#first, keep a safe copy of the original dataframe
vi_safe = vi.copy()

We will inspect each conflict on a prospect window, and resolve each conflict in turn

In [16]:
#Keep track of the conflicts by conflict_id
conflict_id=0
#first, remind myself of the problem:
display(vi[vi.TargetID==unique_targets[conflict_id]])

Unnamed: 0,TargetID,ExpID,Spec version,Redrock version,Redrock spectype,Redrock z,VI scanner,VI class,VI issue,VI z,VI spectype,VI comment,best redshift,vi_combined_flag,vi_diff,vi_combined_z,dz
153,35185923939371905,-1,0,0,GALAXY,1.0205,ksd,3,--,1.0205,GALAXY,--,1.0205,2.5,1,0.76265,0.342703
253,35185923939371905,-1,0.33.0.dev3890,0,GALAXY,1.0205,RT,2,S,0.5048,--,"fit to HK"","" possible issue with continuum in...",0.5048,2.5,1,0.76265,0.342703


In [17]:
#now I change either 'best redshift' or 'vi ' or 'vi_combined_flag' to resolve conflict
#in this case there are two conflicts: dz and vi_diff. I'm going to keep Alastair's VI redshift, and keep the combined flag, and remove the problematic vi_diff by setting VI class to VI combined
vi_df.loc[vi_df.TargetID==unique_targets[0], 'best redshift'] = 0.0704
vi_df.loc[vi_df.TargetID==unique_targets[0], 'VI class'] = 3.0


#look at the values again to make sure all is well
display(vi_df[vi_df.TargetID==unique_targets[0]])


NameError: name 'vi_df' is not defined

In [None]:
#next one!
display(vi_df[vi_df.TargetID==unique_targets[1]])

In [None]:
#now I change either 'best redshift' or 'vi ' or 'vi_combined_flag' to resolve conflict
#in this case I can't make a call without looking at the actual spectra, but let's assume I agree with Kelly! The only conflict is dz, so I only need to change 'best redshift'

vi_df.loc[vi_df.TargetID==unique_targets[1], 'best redshift'] = 0.1475

#look at the values again to make sure all is well
display(vi_df[vi_df.TargetID==unique_targets[1]])

## you need to recompute combined redshift?

### and so on...

We should now recompute the conflicts, and not find any (except I didn't resolve everything! but the number of conflicts should now be 10, not 12)

In [None]:
vi_gp = vi_df.groupby(['TargetID'])
vi_insp = vi_gp.filter(lambda x: ( ( (x['VI class'].max()-x['VI class'].min()) >= 2) 
                       | ( (x['best redshift'].max() - x['best redshift'].min()) / (1+x['best redshift'].mean()) > 0.0033 ) )
                       & (len(x) >= 2)) #x is a group by TargetID

In [None]:
unique_targets = np.unique(vi_insp['TargetID'].tolist())
print('Targets with problematic VI: ', unique_targets)
print('Total number of conflicts to resolve: ', len(unique_targets))

In [None]:
#group by TargetID and make a csv file with the 1st of each group.
vi_gp.first().to_csv(VI_dir+'truth_table_test.txt')