In [1]:
import numpy as np
from astropy.table import Table, join, Column
from astropy.io import fits

# Set up the table and load everyone's classifications

In [2]:
def build_table(results):
    results = [r.split() for r in results]
    resultst = []
    for row in results:
        if len(row) == 2:
            resultst.append({'name':row[0], 'morph':row[1], 'flags':''})
        else:
            resultst.append({'name':row[0], 'morph':row[1], 'flags':row[2]})
            
    results = Table(resultst)
    results.rename_column('morph', 'morph_{}'.format(name))
    results.rename_column('flags', 'flags_{}'.format(name))
    
    return results

In [3]:
classifications = Table()
classifiers = ['christine', 'pier', 'tom', 'vandana', 'matt']

for name in classifiers:
    with open('results_{}.dat'.format(name)) as fin:
        results = fin.readlines()    
    results = build_table(results)

    if len(classifications) == 0:
        classifications = results
        continue
    else:
        classifications = join(classifications, results, keys='name', join_type='left')

# Merge all of the flag columns and remove duplicates
flagcol = []
for row in classifications:
    allflags = [row['flags_'+cler] for cler in classifiers]
    flagcol.append('|'.join(set('|'.join(filter(None, allflags)).split('|'))))
classifications.add_column(Column(flagcol), name='flags')
classifications = classifications['name', 'morph_matt', 'morph_pier', 'morph_tom', 
                                  'morph_christine', 'morph_vandana', 'flags']

# Add RA and DEC to the table for matching purposes
classifications['RA'] = np.zeros(len(classifications))
classifications['DEC'] = np.zeros(len(classifications))

for row in classifications:
    hdr = fits.open('/Users/matthewkirby/mymorphologies/classification_code/cutouts/{}'.format(row['name']))[0].header
    row['RA'] = hdr['RA']
    row['DEC'] = hdr['DEC']

# Combine the two filters
1. Make a copy of the table and rename everything to A and B
2. Loop over original table

    A. Loop over the secondary table
    
        I. Delete the identical row  in secondary
        II. Copy the morphs from the second filter in secondary
        III. Delete second filter row from primary

In [4]:
secondary = Table(classifications, copy=True)
for cl in classifiers:
    classifications.rename_column('morph_'+cl, 'morph_'+cl+'_A')
    classifications['morph_'+cl+'_B'] = 'no_classssssssssssssssssss'
    classifications['matching_cutout'] = 'cutoutXXXXXXXXXX.fits'

In [5]:
# Match between the two filters
for ogrow in classifications:
    for row in secondary:
        if abs(row['RA']-ogrow['RA']) < 0.0002 and abs(row['DEC']-ogrow['DEC']) < 0.0002:
            if not row['name'] == ogrow['name']:
                ogrow['matching_cutout'] = row['name']
                for cl in classifiers:
                    ogrow['morph_'+cl+'_B'] = row['morph_'+cl]
                break

In [6]:
# Delete duplicate rows
for i in range(int(len(classifications)/2)):
    rowidx = np.where(classifications['name'] == classifications[i]['matching_cutout'])[0][0]
    classifications.remove_row(rowidx)

# Set the final classifications

In [7]:
# Step 1: In a single morph response, establish weight based on the amount of | in the response
# Step 2: Add the weight to the appropriate bin in the dictionary 
# Step 3: Return the highest weighted response
def find_morph(morphs):
    cl = {'E':0.0, 'S0':0.0, 'Sa':0.0, 'Sb':0.0, 'Sc':0.0, 'Sd':0.0, 'Sm':0.0, 'Irr':0.0, 'U':0.0, 'C':0.0}
    for m in morphs:
        w = 1./(1+m.count('|'))
        m = m.split('|')
        for submorph in m:
            cl = apply_weight(submorph, w, cl)
    
    # Select the most popular classification. In a tie, randomly choose
    vmax = max(cl.values())
    final_options = [k for k in cl.keys() if abs(cl[k]-vmax) <= 0.01]
    return np.random.choice(final_options)
    
def apply_weight(m, w, cl):
    if m == 'Unclassifiable':
        cl['U'] += w
    elif m == 'Compact,-not-star':
        cl['C'] += w
    else:
        cl[m] += w
    return cl
            

In [8]:
good_class = ['christine', 'vandana', 'matt']
final_morphs = []
for row in classifications:
    morphs = [row['morph_'+cl+'_A'] for cl in good_class] + [row['morph_'+cl+'_B'] for cl in good_class]
    final_morphs.append(find_morph(morphs))
classifications.add_column(Column(final_morphs), name='final')

# Evaluate how much each person agrees with the final morphology

In [9]:
for cl in classifiers:
    f = 0.0
    N = 0.0
    for row in classifications:
        if row['final'] in ['U', 'C']:
            continue
        N += 1.0
        if row['final'] in row['morph_'+cl+'_A'].split('|'):
            f+=1.0
            continue
        if row['final'] in row['morph_'+cl+'_B'].split('|'):
            f+=1.0
            continue        
    
    print('{} got {} out of {} true. {} percent'. format(cl, f, N, f/N))

christine got 104.0 out of 127.0 true. 0.8188976377952756 percent
pier got 74.0 out of 127.0 true. 0.5826771653543307 percent
tom got 67.0 out of 127.0 true. 0.5275590551181102 percent
vandana got 101.0 out of 127.0 true. 0.7952755905511811 percent
matt got 92.0 out of 127.0 true. 0.7244094488188977 percent


In [10]:
def count_class(cllist, morph):
    tot=0
    for m in morph:
        tot+=len(np.where(cllist['final'] == m)[0])
    return tot

In [11]:
n_e = count_class(classifications, ['E'])
n_s0 = count_class(classifications, ['S0'])
n_sp = count_class(classifications, ['Sa', 'Sb', 'Sc', 'Sd', 'Sm'])
n_irr = count_class(classifications, ['Irr'])

In [12]:
print('{} ellipticals'.format(n_e))
print('{} s0'.format(n_s0))
print('{} spirals'.format(n_sp))
print('{} irregulars'.format(n_irr))
print('{} others'.format(len(classifications)-n_e-n_s0-n_sp-n_irr))

37 ellipticals
14 s0
55 spirals
21 irregulars
12 others


In [13]:
output_catalog = classifications['name', 'RA', 'DEC', 'final', 'flags']
output_catalog.write('final_morphologies.csv', format='csv', overwrite=True)