# Consolidated Tables
This notebook creates one dataframe that consolidates the information from the results .csv files. It also defines a function, `pct_dif_table()`, which pares the consolidated dataframe down to only the rows where the measurements differ from one another by at least a specified percentage of the median.


In [None]:
import numpy as np
import pandas as pd


In [None]:
# Load each measurer's results table.
ella = pd.read_csv('../results/ella_csp.csv')
emily = pd.read_csv('../results/emily_csp.csv')
anish = pd.read_csv('../results/anish_csp.csv')

# Remove objects 2007le and 2008ia from Ella's table, one filter at a time.
inter = ella.loc[ella['obj_id'] != '2007le']
ella_final = inter.loc[inter['obj_id'] != '2008ia']


In [None]:
# Index every table by (object, feature) so the joins below align row-for-row.
index_cols = ['obj_id', 'feat_name']
ella2, emily2, anish2 = (
    frame.set_index(index_cols) for frame in (ella_final, emily, anish)
)

# Tag Anish's measurement columns by name up front; Ella's and Emily's
# overlapping columns are tagged by the join suffixes instead.
anish_renames = {'pew': 'pew_anish', 'pew_samperr': 'pew_samperr_anish'}
anish3 = anish2.rename(columns=anish_renames)

inter = ella2.join(emily2, lsuffix="_ella", rsuffix='_emily')
combined = inter.join(anish3)


## Simplified table with only measurements and errors

In [None]:
# Keep only each measurer's value and its error column.
measurement_cols = [
    'pew_ella', 'pew_samperr_ella',
    'pew_emily', 'pew_samperr_emily',
    'pew_anish', 'pew_samperr_anish',
]
final = combined[measurement_cols].copy()
final


## Creates table with specified percent difference

In [None]:
def pct_dif_table(table, percent):
    """Return the rows whose measurements disagree by >= ``percent`` of the median.

    For every row, the three pairwise absolute differences between
    ``pew_ella``, ``pew_emily`` and ``pew_anish`` are compared with
    ``percent`` percent of that row's median measurement. A row is kept when
    at least one of the pairwise differences reaches the threshold.

    Parameters:
        table: DataFrame with ``pew_ella``, ``pew_emily`` and ``pew_anish``
            columns (normally ``final``, the table made above). If it also
            has a ``pew_median`` column, that column is used as the median;
            otherwise the median is computed row-wise from the three
            measurement columns.
        percent: threshold expressed as a percentage of the median
            (e.g. 5 means 5%).

    Returns:
        df: the subset of ``table`` rows that meet the threshold. ``table``
            itself is not modified.
    """
    pew_cols = ['pew_ella', 'pew_emily', 'pew_anish']

    # The simplified table only carries measurements and errors, so a
    # precomputed median is usually absent; derive it here in that case.
    if 'pew_median' in table.columns:
        median = table['pew_median']
    else:
        median = table[pew_cols].median(axis=1)

    threshold = (percent / 100) * median

    ella, emily, anish = (table[col] for col in pew_cols)
    exceeds = (
        ((ella - emily).abs() >= threshold)
        | ((ella - anish).abs() >= threshold)
        | ((emily - anish).abs() >= threshold)
    )
    return table[exceeds]


In [None]:
# Show the rows where any two measurers differ by at least 5% of the median.
pct_dif_table(final, percent=5)
