# Consolidated Tables
This notebook builds one consolidated dataframe from the per-person results .csv files. It also defines a function, pct_dif_table(), which pares the consolidated dataframe down to the rows where at least two of the measurements differ from each other by at least a specified percentage of the row's median measurement.


In [1]:
import numpy as np
import pandas as pd

In [2]:
# One results table per measurer; paths are relative to the notebook's
# working directory.
ella = pd.read_csv('results copy/ella_csp.csv')
emily = pd.read_csv('results copy/emily_csp.csv')
anish = pd.read_csv('results copy/anish_csp.csv')

# Objects removed from Ella's table before consolidation.
# TODO: record why these two objects are excluded.
EXCLUDED_OBJ_IDS = ['2007le', '2008ia']

# A single mask replaces the two chained filters and avoids a throwaway
# intermediate name that a later cell rebinds to a different frame.
ella_final = ella[~ella.obj_id.isin(EXCLUDED_OBJ_IDS)]

In [3]:
# Key every measurer's table on (obj_id, feat_name) so the joins below align
# rows by object and feature.
key_cols = ['obj_id', 'feat_name']
ella2, emily2, anish2 = (
    frame.set_index(key_cols) for frame in (ella_final, emily, anish)
)

# Anish's measurement columns are renamed up front; the other two tables get
# their suffixes from the join itself.
anish3 = anish2.rename(
    columns={'pew': 'pew_anish', 'pew_samperr': 'pew_samperr_anish'}
)

# DataFrame.join defaults to how='left', so both joins are keyed on the index
# of ella2.
inter = ella2.join(emily2, lsuffix="_ella", rsuffix='_emily')
combined = inter.join(anish3)

## Simplified table with only measurements and errors

In [4]:
# Keep only each person's measurement and its error column; .copy() makes
# `final` an independent frame rather than a selection of `combined`.
measurement_cols = [
    'pew_ella', 'pew_samperr_ella',
    'pew_emily', 'pew_samperr_emily',
    'pew_anish', 'pew_samperr_anish',
]
final = combined.loc[:, measurement_cols].copy()
final

Unnamed: 0_level_0,Unnamed: 1_level_0,pew_ella,pew_samperr_ella,pew_emily,pew_samperr_emily,pew_anish,pew_samperr_anish
obj_id,feat_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2004ef,pW1,13.486097,8.617782,,,10.200355,19.901073
2004ef,pW2,12.432846,3.538539,11.949161,3.397220,12.432846,3.538539
2004ef,pW3,86.949994,6.117037,86.759862,8.132574,88.346404,6.506680
2004ef,pW4,174.768556,7.198777,174.768556,7.198777,174.768556,7.198777
2004ef,pW5,77.613915,4.167822,84.288745,5.536000,81.630045,2.541775
...,...,...,...,...,...,...,...
2009ag,pW4,145.155597,3.440187,146.469528,2.545504,140.409265,6.526490
2009ag,pW5,67.846124,1.994622,67.041939,2.695988,67.041939,2.695988
2009ag,pW6,15.765346,1.123412,15.827794,1.186468,14.622018,0.820812
2009ag,pW7,104.919371,0.494985,104.947436,0.498510,104.823633,0.571549


## Creates table with specified percent difference

In [44]:
def pct_dif_table(table, percent):
    '''Keep the rows where any two measurements disagree by >= percent of the median.

    For each row the threshold is ``percent / 100 * pew_median``. A row is kept
    when the absolute difference of at least one pair among ``pew_ella``,
    ``pew_emily`` and ``pew_anish`` reaches that threshold. A pair with a
    missing (NaN) measurement never satisfies the comparison, so such a row is
    kept only if another pair does.

    Parameters:
    table: DataFrame with the columns pew_ella, pew_emily and pew_anish
        (normally ``final``, the table made above). If it has no
        ``pew_median`` column, the row-wise median of those three columns
        (missing values skipped) is computed and added to the result; an
        existing ``pew_median`` column is used as-is. ``table`` itself is
        never modified.
    percent: percent difference, e.g. 5 for 5%

    Returns:
    df: new DataFrame with the qualifying rows, including ``pew_median``'''

    pew_cols = ['pew_ella', 'pew_emily', 'pew_anish']

    if 'pew_median' in table.columns:
        final = table
    else:
        # No cell in the notebook creates this column, so derive it here; this
        # keeps the function working after Restart & Run All. assign() returns
        # a new frame, leaving the caller's table untouched.
        final = table.assign(pew_median=table[pew_cols].median(axis=1))

    threshold = (percent / 100) * final['pew_median']

    ella_vs_emily = (final['pew_ella'] - final['pew_emily']).abs()
    ella_vs_anish = (final['pew_ella'] - final['pew_anish']).abs()
    emily_vs_anish = (final['pew_emily'] - final['pew_anish']).abs()

    disagree = (
        (ella_vs_emily >= threshold)
        | (ella_vs_anish >= threshold)
        | (emily_vs_anish >= threshold)
    )
    df = final[disagree]
    return df

# Show the rows where at least one pair of measurements differs by >= 5% of pew_median.
pct_dif_table(final, 5)

Unnamed: 0_level_0,Unnamed: 1_level_0,pew_ella,pew_samperr_ella,pew_emily,pew_samperr_emily,pew_anish,pew_samperr_anish,pew_median
obj_id,feat_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2004ef,pW1,13.486097,8.617782,,,10.200355,19.901073,11.843226
2004ef,pW5,77.613915,4.167822,84.288745,5.536000,81.630045,2.541775,81.630045
2004ey,pW2,9.309839,2.064206,8.916625,2.601660,8.345257,2.742019,8.916625
2004ey,pW5,86.568252,3.288412,70.909024,4.326454,70.909024,4.326454,70.909024
2004ey,pW6,13.282813,2.382360,9.803429,1.028993,9.803429,1.028993,9.803429
...,...,...,...,...,...,...,...,...
2009ad,pW3,89.261771,2.288761,82.512185,5.639453,83.054608,5.081306,83.054608
2009ad,pW5,44.910449,1.366176,44.938999,1.373069,39.533948,2.635123,44.910449
2009ad,pW6,12.839242,0.839784,12.772533,0.921242,12.182359,0.966042,12.772533
2009ad,pW7,40.902654,2.042618,,,34.084488,2.942108,37.493571
