In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
#import seaborn as sns
import pymongo
from pprint import pprint

In [2]:
# Connect to MongoDB; MongoClient() is called without arguments, so pymongo's
# default connection settings are used.
client = pymongo.MongoClient()
# Handle to the "metrics" database; the functions in this notebook read their
# collections through this module-level name.
db = client["metrics"]

In [3]:
def group_by(df, bycols, agg_map):
    """
    Group ``df`` by ``bycols`` and apply one aggregate per (column, function) pair,
    returning a single flat DataFrame.

    Every pair is aggregated on its own and the per-pair results are inner-joined on
    the group-by columns, so the same column may be listed with several functions.

    @param df:      DataFrame
    @param bycols:  str or iterable of str
                        Column(s) to group by
    @param agg_map: dictionary or iterable of 2-tuples
                        Mapping from column to aggregate function e.g. [("city", "count"), ("salary", "mean")]
    @return:        DataFrame
                        Flattened dataframe, with multi-level index removed. Aggregated columns
                        are named "<function>(<column>)", e.g. "mean(salary)". When agg_map is
                        empty only the distinct group keys are returned.
    """
    # isinstance instead of an exact type comparison; any other iterable of column
    # names (tuple, Index, ...) is copied into a list so it can be concatenated below.
    bycols = [bycols] if isinstance(bycols, str) else list(bycols)

    # Also covers dict subclasses (OrderedDict, defaultdict, ...); an exact type
    # check would iterate those by key only and break the (column, function) unpacking.
    if isinstance(agg_map, dict):
        agg_map = agg_map.items()

    grps = []
    for col, func in agg_map:
        grp = df[bycols + [col]].groupby(bycols).agg(func).reset_index()
        # Label callables by their __name__ rather than their repr, which would
        # embed a memory address in the column name.
        func_label = func if isinstance(func, str) else getattr(func, "__name__", func)
        grps.append(grp.rename(columns={col: "%s(%s)" % (func_label, col)}))

    if not grps:
        # Nothing to aggregate: return just the distinct group keys instead of
        # failing on grps[0].
        return df[bycols].groupby(bycols).size().reset_index()[bycols]

    merged = grps[0]
    for grp in grps[1:]:
        merged = pd.merge(merged, grp, on=bycols, how="inner")
    return merged

In [88]:
from bson.son import SON # needed to ensure dictionary is ordered (python default is not)
import hashlib

def hash_feats(fts):
    """
    Build a SHA-224 hex digest identifying one combination of values.

    The entries of ``fts.values`` are converted with ``str``, joined with "|",
    UTF-8 encoded and hashed.

    @param fts: object exposing ``.values`` (e.g. a DataFrame row passed by ``apply(axis=1)``)
    @return:    str
                    Hexadecimal SHA-224 digest of the joined values
    """
    pieces = [str(value) for value in fts.values]
    payload = "|".join(pieces).encode('utf-8')
    digest = hashlib.sha224(payload)
    return digest.hexdigest()

def get_df_sorted_by_f1score(collection, params=None, filter_cols=True):
    """
    Load the metric documents of one collection into a DataFrame sorted by
    micro F1 score, best first.

    Runs a $project / $match / $sort aggregation on ``db[collection]`` (``db`` is the
    module-level database handle). Documents without a projected micro F1 score are
    dropped by the $match stage.

    @param collection:  str
                            Name of the collection in ``db`` to query
    @param params:      None, str or iterable of str
                            Extra keys under ``parameters`` to project as columns; a str is
                            split on "," (surrounding whitespace and empty entries are dropped)
    @param filter_cols: bool
                            If True return only the micro/macro score columns plus ``params``;
                            otherwise return every projected column (and ``hs_params``, a hash
                            of the ``params`` values, when params were requested)
    @return:            DataFrame
                            Empty (but with the expected columns) when no document matches
    """
    if not params:
        params = []
    elif isinstance(params, str):
        params = [p.strip() for p in params.split(",") if p.strip()]
    else:
        # Copy so tuples / other iterables can be concatenated with lists below
        params = list(params)

    project = {
            "weighted_f1_score":"$WEIGHTED_MEAN_CONCEPT_CODES.f1_score",
            "macro_f1_score":   "$MACRO_F1",
            "micro_f1_score":  "$MICRO_F1.f1_score",
            "micro_recall":    "$MICRO_F1.recall",
            "micro_precision": "$MICRO_F1.precision",

    # PARAMETERS
            "window_size":    "$parameters.window_size",
            "feats":          "$parameters.extractors",
            "count": {        "$size" : "$parameters.extractors" },
            "asof" :          "$asof",
            "_id":1
    }

    # No count for HMM
    # NOTE(review): presumably HMM documents have no parameters.extractors array
    # for $size to operate on - confirm against the stored documents.
    if "_hmm" in collection.lower():
        del project["count"]

    for param in params:
        project[param] = "$parameters." + param

    feats_pipeline = [
        {"$project": project},
        # Keep only documents for which a micro F1 score was projected
        {"$match": {"micro_f1_score": {"$exists": True}}},
        {"$sort": {"micro_f1_score": -1}},
    ]

    rows = list(db[collection].aggregate(feats_pipeline))
    score_cols = ["micro_f1_score", "micro_recall" ,"micro_precision", "macro_f1_score" ] + params

    if not rows:
        # DataFrame([]) has no columns, so sort_values("micro_f1_score") would raise
        # KeyError; return an empty frame with the columns a caller would expect.
        all_cols = list(project) + (["hs_params"] if params else [])
        return pd.DataFrame(columns=score_cols if filter_cols else all_cols)

    df = pd.DataFrame(rows).sort_values("micro_f1_score", ascending=False)
    if params:
        # One hash per row identifying its combination of parameter values
        df["hs_params"] = df[params].apply(hash_feats, axis=1)

    if filter_cols:
        return df[score_cols]
    return df

In [89]:
def get_window_classifier_results(prefix):
    """
    Collect the top-scoring row of each window-classifier collection under ``prefix``.

    For each of the three window-classifier collection suffixes, the full collection
    name (``prefix`` + suffix) is printed, its best row (by micro F1) is fetched via
    ``get_df_sorted_by_f1score`` and tagged with the collection name minus the prefix.

    @param prefix: str
                        Collection-name prefix; its last character is dropped when
                        building the name of the label column
    @return:       DataFrame
                        One row per collection, sorted by micro F1 score descending
    """
    suffixes = [
        "WINDOW_CLASSIFIER_BR",
        "WINDOW_CLASSIFIER_LBL_POWERSET_MULTICLASS",
        "WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS",
    ]
    label_col = "Collection_" + prefix[:-1]

    best_rows = []
    for suffix in suffixes:
        full_name = prefix + suffix
        print(full_name)
        # First row of the sorted frame is the best-scoring configuration
        best = dict(get_df_sorted_by_f1score(full_name).iloc[0, :])
        best[label_col] = full_name.replace(prefix, "")
        best_rows.append(best)

    return pd.DataFrame(best_rows).sort_values("micro_f1_score", ascending=False)

In [90]:
def round_data(df, places=3):
    """
    Return a copy of ``df`` whose "micro_" columns are rendered as fixed-point strings.

    Only columns whose name contains "micro_" are touched; their values are
    formatted with ``places`` digits after the decimal point. The input frame is
    left unmodified.

    @param df:     DataFrame
    @param places: int
                        Number of decimal places in the formatted strings
    @return:       DataFrame
                        Copy of df with the matching columns converted to str
    """
    template = "{0:." + str(places) + "f}"

    def _fmt(value):
        return template.format(value)

    result = df.copy()
    micro_cols = {name for name in result.columns.values if "micro_" in name}
    for name in micro_cols:
        result[name] = df[name].apply(_fmt)
    return result

In [91]:
def compute_macro_metrics(coll):
    """
    Print the stored macro F1 plus the macro-averaged recall and precision for
    every document in ``db[coll]`` (``db`` is the module-level database handle).

    Per-code entries are taken to be the top-level keys whose first character is a
    digit; each is expected to hold "precision" and "recall" values, which are
    averaged without weighting. For each document the collection name, the metrics
    line and the sorted list of code keys are printed.

    @param coll: str
                    Name of the collection in ``db`` to read
    @return:     None
    """
    for row in db[coll].find({}):
        # k[:1] (not k[0]) so an empty key cannot raise IndexError
        keys = [k for k in row.keys() if k[:1].isdigit()]
        precision = [row[k]["precision"] for k in keys]
        recall = [row[k]["recall"] for k in keys]

        macro_f1 = row["MACRO_F1"]
        # A document without per-code entries reports nan instead of failing
        # (np.mean of an empty list would also emit a RuntimeWarning).
        mprec = np.mean(precision) if precision else float("nan")
        mrec = np.mean(recall) if recall else float("nan")

        print(coll)
        print("macro_f1:{mf1:.4f}\tmrec:{rec:.4f}\tmprec:{prec:.4f}".format(mf1=macro_f1, prec=mprec, rec=mrec))
        print(",".join(sorted(keys)))
        print("")

In [92]:
# Print macro metrics for each CB test collection, one per tagging algorithm.
for collection in (
    "TEST_CB_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS",
    "TEST_CB_TAGGING_VD_CRF_MOST_COMMON_TAG",
    "TEST_CB_TAGGING_VD_HMM_MOST_COMMON_TAG_MULTICLASS",
    "TEST_CB_TAGGING_VD_AVG_PERCEPTRON_MOST_COMMON_TAG",
    "TEST_CB_TAGGING_VD_RNN_MOST_COMMON_TAG",
):
    compute_macro_metrics(collection)

TEST_CB_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS
macro_f1:0.7400	mrec:0.6887	mprec:0.7995
1,11,12,13,14,2,3,4,5,50,5b,6,7

TEST_CB_TAGGING_VD_CRF_MOST_COMMON_TAG
macro_f1:0.7250	mrec:0.6763	mprec:0.7812
1,11,12,13,14,2,3,4,5,50,5b,6,7

TEST_CB_TAGGING_VD_HMM_MOST_COMMON_TAG_MULTICLASS
macro_f1:0.6575	mrec:0.7246	mprec:0.6017
1,11,12,13,14,2,3,4,5,50,5b,6,7

TEST_CB_TAGGING_VD_AVG_PERCEPTRON_MOST_COMMON_TAG
macro_f1:0.7365	mrec:0.6906	mprec:0.7891
1,11,12,13,14,2,3,4,5,50,5b,6,7

TEST_CB_TAGGING_VD_RNN_MOST_COMMON_TAG
macro_f1:0.7692	mrec:0.7558	mprec:0.7830
1,11,12,13,14,2,3,4,5,50,5b,6,7



In [93]:
# Print macro metrics for each SC test collection; these are the CB collection
# names with "CB" replaced by "SC".
for collection in (
    "TEST_SC_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS",
    "TEST_SC_TAGGING_VD_CRF_MOST_COMMON_TAG",
    "TEST_SC_TAGGING_VD_HMM_MOST_COMMON_TAG_MULTICLASS",
    "TEST_SC_TAGGING_VD_AVG_PERCEPTRON_MOST_COMMON_TAG",
    "TEST_SC_TAGGING_VD_RNN_MOST_COMMON_TAG",
):
    compute_macro_metrics(collection)

TEST_SC_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS
macro_f1:0.7606	mrec:0.6928	mprec:0.8432
1,11,12,2,3,4,5,50,6

TEST_SC_TAGGING_VD_CRF_MOST_COMMON_TAG
macro_f1:0.7557	mrec:0.6851	mprec:0.8425
1,11,12,2,3,4,5,50,6

TEST_SC_TAGGING_VD_HMM_MOST_COMMON_TAG_MULTICLASS
macro_f1:0.6439	mrec:0.6778	mprec:0.6132
1,11,12,2,3,4,5,50,6

TEST_SC_TAGGING_VD_AVG_PERCEPTRON_MOST_COMMON_TAG
macro_f1:0.7573	mrec:0.6896	mprec:0.8397
1,11,12,2,3,4,5,50,6

TEST_SC_TAGGING_VD_RNN_MOST_COMMON_TAG
macro_f1:0.7793	mrec:0.7111	mprec:0.8619
1,11,12,2,3,4,5,50,6



In [94]:
def print_table_row(coll, algo):
    """
    Print one LaTeX-style table row ("&"-separated, terminated by "\\\\") for every
    document in ``db[coll]`` (``db`` is the module-level database handle).

    The row holds, in order: the algorithm label left-justified to 25 characters,
    the stored macro F1, the macro-averaged recall and the macro-averaged precision
    (each with 3 decimals). Per-code entries are taken to be the top-level keys whose
    first character is a digit; each is expected to hold "precision" and "recall".

    @param coll: str
                    Name of the collection in ``db`` to read
    @param algo: str
                    Label printed in the first column
    @return:     None
    """
    for row in db[coll].find({}):
        # k[:1] (not k[0]) so an empty key cannot raise IndexError
        code_keys = [k for k in row.keys() if k[:1].isdigit()]
        precision = [row[k]["precision"] for k in code_keys]
        recall = [row[k]["recall"] for k in code_keys]

        macro_f1 = row["MACRO_F1"]
        # A document without per-code entries reports nan instead of failing
        # (np.mean of an empty list would also emit a RuntimeWarning).
        mprec = np.mean(precision) if precision else float("nan")
        mrec = np.mean(recall) if recall else float("nan")

        print("{algo} &\t{mf1:.3f}\t\t\t&\t{rec:.3f}\t\t\t&\t{prec:.3f} \\\\".format(algo=algo.ljust(25), mf1=macro_f1, prec=mprec, rec=mrec))

## Generate Macro Metrics Table

In [95]:
def get_algo_name(coll):
    """
    Map a collection name to the display name of the tagging algorithm it holds.

    The name is matched by substring against a fixed list of markers; the first
    marker found (in the order below) decides the result.

    @param coll: str
                    Collection name, e.g. "TEST_CB_TAGGING_VD_CRF_MOST_COMMON_TAG"
    @return:     str or None
                    Display name, or None when no marker occurs in ``coll``
    """
    # Order matters: earlier markers take precedence when several match.
    markers = (
        ("WINDOW", "Window-Based Tagger"),
        ("CRF", "CRF"),
        ("HMM", "HMM"),
        ("PERCEPTRON", "Structured Perceptron"),
        ("RNN", "Bidirectional RNN"),
    )
    for marker, display_name in markers:
        if marker in coll:
            return display_name
    return None

### Coral Bleaching

In [96]:
# Emit one table row per tagging algorithm for the Coral Bleaching (CB) test collections.
for collection in (
    "TEST_CB_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS",
    "TEST_CB_TAGGING_VD_CRF_MOST_COMMON_TAG",
    "TEST_CB_TAGGING_VD_HMM_MOST_COMMON_TAG_MULTICLASS",
    "TEST_CB_TAGGING_VD_AVG_PERCEPTRON_MOST_COMMON_TAG",
    "TEST_CB_TAGGING_VD_RNN_MOST_COMMON_TAG",
):
    print_table_row(collection, get_algo_name(collection))

Window-Based Tagger       &	0.740			&	0.689			&	0.800 \\
CRF                       &	0.725			&	0.676			&	0.781 \\
HMM                       &	0.657			&	0.725			&	0.602 \\
Structured Perceptron     &	0.737			&	0.691			&	0.789 \\
Bidirectional RNN         &	0.769			&	0.756			&	0.783 \\


### Skin Cancer

In [97]:
# Emit one table row per tagging algorithm for the Skin Cancer (SC) test collections;
# these are the CB collection names with "CB" replaced by "SC".
for collection in (
    "TEST_SC_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS",
    "TEST_SC_TAGGING_VD_CRF_MOST_COMMON_TAG",
    "TEST_SC_TAGGING_VD_HMM_MOST_COMMON_TAG_MULTICLASS",
    "TEST_SC_TAGGING_VD_AVG_PERCEPTRON_MOST_COMMON_TAG",
    "TEST_SC_TAGGING_VD_RNN_MOST_COMMON_TAG",
):
    print_table_row(collection, get_algo_name(collection))

Window-Based Tagger       &	0.761			&	0.693			&	0.843 \\
CRF                       &	0.756			&	0.685			&	0.843 \\
HMM                       &	0.644			&	0.678			&	0.613 \\
Structured Perceptron     &	0.757			&	0.690			&	0.840 \\
Bidirectional RNN         &	0.779			&	0.711			&	0.862 \\
