In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
#import seaborn as sns
import pymongo
from pprint import pprint

# Connect to the MongoDB Database

In [7]:
# Open a client with the driver's default connection settings, then pick the
# database that holds the stored metrics (queried later as `db[collection]`).
client = pymongo.MongoClient()
db = client["metrics_coref_rnn"]

In [8]:
def group_by(df, bycols, agg_map):
    """
    Group `df` by `bycols` and compute one aggregate column per (column, function) pair.

    Every pair is aggregated on its own and the per-pair results are inner-joined
    on the grouping columns, so the same column may be aggregated several times
    with different functions.

    @param df:      DataFrame
    @param bycols:  str or list/tuple
                        Column(s) to group by
    @param agg_map: dictionary or list of 2-tuples
                        Mapping from column to aggregate function e.g. [("city", "count"), ("salary", "mean")]
                        The function is either an aggregation name or a callable.
    @return:        DataFrame
                        Flattened dataframe, with multi-level index removed. Aggregate
                        columns are named "<function>(<column>)", e.g. "mean(salary)";
                        callables are labelled by their __name__. With an empty agg_map
                        only the distinct group keys are returned.
    """
    # Normalise the grouping columns to a fresh list (also accepts tuples).
    if isinstance(bycols, str):
        bycols = [bycols]
    else:
        bycols = list(bycols)

    if isinstance(agg_map, dict):
        agg_map = agg_map.items()
    agg_pairs = list(agg_map)

    # Nothing to aggregate: return the group keys instead of failing on grps[0].
    if not agg_pairs:
        return df[bycols].groupby(bycols).size().reset_index()[bycols]

    merged = None
    for col, func in agg_pairs:
        # Use a stable label for callables; "%s" % func would embed a memory address.
        if isinstance(func, str):
            func_name = func
        else:
            func_name = getattr(func, "__name__", str(func))
        label = "%s(%s)" % (func_name, col)

        grp = (
            df[bycols + [col]]
            .groupby(bycols)
            .agg(func)
            .reset_index()
            .rename(columns={col: label})
        )
        if merged is None:
            merged = grp
        else:
            merged = pd.merge(merged, grp, on=bycols, how="inner")
    return merged

In [9]:
from bson.son import SON # needed to ensure dictionary is ordered (python default is not)
import hashlib

def hash_feats(fts):
    """
    Fingerprint a row of values.

    The entries of `fts.values` are converted to strings, joined with "|",
    UTF-8 encoded and hashed with SHA-224.

    @param fts: object exposing `.values` (e.g. a pandas Series / DataFrame row)
    @return:    str
                    Hex digest of the hash
    """
    pieces = [str(value) for value in fts.values]
    payload = "|".join(pieces).encode('utf-8')
    digest = hashlib.sha224(payload)
    return digest.hexdigest()

def get_df_sorted_by_f1score(collection, params=None, filter_cols=True):
    """
    Load the metrics stored in a Mongo collection as a DataFrame, best micro F1 first.

    @param collection:  str
                            Name of the collection to query in the module-level `db`
    @param params:      None, str or list/tuple of str
                            Field names under `parameters` to project as extra columns.
                            A string is split on commas.
    @param filter_cols: bool
                            If True, return only the micro/macro score columns plus `params`.
                            Otherwise return every projected column, plus `hs_params`
                            (a hash of the parameter values) when `params` is non-empty.
    @return:            DataFrame
                            One row per document whose projected `micro_f1_score` exists,
                            sorted by `micro_f1_score` descending. When no document
                            matches, an empty frame with the expected columns is returned.
    """
    # Normalise `params` to a list so it can be concatenated with the metric columns.
    if not params:
        params = []
    elif isinstance(params, str):
        params = params.split(",")
    else:
        params = list(params)

    project = {
            "weighted_f1_score":"$WEIGHTED_MEAN_CONCEPT_CODES.f1_score",
            "macro_f1_score":   "$MACRO_F1",
            "micro_f1_score":  "$MICRO_F1.f1_score",
            "micro_recall":    "$MICRO_F1.recall",
            "micro_precision": "$MICRO_F1.precision",

    # PARAMETERS
            "window_size":    "$parameters.window_size",
            "feats":          "$parameters.extractors",
            "count": {        "$size" : "$parameters.extractors" },
            "asof" :          "$asof",
            "_id":1
    }

    # No count for HMM
    if "_hmm" in collection.lower():
        del project["count"]

    for param in params:
        project[param] = "$parameters." + param

    feats_pipeline = [
        {"$project": project},
        # Keep only documents for which a micro F1 score was projected.
        {"$match": {"micro_f1_score": {"$exists": True}}},
        {"$sort": {"micro_f1_score": -1}},
    ]

    rows = list(db[collection].aggregate(feats_pipeline))
    cols = ["micro_f1_score", "micro_recall" ,"micro_precision", "macro_f1_score" ] + params

    # An empty result has no "micro_f1_score" column, so sort_values would raise
    # KeyError; return an empty frame with the expected columns instead.
    if not rows:
        if filter_cols:
            return pd.DataFrame(columns=cols)
        all_cols = list(project) + (["hs_params"] if params else [])
        return pd.DataFrame(columns=all_cols)

    df = pd.DataFrame(rows).sort_values("micro_f1_score", ascending=False)
    if params:
        # One hash per row over the parameter values.
        df["hs_params"] = df[params].apply(hash_feats, axis=1)

    if filter_cols:
        return df[cols]
    return df

In [10]:
def get_window_classifier_results(collections):
    """
    Build a summary table holding the top-ranked row of each collection.

    For every collection name, the name is printed, the first row returned by
    get_df_sorted_by_f1score is taken, and a "Collection_<name>" entry holding the
    name is added to it.

    @param collections: iterable of str
                            Collection names to summarise
    @return:            DataFrame
                            One row per collection, sorted by micro_f1_score descending
    """
    best_rows = []
    for name in collections:
        print(name)
        top_row = get_df_sorted_by_f1score(name).iloc[0, :]
        record = dict(top_row)
        record["Collection_" + name] = name
        best_rows.append(record)
    summary = pd.DataFrame(best_rows)
    return summary.sort_values("micro_f1_score", ascending=False)

# Which Problem Transformation Method Was Best?

In [11]:
def round_data(df, places=3):
    """
    Return a copy of `df` with its "micro_" columns formatted as fixed-point strings.

    Only columns whose name contains "micro_" are touched; their values are
    replaced by strings with `places` digits after the decimal point. The input
    frame is left unmodified.

    @param df:     DataFrame
    @param places: int
                       Number of decimal places to keep
    @return:       DataFrame
                       Copy of `df` with the formatted columns
    """
    template = "{0:." + str(places) + "f}"
    formatted = df.copy()
    micro_cols = {name for name in formatted.columns.values if "micro_" in name}
    for name in micro_cols:
        formatted[name] = df[name].apply(lambda value: template.format(value))
    return formatted

## Coral Bleaching

In [13]:
# IPython help query (interactive only): shows the help for get_df_sorted_by_f1score.
get_df_sorted_by_f1score?

In [16]:
# Get all results stored in this collection, sorted by micro F1 score (best first).
# The bare `df` on the last line displays the table.
df = get_df_sorted_by_f1score("CB_TAGGING_VD_RNN_BINARY_FIXED")
df

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,macro_f1_score
0,0.312012,0.294118,0.332226,0.0
1,0.307692,0.241176,0.42487,0.0
2,0.284585,0.211765,0.433735,0.0
3,0.164009,0.105882,0.363636,0.0
4,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0


In [17]:
# Top-ranked row for the collection, with the micro_* columns formatted to 4 decimal places.
df = get_window_classifier_results(["CB_TAGGING_VD_RNN_BINARY_FIXED"])
df = round_data(df, 4)
df#["Collection_CB_TAGGING_VD,micro_f1_score,micro_recall,micro_precision".split(",")]

CB_TAGGING_VD_RNN_BINARY_FIXED


Unnamed: 0,Collection_CB_TAGGING_VD_RNN_BINARY_FIXED,macro_f1_score,micro_f1_score,micro_precision,micro_recall
0,CB_TAGGING_VD_RNN_BINARY_FIXED,0.0,0.312,0.3322,0.2941


## Skin Cancer

In [18]:
# Top-ranked row for the collection, with the micro_* columns formatted to 4 decimal places.
df = get_window_classifier_results(["SC_TAGGING_VD_RNN_BINARY_FIXED"])
df = round_data(df, 4)
df#["Collection_SC_TAGGING_VD,micro_f1_score,micro_recall,micro_precision".split(",")]

SC_TAGGING_VD_RNN_BINARY_FIXED


Unnamed: 0,Collection_SC_TAGGING_VD_RNN_BINARY_FIXED,macro_f1_score,micro_f1_score,micro_precision,micro_recall
0,SC_TAGGING_VD_RNN_BINARY_FIXED,0.0,0.3549,0.5701,0.2577


# Hyper Parameter Tuning Results

In [19]:
# Number of top-ranked rows to display in the tuning tables below (passed to df.head)
ROWS = 5

- The cells below report the VD metrics. To get the TD metrics instead, change `VD` to `TD` in the collection names below.

## RNN Anaphor Tagger Hyper Parameter Tuning

### Coral Bleaching

In [30]:
# Parameter fields to show next to the scores; each is projected from the
# documents' `parameters` sub-document by get_df_sorted_by_f1score.
params = "use_pretrained_embedding,bi-directional,num_rnns,hidden_size".split(",") # merge_mode,
collection = "CB_TAGGING_VD_RNN_BINARY_FIXED"

df = get_df_sorted_by_f1score(collection, params)
    
# Display the ROWS best configurations, micro_* columns formatted to 4 decimal places.
round_data(df.head(ROWS),4)
#df

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,macro_f1_score,use_pretrained_embedding,bi-directional,num_rnns,hidden_size
0,0.312,0.2941,0.3322,0.0,True,True,2,256
1,0.3077,0.2412,0.4249,0.0,True,True,2,128
2,0.2846,0.2118,0.4337,0.0,True,True,1,256
3,0.164,0.1059,0.3636,0.0,True,True,2,64
4,0.0,0.0,0.0,0.0,True,True,1,128


### Skin Cancer

In [31]:
# NOTE: `params` is not redefined here (the assignment below is commented out), so
# this cell reuses the `params` list set in the Coral Bleaching tuning cell above.
#params = "dual,C,penalty,fit_intercept,multi_class,window_size".split(",")
collection = "SC_TAGGING_VD_RNN_BINARY_FIXED"

df = get_df_sorted_by_f1score(collection, params)
#df = df[df.dual==True]
#df = df[df.C==0.5]
#df = df[df.penalty =='l2']
# Display the ROWS best configurations, micro_* columns formatted to 3 decimal places.
round_data(df.head(ROWS),3)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,macro_f1_score,use_pretrained_embedding,bi-directional,num_rnns,hidden_size
0,0.355,0.258,0.57,0.0,True,True,1,256
1,0.34,0.27,0.458,0.0,True,True,2,256
2,0.316,0.215,0.597,0.0,True,True,2,128
3,0.2,0.121,0.584,0.0,True,True,1,128
4,0.143,0.086,0.433,0.0,True,True,2,64


# Test Set Performance

## <span style="color:red; font-weight:bold">TODO >>> </span>

### Coral Bleaching

In [12]:
# params = "dual,C,penalty,fit_intercept,multi_class,window_size".split(",")
# collection = "TEST_CB_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS"

# df = get_df_sorted_by_f1score(collection, params)
# round_data(df.head(ROWS),3)

### Skin Cancer

In [13]:
# params = "dual,C,penalty,fit_intercept,multi_class,window_size".split(",")
# collection = "TEST_SC_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS"

# df = get_df_sorted_by_f1score(collection, params)
# round_data(df.head(ROWS),3)