In [241]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import pymongo
from pprint import pprint

In [242]:
def group_by(df, bycols, agg_map):
    """
    Group `df` by one or more columns and compute one aggregate per (column, function) pair,
    returning a single flat DataFrame.

    @param df:      DataFrame
    @param bycols:  str or list (any sequence of column names is accepted)
                        Column(s) to group by
    @param agg_map: dictionary or list of 2-tuples
                        Mapping from column to aggregate function e.g. [("city", "count"), ("salary", "mean")]
                        A dictionary can only hold one aggregate per column; use the list form
                        to compute several aggregates of the same column.
    @return:        DataFrame
                        Flattened dataframe, with multi-level index removed. Holds the group-by
                        column(s) followed by one column per aggregate, named "<agg>(<column>)"
    @raise ValueError:  if agg_map is empty
    """
    string_types = (str, type(u""))  # also matches `unicode` under Python 2

    # Normalise to a fresh list so that `bycols + [col]` works for str, tuple and list alike
    if isinstance(bycols, string_types):
        bycols = [bycols]
    else:
        bycols = list(bycols)

    if isinstance(agg_map, dict):
        agg_map = agg_map.items()
    agg_map = list(agg_map)
    if not agg_map:
        raise ValueError("agg_map must contain at least one (column, aggregate) pair")

    grps = []
    for col, agg in agg_map:
        # Callables are labelled by their function name rather than their repr,
        # which would embed a memory address into the column name
        if isinstance(agg, string_types):
            agg_name = agg
        else:
            agg_name = getattr(agg, "__name__", agg)

        grp = df[bycols + [col]].groupby(bycols).agg(agg).reset_index()
        grps.append(grp.rename(columns={col: "%s(%s)" % (agg_name, col)}))

    # Every partial result has the same group keys, so the inner joins only add columns
    merged = grps[0]
    for grp in grps[1:]:
        merged = pd.merge(merged, grp, on=bycols, how="inner")
    return merged

In [295]:
from bson.son import SON # needed to ensure dictionary is ordered (python default is not)
import hashlib

def hash_feats(fts):
    """
    Build a hex digest identifying one combination of values.

    @param fts: object exposing its values through `.values` (e.g. a DataFrame row)
    @return:    str
                    SHA-224 hex digest of the values, each converted with str() and joined with "|"
    """
    joined = "|".join(map(str, fts.values))
    # hashlib only accepts bytes: Python 3 text has to be encoded first, whereas a
    # Python 2 `str` already is bytes and is passed through untouched
    if not isinstance(joined, bytes):
        joined = joined.encode("utf-8")
    return hashlib.sha224(joined).hexdigest()

def get_df_sorted_by_f1score(collection, params=None, filter_cols=True):
    """
    Load the results stored in a MongoDB collection into a DataFrame, best micro F1 score first.

    NOTE: the collection is read through the module-level `db` handle, which is not created
    here and must already exist when this function is called.

    @param collection:  str
                            Name of the collection to read
    @param params:      None, str or list
                            Names of the fields under `parameters` to include as columns.
                            A str is treated as a comma separated list e.g. "C,penalty"
    @param filter_cols: bool
                            True  - return only the micro F1 / recall / precision columns plus `params`
                            False - return every projected column, plus an `hs_params` column
                                    (hash of the parameter values) when `params` is given
    @return:            DataFrame
                            One row per document that has a micro F1 score, sorted by it descending
    @raise KeyError:    if the collection holds no document with a micro F1 score
    """
    string_types = (str, type(u""))  # also matches `unicode` under Python 2
    if not params:
        params = []
    elif isinstance(params, string_types):
        # tolerate "a, b" and stray commas
        params = [p.strip() for p in params.split(",") if p.strip()]
    else:
        # copy, so any sequence type works and the caller's object is never shared
        params = list(params)

    project = {
        "weighted_f1_score": "$WEIGHTED_MEAN_CONCEPT_CODES.f1_score",
        "micro_f1_score":    "$MICRO_F1.f1_score",
        "micro_recall":      "$MICRO_F1.recall",
        "micro_precision":   "$MICRO_F1.precision",

        # PARAMETERS
        "window_size":       "$parameters.window_size",
        "feats":             "$parameters.extractors",
        "count":             {"$size": "$parameters.extractors"},
        "asof":              "$asof",
        "_id": 1
    }

    # No count for HMM
    if "_hmm" in collection.lower():
        del project["count"]

    for param in params:
        project[param] = "$parameters." + param

    feats_pipeline = [
        {"$project": project},
        # only keep documents that actually have a micro F1 score
        {"$match": {"micro_f1_score": {"$exists": True}}},
        {"$sort": {"micro_f1_score": -1}},
    ]

    rows = list(db[collection].aggregate(feats_pipeline))
    if not rows:
        # An empty frame has no "micro_f1_score" column, so sorting it would fail with a
        # bare KeyError; keep the exception type but say what actually went wrong
        raise KeyError("No documents with a micro F1 score found in collection '%s'" % collection)

    df = pd.DataFrame(rows).sort_values("micro_f1_score", ascending=False)

    if filter_cols:
        # hs_params is not among the returned columns, so it is not computed here
        return df[["micro_f1_score", "micro_recall", "micro_precision"] + params]

    if params:
        df["hs_params"] = df[params].apply(hash_feats, axis=1)
    return df

In [296]:
def get_window_classifier_results(prefix):
    """
    Compare the window classifier collections that share a collection name prefix,
    keeping only the best run (highest micro F1 score) of each collection.

    @param prefix: str
                       Collection name prefix, including the trailing separator e.g. "CB_TAGGING_VD_"
    @return:       DataFrame
                       One row per collection, sorted by micro_f1_score descending. The collection
                       name (with the prefix removed) is stored in the column
                       "Collection_<prefix without its last character>"
    """
    collections = [
        "WINDOW_CLASSIFIER_BR",
        "WINDOW_CLASSIFIER_LBL_POWERSET_MULTICLASS",
        "WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS",
    ]
    best_rows = []
    for c in collections:
        col = prefix + c
        # the parenthesised single-argument form prints the same on Python 2 and Python 3
        print(col)
        # results come back sorted by micro F1 descending, so the first row is the best run
        best = dict(get_df_sorted_by_f1score(col).iloc[0, :])
        best["Collection_" + prefix[:-1]] = col.replace(prefix, "")
        best_rows.append(best)
    return pd.DataFrame(best_rows).sort_values("micro_f1_score", ascending=False)

# Which Problem Transformation Method Was Best?

## Coral Bleaching

In [281]:
# Best run of each window classifier variant on the CB_TAGGING_VD collections
df = get_window_classifier_results("CB_TAGGING_VD_")
df[["Collection_CB_TAGGING_VD", "micro_f1_score", "micro_recall", "micro_precision"]]

CB_TAGGING_VD_WINDOW_CLASSIFIER_BR
CB_TAGGING_VD_WINDOW_CLASSIFIER_LBL_POWERSET_MULTICLASS
CB_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS


Unnamed: 0,Collection_CB_TAGGING_VD,micro_f1_score,micro_recall,micro_precision
2,WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS,0.835846,0.793314,0.883197
1,WINDOW_CLASSIFIER_LBL_POWERSET_MULTICLASS,0.835756,0.793114,0.883244
0,WINDOW_CLASSIFIER_BR,0.828318,0.777359,0.886428


## Skin Cancer

In [282]:
# Best run of each window classifier variant on the SC_TAGGING_VD collections
df = get_window_classifier_results("SC_TAGGING_VD_")
df[["Collection_SC_TAGGING_VD", "micro_f1_score", "micro_recall", "micro_precision"]]

SC_TAGGING_VD_WINDOW_CLASSIFIER_BR
SC_TAGGING_VD_WINDOW_CLASSIFIER_LBL_POWERSET_MULTICLASS
SC_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS


Unnamed: 0,Collection_SC_TAGGING_VD,micro_f1_score,micro_recall,micro_precision
1,WINDOW_CLASSIFIER_LBL_POWERSET_MULTICLASS,0.813778,0.779144,0.851636
2,WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS,0.813778,0.779144,0.851636
0,WINDOW_CLASSIFIER_BR,0.807288,0.765052,0.85446


**Unsurprisingly in this case, as there were only two MLC labels, the scores for LBL powerset and most common tag are the same.**

**HOWEVER - why is the multiclass version that much better? It does OVR, and with only 2 records' difference, this makes no sense to me.**

# Hyper Parameter Tuning Results

In [291]:
# Number of top-ranked rows shown for each results table below (passed to df.head)
ROWS = 5

## Window Based Classifier - Hyper Parameter Tuning

### Coral Bleaching

In [292]:
collection = "CB_TAGGING_VD_WINDOW_CLASSIFIER_LBL_POWERSET_MULTICLASS_HYPER_PARAM_TUNING"
params = ["dual", "C", "penalty", "fit_intercept", "multi_class"]

# Top parameter combinations, ranked by micro F1 score
df = get_df_sorted_by_f1score(collection, params)
df.head(ROWS)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,C,penalty,fit_intercept,multi_class
0,0.836792,0.789918,0.889579,True,0.5,l2,True,ovr
1,0.83675,0.789868,0.889548,False,0.5,l2,True,ovr
2,0.835756,0.793114,0.883244,False,1.0,l2,True,ovr
3,0.835745,0.793114,0.88322,True,1.0,l2,True,ovr
4,0.835178,0.791366,0.884125,True,0.5,l2,False,ovr


### Skin Cancer

In [293]:
collection = "SC_TAGGING_VD_WINDOW_CLASSIFIER_LBL_POWERSET_MULTICLASS_HYPER_PARAM_TUNING"
params = ["dual", "C", "penalty", "fit_intercept", "multi_class"]

# Top parameter combinations, ranked by micro F1 score
df = get_df_sorted_by_f1score(collection, params)
df.head(ROWS)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,dual,C,penalty,fit_intercept,multi_class
0,0.815927,0.777754,0.858041,False,0.5,l2,True,ovr
1,0.815909,0.777781,0.857968,True,0.5,l2,True,ovr
2,0.815091,0.782878,0.85007,False,1.0,l1,True,ovr
3,0.814214,0.776909,0.855283,False,0.5,l1,True,ovr
4,0.813818,0.779171,0.851691,True,1.0,l2,True,ovr


## CRF Performance - Hyper Parameter Tuning 

### Coral Bleaching

In [260]:
# Display order: scores first, then the CRF parameters
cols = [
    "micro_f1_score", "micro_precision", "micro_recall",
    "feature_possible_states", "feature_possible_transitions", "c2",
]
df = get_df_sorted_by_f1score(
    "CB_TAGGING_VD_CRF_LBL_POWERSET_HYPERPARAM_OPT",
    ["feature_possible_states", "feature_possible_transitions", "c2"],
)
df[cols].head(ROWS)

Unnamed: 0,micro_f1_score,micro_precision,micro_recall,feature_possible_states,feature_possible_transitions,c2
0,0.829887,0.887694,0.779148,False,True,1.0
1,0.829362,0.887119,0.778666,False,False,1.0
2,0.828921,0.885422,0.779199,False,True,0.5
3,0.827848,0.890057,0.773768,False,True,2.0
4,0.826696,0.879389,0.77996,False,False,0.1


### Skin Cancer

In [261]:
# Display order: scores first, then the CRF parameters
cols = [
    "micro_f1_score", "micro_precision", "micro_recall",
    "feature_possible_states", "feature_possible_transitions", "c2",
]
df = get_df_sorted_by_f1score(
    "SC_TAGGING_VD_CRF_LBL_POWERSET_HYPERPARAM_OPT",
    ["feature_possible_states", "feature_possible_transitions", "c2"],
)
df[cols].head(ROWS)

Unnamed: 0,micro_f1_score,micro_precision,micro_recall,feature_possible_states,feature_possible_transitions,c2
0,0.803346,0.859727,0.753904,False,False,2.0
1,0.803316,0.859553,0.753986,False,True,2.0
2,0.802608,0.856701,0.75494,False,False,1.0
3,0.802369,0.853438,0.757066,False,True,0.5
4,0.802277,0.856052,0.754858,False,True,1.0


## HMM - Hyper Parameter Tuning (Features in this case)

### Coral Bleaching

In [297]:
collection = "CB_TAGGING_VD_HMM"
params = ["extractors"]

# Runs ranked by micro F1 score, alongside the extractors each one used
df = get_df_sorted_by_f1score(collection, params)
df.head(ROWS)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,extractors
0,0.764348,0.790924,0.7395,stemmed_unigrams
1,0.758365,0.77996,0.737934,unigrams


In [300]:
collection = "CB_TAGGING_VD_HMM_LBL_POWERSET"
params = ["extractors"]

# Runs ranked by micro F1 score, alongside the extractors each one used
df = get_df_sorted_by_f1score(collection, params)
df.head(ROWS)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,extractors
0,0.769735,0.802193,0.739801,stemmed_unigrams
1,0.764508,0.791051,0.739689,unigrams


# <span style="color:red">_NEED TO DO MOST COMMON TAG_!!!!</span>

### Skin Cancer

In [298]:
collection = "SC_TAGGING_VD_HMM"
params = ["extractors"]

# Runs ranked by micro F1 score, alongside the extractors each one used
df = get_df_sorted_by_f1score(collection, params)
df.head(ROWS)

Unnamed: 0,micro_f1_score,micro_recall,micro_precision,extractors
0,0.664386,0.70408,0.628929,stemmed_unigrams
1,0.664386,0.70408,0.628929,stemmed_unigrams
2,0.661275,0.690834,0.634141,unigrams


## Average Perceptron - Hyper Parameter Tuning Results

### Coral Bleaching

In [262]:
model = "AVG_PERCEPTRON_MULTICLASS"
df = get_df_sorted_by_f1score(
    "CB_TAGGING_VD_" + model,
    "prev_tag_sharing,num_iterations,tag_history,combo_freq_threshold",
)
# Top rows, with precision shown before recall
df.head(ROWS)[[
    "micro_f1_score", "micro_precision", "micro_recall",
    "prev_tag_sharing", "num_iterations", "tag_history", "combo_freq_threshold",
]]

Unnamed: 0,micro_f1_score,micro_precision,micro_recall,prev_tag_sharing,num_iterations,tag_history,combo_freq_threshold
0,0.837699,0.887409,0.793264,True,10,10,5
1,0.837588,0.890081,0.790942,True,5,3,5
2,0.837174,0.889588,0.790592,True,5,1,5
3,0.837016,0.886593,0.792689,True,10,15,5
4,0.836976,0.889048,0.790667,True,5,2,5


### Skin Cancer

In [264]:
model = "AVG_PERCEPTRON_MULTICLASS"
df = get_df_sorted_by_f1score(
    "SC_TAGGING_VD_" + model,
    "prev_tag_sharing,num_iterations,tag_history,combo_freq_threshold",
)
# Top rows, with precision shown before recall
df.head(ROWS)[[
    "micro_f1_score", "micro_precision", "micro_recall",
    "prev_tag_sharing", "num_iterations", "tag_history", "combo_freq_threshold",
]]

Unnamed: 0,micro_f1_score,micro_precision,micro_recall,prev_tag_sharing,num_iterations,tag_history,combo_freq_threshold
0,0.817549,0.862104,0.777372,True,5,0,5
1,0.817457,0.861064,0.778053,True,5,2,5
2,0.816527,0.85897,0.778081,True,5,8,5
3,0.816329,0.8586,0.778026,True,10,3,5
4,0.816291,0.858315,0.77819,True,10,8,5
