In [1]:
import sklearn
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import *
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.externals import joblib

In [2]:
import numpy as np

from collections import *

import json
from sklearn.metrics import *
from sklearn.model_selection import cross_val_score


from feature_extraction import *
from classify import *
from book import *

In [3]:
from evaluation import *

ann_WOT, ann_SOC, ann_ASOIAF, ann_SA
lengths:  [432, 91, 256, 275]
POVs:  52 9 15 6


# Evaluation Program

In [4]:
import pandas as pd

def all_metrics(tt, pp):
    """Score predicted labels `pp` against reference labels `tt`.

    Only accuracy is reported at the moment, so the return value is exactly
    what `accuracy_score(tt, pp)` produces. A precision/recall/F-score
    computation used to be stacked in front of it and is currently disabled:
        precision_recall_fscore_support(tt, pp, average='micro', labels=np.unique(tt))[0:3]
    """
    return accuracy_score(tt, pp)


# Column labels for the result tables built from all_metrics.
all_metrics_names = ["Acc"]

In [5]:
def make_classic_classifier():
    """Return a new, unfitted pipeline: MaxAbsScaler followed by L2 logistic regression (C=1)."""
    scaler = MaxAbsScaler()
    logistic = LogisticRegression(C=1, dual=False, penalty="l2")
    return make_pipeline(scaler, logistic)

def make_highdim_classifier():
    """Return a new, unfitted pipeline: StandardScaler followed by an SVC.

    The SVC is created with C=1.0 and probability=True.
    """
    # Import the estimator explicitly: a bare `import sklearn` does not load
    # the `sklearn.svm` submodule, so `sklearn.svm.SVC` only resolves when some
    # other import happens to have pulled it in first.
    from sklearn.svm import SVC

    return make_pipeline(
        StandardScaler(),
        SVC(C=1.0, probability=True)
    )


# Zero-argument factories: each call builds a brand-new solver, so every
# evaluation run starts from an untrained model.

def CL_mdl():
    """New MLCharacterSolver wrapping the classic (logistic regression) pipeline."""
    return MLCharacterSolver(make_classic_classifier(), nicknames2name_comb)


def WE_mdl():
    """New MLCharacterSolver wrapping the SVC pipeline, using get_embedding_features."""
    return MLCharacterSolver(make_highdim_classifier(), nicknames2name_comb, get_embedding_features)


def FM_mdl():
    """New FirstMentionedSolver baseline."""
    return FirstMentionedSolver(nicknames2name_comb)


def MC_mdl():
    """New MostMentionedSolver baseline."""
    return MostMentionedSolver(nicknames2name_comb)


# (name, annotations) pairs used as the primary evaluation sets.
datasets = [
    ("WOT", ann_WOT),
    ("ASOIAF", ann_ASOIAF),
    ("SOC", ann_SOC),
]

# Supplementary data, kept apart from the primary sets.
supdatasets = [
    ("SA", ann_SA),
]

# (display name, factory) pairs. Names beginning with "ML" are treated as
# trainable methods by the make_program helpers further down.
base_mdls = [
    ("ML Classical Features", CL_mdl),
    ("ML Word Emb. Features", WE_mdl),
    ("First Mentioned", FM_mdl),
    ("Most Commonly Mentioned", MC_mdl),
]

## main eval

In [None]:
def make_program(datasets, mdls):
    """Build the main-evaluation plan as an ordered mapping.

    Parameters
    ----------
    datasets : sequence of (name, data) pairs
        Each dataset is used as a test set in turn. It must be re-iterable,
        because it is also scanned for training sets.
    mdls : iterable of (name, factory) pairs
        `factory()` must return a fresh model. A name starting with "ML"
        marks a trainable method.

    Returns
    -------
    OrderedDict
        Maps `(test_name, model_name, train_name)` to
        `(train_data, test_data, model)`. Trainable methods get one entry per
        training dataset other than the test dataset. Other methods get a
        single entry with train name "---" and an empty training list.
    """
    # Imported locally so the function does not depend on an `it` alias that
    # no visible cell of this notebook defines.
    from itertools import product

    program = OrderedDict()
    for (test_data_name, test_data), (mdl_name, mdl) in product(datasets, mdls):
        if mdl_name.startswith("ML"):
            for train_data_name, train_data in datasets:
                # Never train on the dataset that is being tested.
                if train_data_name == test_data_name:
                    continue
                program[(test_data_name, mdl_name, train_data_name)] = (
                    train_data,
                    test_data,
                    mdl()
                )
        else:
            # Untrained baselines need no training data.
            program[(test_data_name, mdl_name, "---")] = ([], test_data, mdl())
    return program

program = make_program(datasets, base_mdls)

# One row per (test set, method, train set) key, one column per metric name.
res = pd.DataFrame(
    index=pd.MultiIndex.from_tuples(program.keys()),
    columns=all_metrics_names,
).sort_index()

for ind in res.index:
    print(ind, end="")
    train_part, test_part, solver = program[ind]
    score = evaluate(train_part, test_part, solver, metric=all_metrics)
    res.loc[ind, :] = score
    print(" ", score)
    # Rewritten after every row, so the CSV always holds the results so far.
    res.to_csv("../results/maineval.csv", index_label=["Test Set", "Method", "Train Set"])

res

('ASOIAF', 'First Mentioned', '---')  0.25
('ASOIAF', 'ML Classical Features', 'SOC')  0.953125
('ASOIAF', 'ML Classical Features', 'WOT')  0.984375
('ASOIAF', 'ML Word Emb. Features', 'SOC')  0.86328125
('ASOIAF', 'ML Word Emb. Features', 'WOT')  0.9765625
('ASOIAF', 'Most Commonly Mentioned', '---')  0.9140625
('SOC', 'First Mentioned', '---')  0.42857142857142855
('SOC', 'ML Classical Features', 'ASOIAF')  0.9230769230769231
('SOC', 'ML Classical Features', 'WOT')  0.9230769230769231
('SOC', 'ML Word Emb. Features', 'ASOIAF')  0.945054945054945
('SOC', 'ML Word Emb. Features', 'WOT')  0.9340659340659341
('SOC', 'Most Commonly Mentioned', '---')  0.7912087912087912
('WOT', 'First Mentioned', '---')

In [None]:
# Run the most-mentioned baseline on the WOT annotations so its raw
# predictions can be inspected in the following cells.
mdl = MC_mdl()
texts, ref_chars = extract_texts_and_characters(ann_WOT)
output_chars = list(mdl.choose_characters(texts))

In [None]:
# Select the items where the prediction differs from the reference AND the
# solver reported that it found no character at all.
# The conditions are combined with elementwise `&`: a chained comparison
# (`a != b == c`) expands to `(a != b) and (b == c)`, and `and` on
# multi-element NumPy arrays raises "truth value of an array is ambiguous".
ref_arr = np.asarray(ref_chars)
out_arr = np.asarray(output_chars)
fail_inds = (ref_arr != out_arr) & (out_arr == "[No Characters Detected]")

# (predicted, reference) pairs for the selected items.
list(zip(out_arr[fail_inds], ref_arr[fail_inds]))

In [None]:
# Texts for which the solver reported no character at all.
no_detection_mask = np.asarray(output_chars) == "[No Characters Detected]"
texts[no_detection_mask]

## Cross Evaluation
To test how much differences in style between the datasets affect the results.

In [None]:
def make_program(datasets, mdls):
    """Build the cross-evaluation plan: one entry per (dataset, method) pair.

    NOTE: this reuses the name `make_program` from the main-evaluation cell.
    Once this cell has run, the earlier version, which produces
    three-field keys, is no longer reachable under that name.

    Parameters
    ----------
    datasets : iterable of (name, data) pairs
    mdls : iterable of (name, factory) pairs
        `factory()` must return a fresh model.

    Returns
    -------
    dict
        Maps `(data_name, model_name)` to `(data, model)`.
    """
    # Imported locally so the function does not depend on an `it` alias that
    # no visible cell of this notebook defines.
    from itertools import product

    return {
        (data_name, mdl_name): (data, mdl())
        for (data_name, data), (mdl_name, mdl) in product(datasets, mdls)
    }
# Cross-evaluate on each dataset separately and on the combined annotations.
xval_datasets = datasets + [("Combined", ann_comb)]
program = make_program(xval_datasets, base_mdls)

# One row per (dataset, method) key, one column per metric name.
res_xval = pd.DataFrame(
    index=pd.MultiIndex.from_tuples(program.keys()),
    columns=all_metrics_names,
).sort_index()

for ind in res_xval.index:
    print(ind, end="")
    xval_data, xval_solver = program[ind]
    score = xval_evaluate(xval_data, xval_solver, metric=all_metrics)
    res_xval.loc[ind, :] = score
    print(" ", score)
    # Rewritten after every row, so the CSV always holds the results so far.
    res_xval.to_csv("../results/crosseval.csv", index_label=["Dataset", "Method"])

res_xval

# Supp data

The SA ground truth is really weak.
It is given per chapter, and a chapter has maybe 4 scenes, only 1-2 of which will actually be about that character.


In [None]:
def make_program(datasets, supdatasets, mdls):
    """Build the supplementary-data plan: train on everything except the test set.

    NOTE: this reuses the name `make_program` from earlier cells and replaces
    those definitions once this cell has run.

    Parameters
    ----------
    datasets : sequence of (name, data) pairs
        Each dataset is used as a test set in turn.
    supdatasets : iterable of (name, data) pairs
        Extra data that is only ever trained on, never tested on.
    mdls : iterable of (name, factory) pairs
        `factory()` must return a fresh model. Only methods whose name starts
        with "ML" produce entries; all others are skipped.

    Returns
    -------
    OrderedDict
        Maps `(test_name, model_name, "X and Y ...")` to
        `(np.hstack(training data), test_data, model)`. A combination is
        skipped when fewer than two training datasets remain after removing
        the test dataset.
    """
    # Imported locally so the function does not depend on an `it` alias that
    # no visible cell of this notebook defines.
    from itertools import product

    all_datasets = list(datasets) + list(supdatasets)

    program = OrderedDict()
    for (test_data_name, test_data), (mdl_name, mdl) in product(datasets, mdls):
        if not mdl_name.startswith("ML"):
            continue

        # Everything except the dataset under test is available for training.
        train_sets = [
            (name, data) for name, data in all_datasets if name != test_data_name
        ]
        if len(train_sets) <= 1:
            continue

        train_data_name = " and ".join(name for name, _ in train_sets)
        program[(test_data_name, mdl_name, train_data_name)] = (
            np.hstack([data for _, data in train_sets]),
            test_data,
            mdl()
        )
    return program

program = make_program(datasets, supdatasets, base_mdls)

# One row per (test set, method, train set) key, one column per metric name.
res = pd.DataFrame(
    index=pd.MultiIndex.from_tuples(program.keys()),
    columns=all_metrics_names,
).sort_index()

for ind in res.index:
    print(ind, end="")

    train_part, test_part, solver = program[ind]
    score = evaluate(train_part, test_part, solver, metric=all_metrics)
    res.loc[ind, :] = score
    print(" ", score)
    # Rewritten after every row, so the CSV always holds the results so far.
    res.to_csv("../results/extradata.csv", index_label=["Test Set", "Method", "Train Set"])

res

# Save some trained model

In [None]:
def train_and_save_model(ann, filename, model):
    """Train a fresh solver on `ann`, dump it with joblib, and return it.

    ann      -- annotations handed to extract_texts_and_characters
    filename -- base name; the dump goes to ../trained_models/<filename>.pkl
    model    -- zero-argument factory returning an untrained solver
    """
    solver = model()
    training_args = extract_texts_and_characters(ann)
    solver.train(*training_args)

    target_path = "../trained_models/" + filename + ".pkl"
    joblib.dump(solver, target_path)
    return solver

In [None]:
# Train the classical-feature solver on the SOC annotations and save it as CL_SOC.pkl.
CL_SOC = train_and_save_model(ann_SOC, "CL_SOC", CL_mdl)
# The HY variant is disabled: HY_mdl is not defined in any visible cell, so HY_SOC is never created.
#HY_SOC = train_and_save_model(ann_SOC, "HY_SOC", HY_mdl)

In [None]:
# Train the classical-feature solver on the ASOIAF annotations and save it as CL_ASOIAF.pkl.
CL_ASOIAF = train_and_save_model(ann_ASOIAF, "CL_ASOIAF", CL_mdl)
# The HY variant is disabled: HY_mdl is not defined in any visible cell, so HY_ASOIAF is never created.
#HY_ASOIAF = train_and_save_model(ann_ASOIAF, "HY_ASOIAF", HY_mdl)

## Feature importance stuff

In [None]:
def feature_importance(mdl):
    """Print the positively weighted features of a trained solver, largest first.

    Feature names are the third value returned by `get_feature_vectors` for
    the text of `ann_comb[1]`. Weights are read from
    `mdl.classifier.feature_importances_`.

    NOTE(review): the pipelines built in this notebook end in
    LogisticRegression or SVC, neither of which exposes
    `feature_importances_` - confirm what `mdl.classifier` holds before
    relying on this function.
    """
    _, _, vector_keys = get_feature_vectors(ann_comb[1]['text'])

    # Pair each weight with its feature name; sorting descending puts the
    # heaviest features first (ties fall back to comparing names).
    ranked = sorted(zip(mdl.classifier.feature_importances_, vector_keys), reverse=True)
    non_zero_weights = [pair for pair in ranked if pair[0] > 0]

    print("Number of nonzeo weights: ", len(non_zero_weights))
    report_lines = [", ".join(str(part) for part in pair) for pair in non_zero_weights]
    print("\n".join(report_lines))


In [None]:
# Report feature weights for the classical-feature solver trained on SOC.
feature_importance(CL_SOC)

In [None]:
# NOTE(review): HY_SOC is not assigned in any visible cell (the line that
# would train it is commented out), so this call raises NameError unless the
# name is supplied from elsewhere.
feature_importance(HY_SOC)

In [None]:
# Report feature weights for the classical-feature solver trained on ASOIAF.
feature_importance(CL_ASOIAF)

In [None]:
# NOTE(review): HY_ASOIAF is not assigned in any visible cell (the line that
# would train it is commented out), so this call raises NameError unless the
# name is supplied from elsewhere.
feature_importance(HY_ASOIAF)