In [1]:
import sklearn
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import *
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.externals import joblib

In [2]:
import numpy as np

from collections import *

import json
from sklearn.metrics import *
from sklearn.model_selection import cross_val_score


from feature_extraction import *
from classify import *
from book import *

In [3]:
from evaluation import *

ann_WOT, ann_SOC, ann_ASOIAF, ann_SA
lengths:  [432, 91, 256, 275]
POVs:  52 9 15 6


# Evaluation Program

In [4]:
import pandas as pd

def all_metrics(tt, pp):
    """Score predictions ``pp`` against the reference labels ``tt``.

    Only accuracy is reported: the return value is whatever
    ``accuracy_score(tt, pp)`` produces. Precision/recall/F-score were
    sketched here at some point but are not computed.
    """
    return accuracy_score(tt, pp)


# Column labels for the value(s) returned by ``all_metrics``.
all_metrics_names = ["Acc"]

In [5]:
def make_classic_classifier():
    """Build a fresh, unfitted pipeline: ``MaxAbsScaler`` -> ``LogisticRegression``.

    The logistic regression uses ``C=1``, ``dual=False`` and an ``"l2"`` penalty.
    A new pipeline object is created on every call.
    """
    scaler = MaxAbsScaler()
    logreg = LogisticRegression(C=1, dual=False, penalty="l2")
    return make_pipeline(scaler, logreg)

def make_highdim_classifier():
    """Build a fresh, unfitted pipeline: ``StandardScaler`` -> ``SVC``.

    The SVC uses ``C=1.0`` and ``probability=True``. A new pipeline object is
    created on every call.
    """
    # Import the estimator explicitly. ``import sklearn`` on its own does not
    # load the ``sklearn.svm`` submodule, so ``sklearn.svm.SVC`` only resolved
    # when some other import happened to pull the submodule in first.
    from sklearn.svm import SVC

    return make_pipeline(
        StandardScaler(),
        SVC(C=1.0, probability=True),
    )


# Zero-argument factories: every call builds a new solver (and, for the ML
# solvers, a new unfitted classifier pipeline), so program entries never share
# a model instance.
CL_mdl = lambda: MLCharacterSolver(make_classic_classifier(), nicknames2name_comb)
WE_mdl = lambda: MLCharacterSolver(make_highdim_classifier(), nicknames2name_comb, get_embedding_features)


# Factories for the two non-ML solvers; they take only nicknames2name_comb.
FM_mdl = lambda: FirstMentionedSolver(nicknames2name_comb)
MC_mdl = lambda: MostMentionedSolver(nicknames2name_comb)

# (name, annotations) pairs. `datasets` are used as train/test sets;
# `supdatasets` is kept separate and is only passed to the supplementary-data
# variant of make_program.
datasets = [("WOT", ann_WOT), ("ASOIAF", ann_ASOIAF), ("SOC", ann_SOC)]
supdatasets = [("SA", ann_SA)]
# (label, factory) pairs. The make_program variants decide whether a model
# needs training data by checking whether its label starts with "ML", so the
# label prefix is load-bearing.
base_mdls = [("ML Classical Features", CL_mdl),
             ("ML Word Emb. Features", WE_mdl),
             ("First Mentioned", FM_mdl),
             ("Most Commonly Mentioned", MC_mdl)
       ]

## main eval

In [6]:
def make_program(datasets, mdls):
    """Lay out the train/test runs for the main evaluation.

    Parameters
    ----------
    datasets : sequence of (name, data) pairs
    mdls : sequence of (label, factory) pairs; ``factory()`` returns a new model

    Returns
    -------
    OrderedDict
        Maps ``(test_name, model_label, train_name)`` to
        ``(train_data, test_data, model)``. Models whose label starts with
        ``"ML"`` get one entry per *other* dataset used as training data; all
        remaining models get a single entry with train name ``"---"`` and an
        empty training list.
    """
    program = OrderedDict()
    for test_set, (mdl_name, mdl) in it.product(datasets, mdls):
        test_data_name, test_data = test_set

        # Models without the "ML" prefix are not trained at all.
        if mdl_name[:2] != "ML":
            program[(test_data_name, mdl_name, "---")] = ([], test_data, mdl())
            continue

        # Trained models: never train on the dataset being tested.
        for train_data_name, train_data in datasets:
            if train_data_name != test_data_name:
                key = (test_data_name, mdl_name, train_data_name)
                program[key] = (train_data, test_data, mdl())
    return program

# Main evaluation: one row per (test set, method, train set) combination.
program = make_program(datasets, base_mdls)


# Empty results table keyed by the program entries; sorting the index fixes
# the order in which the entries are evaluated and reported below.
res = pd.DataFrame(index=pd.MultiIndex.from_tuples(program.keys()),
                   columns = all_metrics_names)
res.sort_index(inplace=True)

for ind in res.index:
    print(ind, end="")
    score = evaluate(*program[ind], metric=all_metrics)
    res.loc[ind,:] = score
    print(" ", score)
    # The CSV is rewritten after every row, so the file on disk always holds
    # the rows evaluated so far. The directory was previously misspelled as
    # "../resulthtos"; every other cell writes under "../results".
    res.to_csv("../results/maineval.csv", index_label=["Test Set", "Method", "Train Set"])

res

('ASOIAF', 'First Mentioned', '---')  0.25
('ASOIAF', 'ML Classical Features', 'SOC')  0.953125
('ASOIAF', 'ML Classical Features', 'WOT')  0.984375
('ASOIAF', 'ML Word Emb. Features', 'SOC')  0.86328125
('ASOIAF', 'ML Word Emb. Features', 'WOT')  0.9765625
('ASOIAF', 'Most Commonly Mentioned', '---')  0.9140625
('SOC', 'First Mentioned', '---')  0.42857142857142855
('SOC', 'ML Classical Features', 'ASOIAF')  0.9230769230769231
('SOC', 'ML Classical Features', 'WOT')  0.9230769230769231
('SOC', 'ML Word Emb. Features', 'ASOIAF')  0.945054945054945
('SOC', 'ML Word Emb. Features', 'WOT')  0.9340659340659341
('SOC', 'Most Commonly Mentioned', '---')  0.7912087912087912
('WOT', 'First Mentioned', '---')  0.04398148148148148
('WOT', 'ML Classical Features', 'ASOIAF')  0.7453703703703703
('WOT', 'ML Classical Features', 'SOC')  0.7013888888888888
('WOT', 'ML Word Emb. Features', 'ASOIAF')  0.6990740740740741
('WOT', 'ML Word Emb. Features', 'SOC')  0.5509259259259259
('WOT', 'Most Commonly 

Unnamed: 0,Unnamed: 1,Unnamed: 2,Acc
ASOIAF,First Mentioned,---,0.25
ASOIAF,ML Classical Features,SOC,0.953125
ASOIAF,ML Classical Features,WOT,0.984375
ASOIAF,ML Word Emb. Features,SOC,0.863281
ASOIAF,ML Word Emb. Features,WOT,0.976562
ASOIAF,Most Commonly Mentioned,---,0.914062
SOC,First Mentioned,---,0.428571
SOC,ML Classical Features,ASOIAF,0.923077
SOC,ML Classical Features,WOT,0.923077
SOC,ML Word Emb. Features,ASOIAF,0.945055


In [53]:
# Scratch check: np.random.shuffle reorders the list in place, so `xs` itself
# is displayed afterwards. No seed is set, so the displayed order can differ
# between runs.
xs = [1,2,3]
np.random.shuffle(xs)
xs

[3, 1, 2]

# Combining training data

In [None]:
def make_program(datasets, mdls, seed=None):
    """Lay out runs that train on all *other* datasets combined.

    Only models whose label starts with ``"ML"`` are included. For each test
    dataset, the examples of every other dataset are concatenated into one new
    list and shuffled; the input datasets themselves are not modified.

    Parameters
    ----------
    datasets : sequence of (name, data) pairs; ``data`` is an iterable of examples
    mdls : sequence of (label, factory) pairs; ``factory()`` returns a new model
    seed : int or None, optional
        When given, the shuffles draw from a private
        ``np.random.RandomState(seed)``, so the resulting training order is
        reproducible. When ``None`` (the default) NumPy's global random state
        is used, exactly as before.

    Returns
    -------
    OrderedDict
        Maps ``(test_name, model_label, "+".join(train_names))`` to
        ``(shuffled_train_data, test_data, model)``.
    """
    # `np.random` (the module) and a RandomState both expose `.shuffle`, so the
    # loop below does not need to know which one it was given.
    rng = np.random if seed is None else np.random.RandomState(seed)

    program = OrderedDict()
    for (test_data_name, test_data), (mdl_name, mdl) in it.product(datasets, mdls):
        if not mdl_name.startswith("ML"):
            continue

        # Everything except the dataset under test becomes training data.
        held_in = [(name, data) for name, data in datasets if name != test_data_name]
        full_train_data_names = [name for name, _ in held_in]
        full_train_data = [example for _, data in held_in for example in data]

        # Shuffle so examples from the different source datasets are interleaved.
        rng.shuffle(full_train_data)
        program[(test_data_name, mdl_name, "+".join(full_train_data_names))] = (
            full_train_data,
            test_data,
            mdl()
        )
    return program

# Combined-training evaluation: uses whichever make_program was defined most
# recently in the kernel (the name is redefined several times in this notebook).
program = make_program(datasets, base_mdls)


# Empty results table keyed by the program entries; sorting the index fixes
# the order in which the entries are evaluated below.
res_comb = pd.DataFrame(index=pd.MultiIndex.from_tuples(program.keys()),
                   columns = all_metrics_names)
res_comb.sort_index(inplace=True)

for ind in res_comb.index:
    print(ind, end="")
    score = evaluate(*program[ind], metric=all_metrics)
    res_comb.loc[ind,:] = score
    print(" ", score)
    # Rewritten after every row, so the file always holds the rows done so far.
    res_comb.to_csv("../results/combeval.csv", index_label=["Test Set", "Method", "Train Set"])
    
res_comb

('ASOIAF', 'ML Classical Features', 'WOT+SOC')  0.9765625
('ASOIAF', 'ML Word Emb. Features', 'WOT+SOC')  0.97265625
('SOC', 'ML Classical Features', 'WOT+ASOIAF')  0.9340659340659341
('SOC', 'ML Word Emb. Features', 'WOT+ASOIAF')

## Cross Evaluation
To test how much differences in style between the datasets affect the results.

In [None]:
def make_program(datasets, mdls):
    """Pair every dataset with a fresh instance of every model.

    Returns a plain ``dict`` mapping ``(data_name, model_label)`` to
    ``(data, model)``, in ``it.product(datasets, mdls)`` order.
    """
    return {
        (data_name, mdl_name): (data, mdl())
        for (data_name, data), (mdl_name, mdl) in it.product(datasets, mdls)
    }
# Cross-validate every model on each dataset and on an extra "Combined" set.
# NOTE(review): `ann_comb` is not assigned in any visible cell of this
# notebook; presumably it comes from one of the star imports — confirm.
program = make_program(datasets+[("Combined", ann_comb)], base_mdls)


# Empty results table keyed by (dataset, method); sorting the index fixes the
# order in which the entries are evaluated below.
res_xval = pd.DataFrame(index=pd.MultiIndex.from_tuples(program.keys()),
                        columns = all_metrics_names)
res_xval.sort_index(inplace=True)

for ind in res_xval.index:
    print(ind, end="")
    score = xval_evaluate(*program[ind], metric=all_metrics) 
    res_xval.loc[ind, :] = score
    print(" ", score)
    # Rewritten after every row, so the file always holds the rows done so far.
    res_xval.to_csv("../results/crosseval.csv", index_label=["Dataset", "Method"])
    
res_xval

# Supp data

The SA ground truth is really weak.
Each label is for a whole chapter, which has maybe 4 scenes, only 1-2 of which will actually be about that character.


In [None]:
def make_program(datasets, supdatasets, mdls):
    """Lay out runs that train on every other dataset plus the supplementary ones.

    Only models whose label starts with ``"ML"`` are included. For each test
    dataset (taken from ``datasets`` only), the training data is the
    ``np.hstack`` of all entries of ``datasets + supdatasets`` whose name
    differs from the test set's name. A run is only added when more than one
    such training set exists.

    Returns an ``OrderedDict`` mapping
    ``(test_name, model_label, " and ".join(train_names))`` to
    ``(stacked_train_data, test_data, model)``.
    """
    all_datasets = list(datasets) + list(supdatasets)

    program = OrderedDict()
    for (test_data_name, test_data), (mdl_name, mdl) in it.product(datasets, mdls):
        if mdl_name[:2] != "ML":
            continue

        # Candidate training sets: everything except the one being tested.
        others = [(name, data) for name, data in all_datasets if name != test_data_name]
        if len(others) <= 1:
            continue

        joined_names = " and ".join(name for name, _ in others)
        stacked = np.hstack([data for _, data in others])
        program[(test_data_name, mdl_name, joined_names)] = (stacked, test_data, mdl())
    return program

# Supplementary-data evaluation: needs the three-argument make_program
# defined in this cell's section.
program = make_program(datasets,supdatasets, base_mdls)


# NOTE(review): this rebinds `res`, which the main-eval cell also assigns, so
# the main-eval table is no longer reachable under that name afterwards.
res = pd.DataFrame(index=pd.MultiIndex.from_tuples(program.keys()),
                   columns = all_metrics_names)
res.sort_index(inplace=True)

for ind in res.index:
    print(ind, end="")
    
    score = evaluate(*program[ind], metric=all_metrics)
    res.loc[ind,:] = score
    print(" ", score)
    # Rewritten after every row, so the file always holds the rows done so far.
    res.to_csv("../results/extradata.csv", index_label=["Test Set", "Method", "Train Set"])
    
res

# Sanity check WOT


In [37]:
# Load two previously saved models from disk (presumably the classical-feature
# and word-embedding solvers, judging by the file names — confirm).
# NOTE(review): joblib.load unpickles the file, so only load trusted files.
the_CL_mdl = joblib.load("../trained_models/CL.pkl")
the_WE_mdl = joblib.load("../trained_models/WE.pkl")

In [41]:
# Split the WOT annotations into texts and their reference characters, then
# collect whatever the loaded WE model's choose_characters yields for those
# texts (presumably one predicted character per text — confirm).
texts, ref_chars = extract_texts_and_characters(ann_WOT)
out_chars =  np.asarray(list(the_WE_mdl.choose_characters(texts)))

In [48]:
# Show the (out_chars, ref_chars) pairs at the positions where the two differ;
# the first column is the model output, the second the reference.
np.asarray(list(zip(out_chars, ref_chars)))[out_chars != ref_chars]

array([['Moiraine', 'Rand'],
       ['Mat', 'Rand'],
       ['Hopper', 'Perrin'],
       ['Min', 'Siuan'],
       ['Mat', 'Rand'],
       ['Isendre', 'Rand'],
       ['Nynaeve', 'Rand'],
       ['Bornhald', 'Rand'],
       ['Min', 'Rand'],
       ['Joline', 'Slayer'],
       ['Perrin', 'Sevanna'],
       ['Sorilea', 'Sulin'],
       ['Elayne', 'Nynaeve'],
       ['Shiaine', 'Mili'],
       ['Egeanin', 'Mat'],
       ['Domon', 'Rand'],
       ['Egwene', 'Nynaeve'],
       ['Nynaeve', 'Reanne'],
       ['Nynaeve', 'Rand'],
       ['Min', 'Thom'],
       ['Mat', 'Rand'],
       ['Elayne', 'Moghedien'],
       ['Mat', 'Rand'],
       ['Egwene', 'Mat'],
       ['Moiraine', 'Rand'],
       ['Rand', 'Verin'],
       ['Faile', 'Perrin'],
       ['Alanna', 'Egwene'],
       ['Gaul', 'Perrin'],
       ['Tar Valon', 'Mat'],
       ['Elayne', 'Perrin'],
       ['Aviendha', 'Rand'],
       ['Min', 'Jaret'],
       ['Nynaeve', 'Rand'],
       ['Bryne', 'Gareth'],
       ['Rand', 'Thom'],
       ['Do

# Training Set Accuracy

In [59]:
def make_program(datasets, mdls):
    """Lay out runs that train and test on the *same* dataset.

    Only models whose label starts with ``"ML"`` are included. Returns an
    ``OrderedDict`` mapping ``(data_name, model_label, data_name)`` to
    ``(data, data, model)``; the same data object is used for both the
    training and the test slot.
    """
    return OrderedDict(
        ((data_name, mdl_name, data_name), (data, data, mdl()))
        for (data_name, data), (mdl_name, mdl) in it.product(datasets, mdls)
        if mdl_name[:2] == "ML"
    )

# Training-set accuracy: each ML model is trained and tested on the same
# dataset (see the make_program defined directly above).
program = make_program(datasets, base_mdls)

# Empty results table keyed by the program entries; sorting the index fixes
# the order in which the entries are evaluated below.
res_train = pd.DataFrame(index=pd.MultiIndex.from_tuples(program.keys()),
                   columns = all_metrics_names)
res_train.sort_index(inplace=True)

for ind in res_train.index:
    print(ind, end="")
    score = evaluate(*program[ind], metric=all_metrics)
    res_train.loc[ind,:] = score
    print(" ", score)
    # Rewritten after every row, so the file always holds the rows done so far.
    res_train.to_csv("../results/traineval.csv", index_label=["Test Set", "Method", "Train Set"])
    
res_train

('ASOIAF', 'ML Classical Features', 'ASOIAF')  0.98046875
('ASOIAF', 'ML Word Emb. Features', 'ASOIAF')  0.98828125
('SOC', 'ML Classical Features', 'SOC')  0.945054945054945
('SOC', 'ML Word Emb. Features', 'SOC')  0.9560439560439561
('WOT', 'ML Classical Features', 'WOT')  0.7847222222222222
('WOT', 'ML Word Emb. Features', 'WOT')  0.7939814814814815


Unnamed: 0,Unnamed: 1,Unnamed: 2,Acc
ASOIAF,ML Classical Features,ASOIAF,0.980469
ASOIAF,ML Word Emb. Features,ASOIAF,0.988281
SOC,ML Classical Features,SOC,0.945055
SOC,ML Word Emb. Features,SOC,0.956044
WOT,ML Classical Features,WOT,0.784722
WOT,ML Word Emb. Features,WOT,0.793981


## Feature importance stuff

In [None]:
def feature_importance(mdl):
    """Print the strictly positive feature weights of ``mdl.classifier``, largest first.

    Feature names are the third value returned by ``get_feature_vectors`` for
    the text of ``ann_comb[1]``. Weights are read from
    ``mdl.classifier.feature_importances_`` (assumes the classifier exposes
    that attribute — TODO confirm for the classifiers used in this notebook).
    Prints the number of positive weights, then one ``weight, name`` line per
    feature. Returns nothing.
    """
    _, _, feature_names = get_feature_vectors(ann_comb[1]['text'])

    # Sorting the (weight, name) pairs in reverse puts the largest weight first.
    ranked = sorted(zip(mdl.classifier.feature_importances_, feature_names), reverse=True)
    positive = [(weight, name) for weight, name in ranked if weight > 0]

    print("Number of nonzeo weights: ", len(positive))
    lines = [", ".join(str(part) for part in pair) for pair in positive]
    print("\n".join(lines))


In [None]:
# NOTE(review): CL_SOC is not defined in any visible cell of this notebook —
# confirm where it comes from before running.
feature_importance(CL_SOC)

In [None]:
# NOTE(review): HY_SOC is not defined in any visible cell of this notebook —
# confirm where it comes from before running.
feature_importance(HY_SOC)

In [None]:
# NOTE(review): CL_ASOIAF is not defined in any visible cell of this notebook —
# confirm where it comes from before running.
feature_importance(CL_ASOIAF)

In [None]:
# NOTE(review): HY_ASOIAF is not defined in any visible cell of this notebook —
# confirm where it comes from before running.
feature_importance(HY_ASOIAF)