In [101]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn
from nltk.wsd import lesk
import nltk

In [102]:
TRAIN_ENGLISH_WIKIPEDIA = "../cwishareddataset/traindevset/" + \
                           "english/Wikipedia_Train.tsv"
df = pd.read_csv(TRAIN_ENGLISH_WIKIPEDIA, sep = "\t")
df.columns = ['id', 'sentence', "start", "end", "target", 
              "nat", "non_nat", "nat_marked", "non_nat_marked", "binary", "prob"]

In [103]:
df.index
df.columns

Index(['id', 'sentence', 'start', 'end', 'target', 'nat', 'non_nat',
       'nat_marked', 'non_nat_marked', 'binary', 'prob'],
      dtype='object')

In [104]:
df.loc[df.binary == 1, ['target', 'binary', 'prob']].describe()

Unnamed: 0,binary,prob
count,2493.0,2493.0
mean,1.0,0.211372
std,0.0,0.228076
min,1.0,0.05
25%,1.0,0.05
50%,1.0,0.1
75%,1.0,0.25
max,1.0,1.0


In [105]:
df['length'] = df.target.apply(len)
df.groupby('binary')['length'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
binary,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,3057.0,6.357213,3.558134,2.0,4.0,6.0,7.0,49.0
1,2493.0,10.203771,5.206972,2.0,7.0,9.0,12.0,49.0


# WordNet Features
Here we implement all the relevant features based on WordNet and SentiWordNet. For example, the number of synsets the target word is contained in or the average length of the lemmas of all the synsets the target word is contained in. Note that all features that are computed in the following exploit neither the POS-Tag of the target word nor Word Sense Disambiguation by e.g. UKB-Algorithm.

In [418]:
def wn_synset_freq(target):
    return len(wn.synsets(target))

def wn_synset_avg_lemma_freq(target):
    return np.nan_to_num(np.mean([len(synset.lemmas()) 
            for synset in wn.synsets(target)]))

def wn_synset_avg_lemma_len(target):
    return np.nan_to_num(np.nanmean([len(lemma.name()) 
            for synset in wn.synsets(target) 
            for lemma in synset.lemmas()]))

def wn_synset_avg_hypernyms(target):
    return np.nan_to_num(np.nanmean([len(synset.hypernyms()) 
            for synset in wn.synsets(target)]))

def wn_synset_avg_hyponyms(target):
    return np.nan_to_num(np.mean([len(synset.hyponyms()) 
            for synset in wn.synsets(target)]))

def wn_synset_sum_hypernyms(target):
    return np.sum(([len(synset.hypernyms()) 
            for synset in wn.synsets(target)]))

def wn_synset_avg_definition_len(target):
    return np.nan_to_num(np.mean([len(str(synset.definition())) 
            for synset in wn.synsets(target)]))

def wn_synset_avg_hyptree_depth(target):
    return np.nan_to_num(np.mean([synset.max_depth() 
            for synset in wn.synsets(target)]))

def wn_synset_num_distinct_pos(target):
    return len(set([synset.pos() for synset in wn.synsets(target)]))

def wn_synset_avg_num_relations(target):
    return np.nan_to_num(np.mean([np.sum([len(synset.hypernyms()), len(synset.hyponyms()), 
             len(synset.instance_hypernyms()), len(synset.instance_hyponyms()),
             len(synset.member_holonyms()), len(synset.substance_holonyms()),
             len(synset.part_holonyms()), len(synset.member_meronyms()),
             len(synset.substance_meronyms()), len(synset.part_meronyms())]) 
             for synset in wn.synsets(target)]))

def wn_synset_avg_freq_pos(target, pos):
    return len(wn.synsets(target, pos = pos))

def wn_synset_sense_entropy_uniform(target):
    num_senses = len(wn.synsets(target))
    return -np.sum([((1 / num_senses) * np.log2(1 / num_senses)) 
                     for index in range(0, num_senses)])

def wn_synset_sense_entropy_pos_uniform(target):
    num_senses = len(wn.synsets(target))
    pos_distribution = [len(wn.synsets(target, pos = wn.NOUN)),
                        len(wn.synsets(target, pos = wn.VERB)),
                        len(wn.synsets(target, pos = wn.ADJ)),
                        len(wn.synsets(target, pos = wn.ADV))]
    return -np.sum([(np.nan_to_num((count / num_senses) * np.log2(count / num_senses))) 
            for count in pos_distribution]) if num_senses != 0 else 0

def wn_synsets_sense_entropy_pos_central(target, pos):
    num_senses_pos = len(wn.synsets(target, pos = pos))
    return -np.sum([((1 / num_senses_pos) * np.log2(1 / num_senses_pos))
                     for index in range(0, num_senses_pos)])
     

def swn_avg_objective_score(target):
    return np.nan_to_num(np.mean([senti_synset.obj_score() 
                for senti_synset in swn.senti_synsets(target)]))

def pos_tag(sentence, target):
    tokens = nltk.word_tokenize(sentence)
    wordPOSPairs = [token for token in nltk.pos_tag(tokens) if token[0] == target]
    return wordPOSPairs[0][1] if len(wordPOSPairs) > 0 else None

def penn_to_wn(tag):
    if not tag:
        return None
    if tag.startswith('N'):
        return 'n'
    if tag.startswith('V'):
        return 'v'
    if tag.startswith('J'):
        return 'a'
    if tag.startswith('R'):
        return 'r'
    return None

df['wn_synset_freq'] = df.target.apply(wn_synset_freq)
df['wn_synset_avg_lemma_freq'] = df.target.apply(wn_synset_avg_lemma_freq)
df['wn_synset_avg_lemma_len'] = df.target.apply(wn_synset_avg_lemma_len)

df['wn_synset_diff_len_avg_lemma_len'] = df.wn_synset_avg_lemma_len - df.length
df['wn_synset_avg_hypernyms'] = df.target.apply(wn_synset_avg_hypernyms)
df['wn_synset_sum_hypernyms'] = df.target.apply(wn_synset_sum_hypernyms)
df['wn_synset_avg_hyponyms'] = df.target.apply(wn_synset_avg_hyponyms)

df['wn_synset_avg_definition_len'] = df.target.apply(wn_synset_avg_definition_len)
df['wn_synset_avg_hyptree_depth'] = df.target.apply(wn_synset_avg_hyptree_depth)
df['wn_synset_num_distinct_pos'] = df.target.apply(wn_synset_num_distinct_pos)
df['wn_synset_avg_num_relations'] = df.target.apply(wn_synset_avg_num_relations)

# Synset sizes of the target word for the four different POS-Tags in WordNet
df['wn_synset_avg_freq_pos_noun'] = df.target.apply(lambda target : wn_synset_avg_freq_pos(target, wn.NOUN))
df['wn_synset_avg_freq_pos_verb'] = df.target.apply(lambda target : wn_synset_avg_freq_pos(target, wn.VERB))
df['wn_synset_avg_freq_pos_adj'] = df.target.apply(lambda target : wn_synset_avg_freq_pos(target, wn.ADJ))
df['wn_synset_avg_freq_pos_adv'] = df.target.apply(lambda target : wn_synset_avg_freq_pos(target, wn.ADV))

# Normalized POS-Tag synset sizes
df['wn_synset_avg_freq_pos_noun_norm'] = np.nan_to_num(df.wn_synset_avg_freq_pos_noun / df.wn_synset_freq)
df['wn_synset_avg_freq_pos_verb_norm'] = np.nan_to_num(df.wn_synset_avg_freq_pos_verb / df.wn_synset_freq)
df['wn_synset_avg_freq_pos_adj_norm'] = np.nan_to_num(df.wn_synset_avg_freq_pos_adj / df.wn_synset_freq)
df['wn_synset_avg_freq_pos_adv_norm'] = np.nan_to_num(df.wn_synset_avg_freq_pos_adv / df.wn_synset_freq)

df['pos_tag'] = df[['sentence', 'target']].apply(lambda vals : pos_tag(*vals), axis = 1)
df['wn_synset_sense_entropy_uniform'] = df.target.apply(wn_synset_sense_entropy_uniform)
df['wn_synset_sense_entropy_pos_uniform'] = df.target.apply(wn_synset_sense_entropy_pos_uniform)
df['wn_synsets_sense_entropy_pos_central'] = df[['target', 'pos_tag']].apply(
    lambda vals : wn_synsets_sense_entropy_pos_central(vals[0], penn_to_wn(vals[1])), axis = 1)

df['swn_avg_objective_score'] = df.target.apply(swn_avg_objective_score)

  out=out, **kwargs)
  # Remove the CWD from sys.path while we load stuff.
  from ipykernel import kernelapp as app


In [420]:
df.loc[:,['target', 'length','wn_synset_freq', 'wn_synset_sense_entropy_uniform', 'wn_synset_sense_entropy_pos_uniform', 'wn_synsets_sense_entropy_pos_central']].head()

Unnamed: 0,target,length,wn_synset_freq,wn_synset_sense_entropy_uniform,wn_synset_sense_entropy_pos_uniform,wn_synsets_sense_entropy_pos_central
0,passed,6,25,4.643856,-0.0,4.643856
1,land,4,18,4.169925,0.964079,3.459432
2,future,6,7,2.807355,0.985228,2.0
3,future generations,18,0,-0.0,0.0,-0.0
4,generations,11,7,2.807355,-0.0,2.807355


# PorterStemmer and StanfordNLP Features
Here we implement features based on the PorterStemmer library from nltk.

In [425]:
from nltk.stem.porter import *
from nltk.stem.wordnet import *
from nltk.tag.stanford import StanfordNERTagger
from nltk.parse.stanford import StanfordDependencyParser
from nltk.tokenize import word_tokenize
import os

java_path = "C:/Program Files (x86)/Java/jdk1.8.0_144/bin/java.exe"
os.environ['JAVAHOME'] = java_path
path_to_jar = 'resources/stanford-dependency-parser/stanford-parser.jar'
path_to_models_jar = 'resources/stanford-dependency-parser/stanford-parser-3.9.1-models.jar'

porterStemmer = PorterStemmer()
wordNetLemmatizer = WordNetLemmatizer()
nerTagger = StanfordNERTagger('resources/stanford-ner-tagger/classifiers/english.all.3class.distsim.crf.ser.gz',
               'resources/stanford-ner-tagger/stanford-ner.jar',
               encoding='utf-8')
dependencyParser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)

def porter_stem_len(target):
    return len(str(porterStemmer.stem(target)))

def porter_stemmer_num_steps(target):
    stem = target.lower()
    applied_steps = 0
    if porterStemmer.mode == porterStemmer.NLTK_EXTENSIONS and target in porterStemmer.pool:
            return applied_steps
    if porterStemmer.mode != porterStemmer.ORIGINAL_ALGORITHM and len(target) <= 2:
            return applied_steps
    step_funcs = [porterStemmer._step1a, porterStemmer._step1b, porterStemmer._step1c,
                  porterStemmer._step2, porterStemmer._step3, porterStemmer._step3,
                  porterStemmer._step4, porterStemmer._step5a, porterStemmer._step5b]
    for step_func in step_funcs:
        stem_step = step_func(stem)
        if stem_step != stem:
            stem = stem_step
            applied_steps += 1
    return applied_steps

def is_named_entity(sentence, target):
    tokenized_sent = word_tokenize(sentence)
    tagged_sent = nerTagger.tag(tokenized_sent)
    for token, tag in tagged_sent:
        if token == target and tag != 'O':
            return 1
    return 0

def named_entity_type(sentence, target):
    tokenized_sent = word_tokenize(sentence)
    tagged_sent = nerTagger.tag(tokenized_sent)
    return [tag for token, tag in tagged_sent if token == target][0]

# TODO consider using stanford lemmatizer and compute word similarity metric
# to orignal target
def wordnet_lemma_len(target):
    return len(wordNetLemmatizer.lemmatize(target))


# Porter stemmer stem length, number of applied steps,
# difference of stem length to target and reduction ratio
df['porter_stem_len'] = df.target.apply(porter_stem_len)
df['porter_stemmer_num_steps'] = df.target.apply(porter_stemmer_num_steps)
df['diff_len_stem_len'] = df.length - df.num_porter_stem_len
df['reduction_stem_len'] = 1 - df.porter_stem_len / df.length

# WordNet lemma length, differnce of lemma length to target
# length and reduction ratio for lemmatization
df['wordnet_lemma_len'] = df.target.apply(wordnet_lemma_len)
df['diff_len_wordnet_lemma_len'] = df.length - df.wordnet_lemma_len
df['reduction_lemma_len'] = 1 - df.wordnet_lemma_len / df.length

# StanfordNLP features CAUTION: The tagger is slow
#df['is_named_entity'] = df[['sentence', 'target']].apply(lambda vals : is_named_entity(*vals), axis = 1)
#df['named_entity_type'] = df[['sentence', 'target']].apply(lambda vals : named_entity_type(*vals), axis = 1)
result = dependencyParser.raw_parse('I shot an elephant in my sleep')
list(result)

[<DependencyGraph with 8 nodes>]

In [None]:
df.loc[:, ['target', 'length', 'wordnet_lemma_len', 'diff_len_wordnet_lemma_len', 'reduction_lemma_len']]

# Orthographic Features
Here we compute orthographic features for the target word

In [345]:
#Relative position of the target word based on tokens
df['relative_position'] = df[['sentence', 'target']].apply(lambda vals : 
            (nltk.word_tokenize(vals[0]).index(vals[1].split()[0])) / len((nltk.word_tokenize(vals[0]))), axis = 1)
# Relative positions of the target word based on character counting
df['relative_position_left'] = df[['sentence', 'start']].apply(lambda vals : vals[1] / len(vals[0]), axis = 1)
df['relative_position_centered'] = df[['sentence', 'start', 'end']].apply(lambda vals : 
            ((vals[1] + vals[2]) / 2) / len(vals[0]), axis = 1)
df['relative_position_right'] = df[['sentence', 'end']].apply(lambda vals : vals[1] / len(vals[0]), axis = 1)

In [None]:
df.loc[:, ['sentence', 'target', 'start', 'relative_position', 'relative_position_left', 'relative_position_centered', 'relative_position_right']]

# Context-Aware Features
Here we compute not only the context extraction/definition in the first place but also the corresponding context features afterwards. Also we need to implement proper strategies to cope with the target occuring multiple times in the sentence. To avoid mistakes, we should use the actual start and end tags from the dataset.

In [58]:
from nltk.tokenize import word_tokenize
from nltk.parse.stanford import StanfordDependencyParser
from nltk.parse.stanford import StanfordNeuralDependencyParser
import os

java_path = "C:/Program Files (x86)/Java/jdk1.8.0_144/bin/java.exe"
os.environ['JAVAHOME'] = java_path
path_to_jar = 'resources/stanford-dependency-parser/stanford-parser.jar'
path_to_models_jar = 'resources/stanford-dependency-parser/stanford-parser-3.9.1-models.jar'

dependencyParser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)

def post_process_ctx(context):
    return [token for token in context if token not in (",", "'", "'s")]
    
def ctx_extraction_all(context, target):
    return word_tokenize(context)

def ctx_extraction_all_filtered(context, target):
    ctx_tokens = word_tokenize(context)
    return post_process_ctx(ctx_tokens)

def ctx_extraction_window_pre_n(context, target, n = 3):
    ctx_tokens = word_tokenize(context)
    post_ctx_tokens = post_process_ctx(ctx_tokens)
    target_index = post_ctx_tokens.index(target)
    start_index = (target_index - n) if (target_index - n) > 0 else 0
    return post_ctx_tokens[start_index:target_index]

def ctx_extraction_window_suc_n(context, target, n = 3):
    ctx_tokens = word_tokenize(context)
    post_ctx_tokens = post_process_ctx(ctx_tokens)
    target_index = post_ctx_tokens.index(target)
    end_index = (target_index + 1 + n) if (target_index + 1 + n) \
                < len(post_ctx_tokens) else len(post_ctx_tokens)
    return post_ctx_tokens[target_index+1:end_index]

def ctx_extraction_window_pre_suc_n(context, target, n = 3):
    ctx_tokens_pre = ctx_extraction_window_pre_n(context, target, n)
    ctx_tokens_suc = ctx_extraction_window_suc_n(context, target, n)
    ctx_tokens_pre.extend(ctx_tokens_suc)
    return ctx_tokens_pre

def ctx_extraction_dep_in(context, target):
    return [triple[0][0] for parse in dependencyParser.raw_parse(context)
            for triple in list(parse.triples()) if triple[2][0] == target]

def ctx_extraction_dep_out(context, target):
    return [triple[2][0] for parse in dependencyParser.raw_parse(context)
            for triple in list(parse.triples()) if triple[0][0] == target]

def ctx_extraction_dep_in_out(context, target):
    ctx_tokens_in = ctx_extraction_dep_in(context, target)
    ctx_tokens_out = ctx_extraction_dep_out(context, target)
    ctx_tokens_in.extend(ctx_tokens_out)
    return ctx_tokens_in

def ctx_extraction_dep_recu_in_n_steps(context, target, n = 2):
    deps = [triple for parse in dependencyParser.raw_parse(context)
            for triple in parse.triples()]
    result_tokens = []
    curr_target = [target]
    for step in range(0, n):
        step_result = [triple[0][0] for triple in deps 
                       if triple[2][0] in curr_target]
        curr_target = list(set(step_result))
        result_tokens.extend(step_result)
    return list(set(result_tokens))

def ctx_extraction_dep_recu_out_n_steps(context, target, n = 2):
    deps = [triple for parse in dependencyParser.raw_parse(context)
            for triple in parse.triples()]
    result_tokens = []
    curr_target = [target]
    for step in range(0, n):
        step_result = [triple[2][0] for triple in deps 
                       if triple[0][0] in curr_target]
        curr_target = list(set(step_result))
        result_tokens.extend(step_result)
    return list(set(result_tokens))

def ctx_extraction_dep_recu_in_out_n_steps(context, target, n = 2):
    deps = [triple for parse in dependencyParser.raw_parse(context)
            for triple in parse.triples()]
    result_tokens = []
    curr_target = [target]
    for step in range(0, n):
        step_result = [triple[2][0] for triple in deps 
                       if triple[0][0] in curr_target]
        step_result_out = [triple[0][0] for triple in deps 
                       if triple[2][0] in curr_target]
        step_result.extend(step_result_out)
        curr_target = list(set(step_result))
        result_tokens.extend(step_result)
    return list(set(result_tokens))

def ctx_extraction_dep_recu_in_cover(context, target, cover = 0.1):
    deps = [triple for parse in dependencyParser.raw_parse(context)
            for triple in parse.triples()]
    ctx_tokens = word_tokenize(context)
    ctx_tokens_post = post_process_ctx(ctx_tokens)
    result_tokens = []
    curr_target = [target]
    curr_cover = 0
    while curr_cover < cover:
        step_result = [triple[0][0] for triple in deps 
                       if triple[2][0] in curr_target]
        if set(step_result) == set(curr_target):
                break
        curr_target = list(set(step_result))
        result_tokens.extend(step_result)
        curr_cover = len(result_tokens) / len(ctx_tokens_post)
    return list(set(result_tokens))

def ctx_extraction_dep_recu_out_cover(context, target, cover = 0.1):
    deps = [triple for parse in dependencyParser.raw_parse(context)
            for triple in parse.triples()]
    ctx_tokens = word_tokenize(context)
    ctx_tokens_post = post_process_ctx(ctx_tokens)
    result_tokens = []
    curr_target = [target]
    curr_cover = 0
    while curr_cover < cover:
        step_result = [triple[2][0] for triple in deps 
                       if triple[0][0] in curr_target]
        if set(step_result) == set(curr_target):
                break
        curr_target = list(set(step_result))
        result_tokens.extend(step_result)
        curr_cover = len(result_tokens) / len(ctx_tokens_post)
    return list(set(result_tokens))

def ctx_extraction_dep_recu_in_out_cover(context, target, cover = 0.1):
    deps = [triple for parse in dependencyParser.raw_parse(context)
            for triple in parse.triples()]
    ctx_tokens = word_tokenize(context)
    ctx_tokens_post = post_process_ctx(ctx_tokens)
    result_tokens = []
    curr_target = [target]
    curr_cover = 0
    while curr_cover < cover:
        step_result = [triple[2][0] for triple in deps 
                       if triple[0][0] in curr_target]
        step_result_out = [triple[0][0] for triple in deps 
                       if triple[2][0] in curr_target]
        step_result.extend(step_result_out)
        if set(step_result) == set(curr_target):
                break
        curr_target = list(set(step_result))
        result_tokens.extend(step_result)
        curr_cover = len(result_tokens) / len(ctx_tokens_post)
    print(curr_cover)
    return list(set(result_tokens))

def agg_feat_num_average(tokens, func_feature):
    return np.mean([func_feature(token) for token in tokens])

def agg_feat_num_median(tokens, func_feature):
    return np.median([func_feature(token) for token in tokens])

In [None]:
sentence = "Normally, the land will be passed down by future generations in a way " + \
             "that recognizes the community's traditional connection to that country "
target = 'passed'

print('ctx_etraction_all:')
print(ctx_extraction_all_filtered(sentence, target))

print('ctx_extraction_window_pre_n:')
print(ctx_extraction_window_pre_n(sentence, "Normally"))
print(ctx_extraction_window_pre_n(sentence, "the"))
print(ctx_extraction_window_pre_n(sentence, "land"))
print(ctx_extraction_window_pre_n(sentence, target, n = 5))

print('ctx_extraction_window_suc_n:')
print(ctx_extraction_window_suc_n(sentence, "country"))
print(ctx_extraction_window_suc_n(sentence, "to"))
print(ctx_extraction_window_suc_n(sentence, "connection"))
print(ctx_extraction_window_suc_n(sentence, "community", n = 5))

print('ctx_extraction_window_pre_suc_n:')
print(ctx_extraction_window_pre_suc_n(sentence, "passed"))
print(ctx_extraction_window_pre_suc_n(sentence, "the"))
print(ctx_extraction_window_pre_suc_n(sentence, "to"))

print('ctx_extraction_dep_in:')
print(ctx_extraction_dep_in(sentence, "land"))

print('ctx_extraction_dep_out:')
print(ctx_extraction_dep_out(sentence, target))
print(ctx_extraction_dep_out(sentence, "land"))

print('ctx_extraction_dep_in_out:')
print(ctx_extraction_dep_in_out(sentence, "land"))

print('ctx_extraction_dep_recu_in_n_steps:')
print(ctx_extraction_dep_recu_in_n_steps(sentence, "the", n = 3))

print('ctx_extraction_dep_recu_out_n_steps:')
print(ctx_extraction_dep_recu_out_n_steps(sentence, "the"))

print('ctx_extraction_dep_recu_in_out_n_steps:')
print(ctx_extraction_dep_recu_in_out_n_steps(sentence, "the"))

print('ctx_extraction_dep_recu_in_cover:')
print(ctx_extraction_dep_recu_in_cover(sentence, "the", cover=0.1))

print('ctx_extraction_dep_recu_out_cover:')
print(ctx_extraction_dep_recu_out_cover(sentence, "the", cover=0.1))

print('ctx_extraction_dep_recu_in_out_cover:')
print(ctx_extraction_dep_recu_in_out_cover(sentence, "the", cover=0.1))

After we defined all the context extraction approaches, we can apply them on the actual dataset. To do so, we first extract all the distinct sentences from the actual training set and create a new dataframe containing only the sentence ids, the sentence, the target and all the computed contexts. This also makes it easier to integrate context extraction functions implemented in other languages. Afterwards we can compute the context features and join them back with the target features dataframe.

In [None]:
df_context = df.loc[:, ['id', 'sentence', 'target', 'start', 'end']].head()
df_context['ctx_avg_word_length'] = \
    df_context[['sentence', 'target']].apply(lambda vals : 
                               ctx_extraction_window_pre_suc_n(vals[0], vals[1]), axis = 1)

# Feature Importance
Here we compute individual feature importance based on different metrics. For example, we implement and compute the F-Score, providing an idea of the discrimination power the feature has.

In [179]:
#df.groupby('binary')['porter_stemmer_num_steps'].describe()
df.groupby('target').size().sort_values()

def feat_importance_f_score(dataframe, feat_name, label_name):
    mean_feat = np.mean(dataframe.loc[:, [feat_name]])[0]
    means = df.loc[: , [feat_name, label_name]].groupby(label_name).mean().reset_index()
    mean_negativ = means.loc[means[label_name] == 0, [feat_name]][feat_name][0]
    mean_positiv = means.loc[means[label_name] == 1, [feat_name]][feat_name][1]
    # Compute the sum of deviations of the class mean from the overall mean
    class_mean_devs = (mean_positiv - mean_feat)**2 + (mean_negativ - mean_feat)**2
    # Compute neagtive instance based values
    neg_inst = dataframe.loc[dataframe[label_name] == 0, [feat_name]]
    std_dev_neg = (np.sum((neg_inst - mean_negativ)**2) / (len(neg_inst) - 1))[feat_name]
    #Compute positive instance based values
    pos_inst = dataframe.loc[dataframe[label_name] == 1, [feat_name]]
    std_dev_pos = (np.sum((pos_inst - mean_positiv)**2) / (len(pos_inst) - 1))[feat_name]
    return class_mean_devs / (std_dev_neg + std_dev_pos)
    
feat_importance_f_score(df, 'length', 'binary')

0.18792713906124658