# Absctraction Scorer

In [1]:
import nltk
nltk.download("punkt")
nltk.download("stopwords")
nltk.download("wordnet")


[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/jovyan/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

In [2]:
import pandas as pd

df = pd.read_csv("./voice_classified.csv")
df.voice = df.voice.apply(eval)
df.clauses_text_final = df.clauses_text_final.apply(eval)
df

Unnamed: 0,prompt,response,clauses_text_final,voice,idx
0,Sometimes I wish that,my mom would come home more often.,[my mom would come home more often],[A_pron_x],0
1,My family,has a cat...I don't have a cat though I have 2...,"[has a cat, I don t, have a cat, though I have...","[A_def, A_def, A_def, A_def]",1
2,When I am nervous,my hands get shaky,[my hands get shaky],[A_pron_x],2
3,At times I worry about,"my own dreams, nightmares I have thousands of ...","[my own dreams nightmares, I have thousands of...","[Undefined, A_def]",3
4,Technology,its crazy and i wish i could keep up,"[i wish, i could keep up]","[A_def, A_def]",4
5,When I get mad,I sometimes use big voice then I go somewhere ...,"[I sometimes use big voice, then I go somewher...","[A_def, P_get_x, A_def]",5
6,My main problem is,a feeling that I am not entitled,"[a feeling, I am not, that entitled]","[Undefined, P_bevb_x, A_def]",6
7,If my mother,"was a different lady, I would not be who I am.","[was a different lady, I would not be, who I am]","[P_bevb_x, A_def, P_bevb_x]",7
8,When I get mad,"it takes a long time to build up, but I am slo...","[it takes a long time but, to build up, I am s...","[A_def, A_def, P_bevb_x, A_def]",8
9,When they avoided me,"I felt safe, knowing that I could receive a wa...","[I felt safe knowing, that I could receive a w...","[A_def, A_def, A_pron_x]",9


In [3]:
from nltk.corpus import wordnet as wn, stopwords
ignore_words = list(set(stopwords.words('english')))
ignore_words = ignore_words + ['keep']

DEBUG_ABSTRACTION_HIERARCHY = False
class GenericTree:
    def __init__(self, nltk_word, print_pad = ""):
        self.word = nltk_word
        self.print_pad = print_pad

    def get_max_depth(self):
        if len(self.children) == 0:
            return 0
        return max([tree.get_max_depth() for tree in self.children ])+1
    
    def print_tree(self):
        print("{}>{}".format(self.print_pad, self.word.lemma_names()[0]))
        for tree in self.children:
            tree.print_tree()
            
class HypernymTree(GenericTree):
    def __init__(self, nltk_word, pos, print_pad = ""):
        super().__init__(nltk_word, print_pad)
        if len(nltk_word.hypernyms()) == 0:
            self.children = [] 
        else:
            hyper = nltk_word.hypernyms() 
            hyper = [x for x in hyper if x.pos() == pos]
            self.children = [HypernymTree(x, pos, print_pad = "{}==".format(self.print_pad)) for x in hyper]
        
class HyponymTree(GenericTree):
    def __init__(self, nltk_word, pos, print_pad = ""): 
        super().__init__(nltk_word, print_pad)
        if len(nltk_word.hyponyms()) == 0:
            self.children = [] 
        else:
            hypo = nltk_word.hyponyms()
            hypo = [x for x in hypo if x.pos() == pos]
            self.children = [HyponymTree(x, pos, print_pad = "{}==".format(self.print_pad)) for x in hypo]


In [4]:
from nltk.tokenize import word_tokenize

def hypernym_hierarchy(toks, pos):
    hyper_trees = [HypernymTree(tok, pos) for tok in toks]
    hypers = [tree.get_max_depth() for tree in hyper_trees]
    hypo_trees = [HyponymTree(tok, pos) for tok in toks]
    hypos = [tree.get_max_depth() for tree in hypo_trees]
    if DEBUG_ABSTRACTION_HIERARCHY:
        for tree in hyper_trees:
            print("#####################################################3")
            tree.print_tree()
        for tree in hypo_trees:
            print("#####################################################3")
            tree.print_tree()

    assert len(hypers) == len(hypos)
    op = [hypos[i]*1./(hypers[i] + hypos[i]) if hypers[i]>0 or hypos[i]>0 else 0 for i in range(len(hypers))]
    #print(op)
    return max(op)

def hypernym_score(word):
    toks = wn.synsets(word)
    if len(toks) == 0 or word in ignore_words:
        return 0

    op = hypernym_hierarchy(toks, toks[0].pos())
    return round(op, 2)
    
def score_abstraction(clauses):
    op = []
    for clause in clauses:
        op.append(max([hypernym_score(x) for x in clause]))
    return op

df['abstraction_score'] = df.clauses_text_final.apply(score_abstraction)
df.sample(frac=1).head(20)

Unnamed: 0,prompt,response,clauses_text_final,voice,idx,abstraction_score
9,When they avoided me,"I felt safe, knowing that I could receive a wa...","[I felt safe knowing, that I could receive a w...","[A_def, A_def, A_pron_x]",9,"[0.22, 0.25]"
12,Being with other people,is both a learning experience and an opportuni...,[is both a learning experience and an opportun...,"[P_bevb_x, A_def]",12,"[0.25, 0.25]"
0,Sometimes I wish that,my mom would come home more often.,[my mom would come home more often],[A_pron_x],0,[0.25]
37,When I am criticized,I can embody my personality and feel a pang co...,"[I can embody my personality and, feel a pang ...","[A_pron_x, A_def, A_def, A_def, A_def, A_def, ...",37,"[0.25, 0.25, 0.22, 0.14, 0.14, 0.25]"
35,A good boss,is a solar being ... one that brings light and...,"[is, a solar being one, that brings light and ...","[P_bevb_x, A_def, A_def, A_def, P_bevb_x, A_pr...",35,"[0, 0.14, 0.14, 0.25, 0.14, 0.25, 0.14, 0.25]"
17,When they avoided me,hmmm... I'm stumped. I have no idea how to res...,"[I have no idea, how to respond to this, Altho...","[A_def, A_def, P_bevb_x]",17,"[0.22, 0.14, 0.14]"
22,A good boss,A good boss is critical toward making a workpl...,"[A good boss is critical toward, making a work...","[P_bevb_x, A_def]",22,"[0.25, 0.25]"
40,My main problem is,how my narccissim gets in the way of seeing th...,"[how my narccissim gets in the way of, seeing ...","[A_pron_x, A_def]",40,"[0.25, 0.14]"
36,My conscience bothers me if,I'm not placing my feet on the path with an op...,[I m not placing my feet on the path with an o...,"[A_def, A_pron_x, A_def, A_def, A_def]",36,"[0.25, 0.14, 0.14, 0.25]"
48,Privacy,the human formconsicousness separates from the...,[the human formconsicousness separates from th...,[A_def],48,[0.25]


In [5]:
df.to_csv("./abstraction_scored.csv", index = False)