# Absctraction Scorer

In [1]:
import nltk
nltk.download("punkt")
nltk.download("stopwords")
nltk.download("wordnet")


[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/jovyan/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [2]:
import pandas as pd

df = pd.read_csv("./voice_classified.csv")
df.voice = df.voice.apply(eval)
df.clauses_text_final = df.clauses_text_final.apply(eval)
df

Unnamed: 0,prompt,response,clauses_text_final,voice,idx
0,The past,"Winds through us, both from our lives and cult...",[Winds through us both from our lives and cult...,"[A_pron_x, A_pron_x, P_bevb_x, P_bevb_x, P_bev...",0
1,At times I worry about,Insufficient care and attention for his egocen...,[Insufficient care and attention for his egoce...,[A_pron_x],1
2,What gets me into trouble is,not considering others possibilities.,[not considering others possibilities],[A_def],2
3,When people are helpless,They often don&#039;t know it so they flak aro...,"[They often don, t know it, so they flak aroun...","[P_bevb_x, A_def, A_def, A_def]",3
4,Privacy,is a sense of hiding from others that which yo...,"[is a sense of, hiding from others that, which...","[P_bevb_x, A_def, P_bevb_x, P_bevb_x, A_def, A...",4
5,If I can\'t get what I want,I really don&#039;t want that much any more.,"[want, I really don, t want that much any more]","[A_def, P_bevb_x, A_def]",5
6,Change is,hj;oh,[hj],[Undefined],6
7,When people are helpless,At times I try to find other ways of doing thi...,"[At times I try to find other ways of, doing t...","[A_def, A_def, A_def, P_bevb_x, P_get_x, A_def...",7
8,When I am criticized,I often find myself running through a microgen...,"[I often find, myself running through a microg...","[A_def, A_def, A_def, A_def, A_def, A_def]",8
9,Being with other people,Is a mutually mutating meeting of universes,"[Is a meeting of universes, mutually mutating]","[P_bevb_x, A_def]",9


In [3]:
from nltk.corpus import wordnet as wn, stopwords
ignore_words = list(set(stopwords.words('english')))
ignore_words = ignore_words + ['keep']

DEBUG_ABSTRACTION_HIERARCHY = False
class GenericTree:
    def __init__(self, nltk_word, print_pad = ""):
        self.word = nltk_word
        self.print_pad = print_pad

    def get_max_depth(self):
        if len(self.children) == 0:
            return 0
        return max([tree.get_max_depth() for tree in self.children ])+1
    
    def print_tree(self):
        print("{}>{}".format(self.print_pad, self.word.lemma_names()[0]))
        for tree in self.children:
            tree.print_tree()
            
class HypernymTree(GenericTree):
    def __init__(self, nltk_word, pos, print_pad = ""):
        super().__init__(nltk_word, print_pad)
        if len(nltk_word.hypernyms()) == 0:
            self.children = [] 
        else:
            hyper = nltk_word.hypernyms() 
            hyper = [x for x in hyper if x.pos() == pos]
            self.children = [HypernymTree(x, pos, print_pad = "{}==".format(self.print_pad)) for x in hyper]
        
class HyponymTree(GenericTree):
    def __init__(self, nltk_word, pos, print_pad = ""): 
        super().__init__(nltk_word, print_pad)
        if len(nltk_word.hyponyms()) == 0:
            self.children = [] 
        else:
            hypo = nltk_word.hyponyms()
            hypo = [x for x in hypo if x.pos() == pos]
            self.children = [HyponymTree(x, pos, print_pad = "{}==".format(self.print_pad)) for x in hypo]


In [4]:
from nltk.tokenize import word_tokenize

def hypernym_hierarchy(toks, pos):
    hyper_trees = [HypernymTree(tok, pos) for tok in toks]
    hypers = [tree.get_max_depth() for tree in hyper_trees]
    hypo_trees = [HyponymTree(tok, pos) for tok in toks]
    hypos = [tree.get_max_depth() for tree in hypo_trees]
    if DEBUG_ABSTRACTION_HIERARCHY:
        for tree in hyper_trees:
            print("#####################################################3")
            tree.print_tree()
        for tree in hypo_trees:
            print("#####################################################3")
            tree.print_tree()

    assert len(hypers) == len(hypos)
    op = [hypos[i]*1./(hypers[i] + hypos[i]) if hypers[i]>0 or hypos[i]>0 else 0 for i in range(len(hypers))]
    #print(op)
    return max(op)

def hypernym_score(word):
    toks = wn.synsets(word)
    if len(toks) == 0 or word in ignore_words:
        return 0

    op = hypernym_hierarchy(toks, toks[0].pos())
    return round(op, 2)
    
def score_abstraction(clauses):
    op = []
    for clause in clauses:
        op.append(max([hypernym_score(x) for x in clause]))
    return op

df['abstraction_score'] = df.clauses_text_final.apply(score_abstraction)
df.sample(frac=1).head(20)

Unnamed: 0,prompt,response,clauses_text_final,voice,idx,abstraction_score
133,When people are helpless,I am inspired to reach out and be of service.,"[I am inspired to reach out and, be of service]","[P_bevb_x, A_def, P_bevb_x]",133,"[0.25, 0.25]"
48,My main problem is,how my narccissim gets in the way of seeing th...,"[how my narccissim gets in the way of, seeing ...","[A_pron_x, A_def]",48,"[0.25, 0.14]"
434,If I were in charge,id be more careful,[i d be more careful],[P_bevb_x],434,[0.25]
396,The thing I like about myself is,the way in which I am made up of the constella...,"[the way, in which I am made up of the constel...","[Undefined, P_bevb_x, A_pron_x, A_def, A_def, ...",396,"[0.14, 0.25, 0.25, 0.14, 0.25]"
531,Change is,"happening all the time, continues and is inevi...","[happening, all the time continues and, is ine...","[A_def, A_def, P_bevb_x, A_pron_x, P_bevb_x, P...",531,"[0.14, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25]"
222,What I like to do best is,Is an expression of the love animating all cre...,"[Is an expression of the love to All, animatin...","[P_bevb_x, A_def, A_def, A_def, A_pron_x]",222,"[0.22, 0.25, 0.25, 0.25]"
412,If I were in charge,"I would express mySelf as heaven and earth, an...",[I would express mySelf as heaven and earth an...,"[P_bevb_x, A_def, A_def]",412,"[0.22, 0.25, 0.25]"
390,At times I worry about,the hours I work and the impact on my family l...,"[the hours and the impact on my family life, I...","[Undefined, A_def, A_pron_x, A_def, A_def]",390,"[0.25, 0.22, 0.22, 0.25]"
50,We could make the world a better place if,we all paid attention to our growth - explored...,"[we all paid attention to our growth, explored...","[A_pron_x, A_def, A_def, A_pron_x, A_def, P_be...",50,"[0.14, 0.12, 0.14, 0.14, 0.25, 0.25, 0.14, 0.2..."
346,A good child,goes to a good school,[goes to a good school],[A_def],346,[0.25]


In [8]:
def normalize(row, x_max, x_min, reverse_arr = False):
    if not reverse_arr:
        return [round((x - x_min)/(x_max - x_min), 2) for x in row]
    return [round((-1*x - x_min)/(x_max - x_min), 2) for x in row]

abstraction_score = df['abstraction_score'].tolist()
abstraction_score = [j for i in abstraction_score for j in i]
x_max, x_min = max(abstraction_score), min(abstraction_score)
df['abstraction_score_normalized'] = df['abstraction_score'].apply(lambda arr : normalize(arr, x_max, x_min))
df

Unnamed: 0,prompt,response,clauses_text_final,voice,idx,abstraction_score,abstraction_score_normalized
0,The past,"Winds through us, both from our lives and cult...",[Winds through us both from our lives and cult...,"[A_pron_x, A_pron_x, P_bevb_x, P_bevb_x, P_bev...",0,"[0.25, 0.25, 0.25, 0.25, 0.14, 0.25, 0.12, 0.1...","[1.0, 1.0, 1.0, 1.0, 0.56, 1.0, 0.48, 0.56, 0...."
1,At times I worry about,Insufficient care and attention for his egocen...,[Insufficient care and attention for his egoce...,[A_pron_x],1,[0.25],[1.0]
2,What gets me into trouble is,not considering others possibilities.,[not considering others possibilities],[A_def],2,[0.25],[1.0]
3,When people are helpless,They often don&#039;t know it so they flak aro...,"[They often don, t know it, so they flak aroun...","[P_bevb_x, A_def, A_def, A_def]",3,"[0.14, 0.14, 0.14]","[0.56, 0.56, 0.56]"
4,Privacy,is a sense of hiding from others that which yo...,"[is a sense of, hiding from others that, which...","[P_bevb_x, A_def, P_bevb_x, P_bevb_x, A_def, A...",4,"[0.14, 0.14, 0.25, 0.14, 0.14]","[0.56, 0.56, 1.0, 0.56, 0.56]"
5,If I can\'t get what I want,I really don&#039;t want that much any more.,"[want, I really don, t want that much any more]","[A_def, P_bevb_x, A_def]",5,"[0.14, 0.22, 0.25]","[0.56, 0.88, 1.0]"
6,Change is,hj;oh,[hj],[Undefined],6,[0.14],[0.56]
7,When people are helpless,At times I try to find other ways of doing thi...,"[At times I try to find other ways of, doing t...","[A_def, A_def, A_def, P_bevb_x, P_get_x, A_def...",7,"[0.22, 0.14, 0.22, 0.14, 0.12]","[0.88, 0.56, 0.88, 0.56, 0.48]"
8,When I am criticized,I often find myself running through a microgen...,"[I often find, myself running through a microg...","[A_def, A_def, A_def, A_def, A_def, A_def]",8,"[0.22, 0.25, 0.14, 0.25, 0.12, 0.25]","[0.88, 1.0, 0.56, 1.0, 0.48, 1.0]"
9,Being with other people,Is a mutually mutating meeting of universes,"[Is a meeting of universes, mutually mutating]","[P_bevb_x, A_def]",9,"[0.22, 0.14]","[0.88, 0.56]"


In [9]:
df.to_csv("./abstraction_scored.csv", index = False)