In [2]:
import spacy
import en_core_web_sm
from collections.abc import Iterable

2023-07-03 17:02:24.070344: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
from IPython.display import display, HTML
# Inject a CSS rule into the notebook page that forces elements with the
# `container` class to 100% width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [4]:
# Load the small English spaCy pipeline; `nlp` is the shared parser used by the
# cells below (e.g. `_get_lemma` and the driver cell).
nlp = en_core_web_sm.load()

# Dependency labels (compared against `tok.dep_`) that mark a token as a subject.
SUBJECTS = {"nsubj", "nsubjpass", "csubj", "csubjpass", "agent", "expl"}
# Dependency labels (compared against `tok.dep_`) that mark a token as an object.
OBJECTS = {"dobj", "dative", "attr", "oprd"}
# POS tags (compared against `tok.pos_`) at which `expand` stops collecting
# neighbouring tokens.
BREAKER_POS = {"CCONJ", "VERB"}
# Lower-cased words treated as negations by `_is_negated` and skipped by `expand`.
NEGATIONS = {"no", "not", "n't", "never", "none"}


In [5]:
# does dependency set contain any coordinating conjunctions?
def contains_conj(depSet):
    """Report whether ``depSet`` holds at least one coordinating conjunction.

    Args:
        depSet: container of lower-cased words (the callers pass a set built
            from ``tok.lower_``).

    Returns:
        True when any of and/or/nor/but/yet/so/for is a member, else False.
    """
    print("contains_conj")
    conjunctions = ("and", "or", "nor", "but", "yet", "so", "for")
    return any(word in depSet for word in conjunctions)

In [6]:
# get subs joined by conjunctions
def _get_subs_from_conjunctions(subs, tokens):
    print("_get_subs_from_conjunctions")
    more_subs = []
    for sub in subs:
        rights = _get_rights(sub, tokens)
        print(rights)
        rightDeps = {tok.lower_ for tok in rights}
        if contains_conj(rightDeps):
            more_subs.extend([tok for tok in rights if tok.dep_ in SUBJECTS or tok.pos_ == "NOUN"])
            if len(more_subs) > 0:
                more_subs.extend(_get_subs_from_conjunctions(more_subs, tokens))
    return more_subs

In [7]:
# get objects joined by conjunctions
def _get_objs_from_conjunctions(objs, tokens):
    print("_get_objs_from_conjunctions")
    print(objs)
    more_objs = []
    for obj in objs:
        # rights is a generator
        rights = _get_rights(obj, tokens)
        print(rights)
        if rights == []:
            return more_objs
        rightDeps = {tok.lower_ for tok in rights}
        if contains_conj(rightDeps):
            more_objs.extend([tok for tok in rights if tok.dep_ in OBJECTS or tok.pos_ == "NOUN"])
            if len(more_objs) > 0:
                more_objs.extend(_get_objs_from_conjunctions(more_objs))
    return more_objs


In [8]:
# find sub dependencies
def _find_subs(tok, tokens):
    print("_find_subs")
    head = tok.head
    while head.pos_ != "VERB" and head.pos_ != "NOUN" and head.head != head:
        head = head.head
    if head.pos_ == "VERB":
        subs = [tok for tok in head.lefts if tok.dep_ == "SUB"]
        if len(subs) > 0:
            verb_negated = _is_negated(head, tokens)
            subs.extend(_get_subs_from_conjunctions(subs, tokens))
            return subs, verb_negated
        elif head.head != head:
            return _find_subs(head, tokens)
    elif head.pos_ == "NOUN":
        return [head], _is_negated(tok, tokens)
    return [], False



In [9]:
# is the tok set's left or right negated?
def _is_negated(tok, tokens):
    print("_is_negated")
    parts = list(tok.lefts) + list(tok.rights)
    for dep in parts:
        if dep.lower_ in NEGATIONS:
            return True
    return False


In [10]:
# get all the verbs on tokens with negation marker
def _find_svs(tokens):
    print("_find_svs")
    svs = []
    verbs = [tok for tok in tokens if tok.pos_ == "VERB"]
    for v in verbs:
        subs, verbNegated = _get_all_subs(v)
        if len(subs) > 0:
            for sub in subs:
                svs.append((sub.orth_, "!" + v.orth_ if verbNegated else v.orth_))
    return svs

In [11]:
# get grammatical objects for a given set of dependencies (including passive sentences)
def _get_objs_from_prepositions(deps, is_pas):
    print("_get_objs_from_prepositions")
    objs = []
    for dep in deps:
        if dep.pos_ == "ADP" and (dep.dep_ == "prep" or (is_pas and dep.dep_ == "agent")):
            objs.extend([tok for tok in dep.rights if tok.dep_  in OBJECTS or
                         (tok.pos_ == "PRON" and tok.lower_ == "me") or
                         (tok.dep_ == 'pobj')])
    print("objs: ", objs)
    return objs



In [12]:
# get objects from the dependencies using the attribute dependency
def _get_objs_from_attrs(deps, is_pas, tokens):
    print("_get_objs_from_attrs")
    for dep in deps:
        if dep.pos_ == "NOUN" and dep.dep_ == "attr":
            verbs = [tok for tok in dep.rights if tok.pos_ == "VERB"]
            if len(verbs) > 0:
                for v in verbs:
                    rights = _get_rights(v, tokens)
                    objs = [tok for tok in rights if tok.dep_ in OBJECTS]
                    objs.extend(_get_objs_from_prepositions(rights, is_pas))
                    if len(objs) > 0:
                        return v, objs
    return None, None



In [13]:
# xcomp; open complement - verb has no subject
def _get_obj_from_xcomp(deps, is_pas, tokens):
    print("_get_obj_from_xcomp")
    for dep in deps:
        if ((dep.pos_ == "VERB" or dep.pos_ == "AUX") and dep.dep_ == "xcomp") or (dep.dep_ == "acomp"):
            v = dep
            rights = _get_rights(v, tokens)
            objs = [tok for tok in rights if tok.dep_ in OBJECTS]
            objs.extend(_get_objs_from_prepositions(rights, is_pas))
            print (objs)
            if len(objs) > 0:
                return v, objs
    return None, None



In [14]:
# get all functional subjects adjacent to the verb passed in
def _get_all_subs(v, tokens):
    print("_get_all_subs")
    verb_negated = _is_negated(v, tokens)
    dependency = _get_dependency_parse(tokens)
    print("--------")
    lefts = _get_lefts(v,tokens)
    print(dependency)
    print("lefts: ", lefts)
    subs =[]
    index = _get_verb_index(v,tokens)
    print("Indes: ", index)
    for tok in lefts:
        if tok.dep_ in SUBJECTS and tok.pos_!="DET":
            flag=0
            if tok.pos_ == "PRON":  
                for i in dependency:
                    relation, parent, child = i
                    if relation == 'obl' and parent == index:
                        subs.append(_get_at_index(child, tokens))
                        flag+=1
            if(not flag):
                subs.append(tok)
    print("subs are: ", subs)
    if len(subs) > 0:
        subs.extend(_get_subs_from_conjunctions(subs, tokens))
    else:
        foundSubs, verb_negated = _find_subs(v, tokens)
        subs.extend(foundSubs)
    return subs, verb_negated



In [15]:
def _get_verb_index(v, tokens):
    index = 1
    for token in tokens:
        if token.text == v.text:
            break
        index+=1
    return index

In [16]:
def _get_at_index(w, tokens):
    index = 1
    for token in tokens:
        if index == w:
            return token
        index+=1

In [17]:
def _get_dependency_parse(sentence):
    from stanfordcorenlp import StanfordCoreNLP
    import json
    props = {
    'annotators': 'tokenize,ssplit,pos,lemma,parse',
    'pipelineLanguage': 'en',
    'outputFormat': 'json'
    }
    nlp = StanfordCoreNLP('http://localhost', port=9000)
    return nlp.dependency_parse(sentence.text)


In [18]:
def _get_lefts(v,tokens):
    print("_get_lefts")
    index = _get_verb_index(v,tokens)
    dependency = _get_dependency_parse(tokens)
    sentence = tokens.text
    lefts=[]
    for i in dependency:
        relation, parent, child = i
        if parent == index and child < index:
            lefts.append(_get_at_index(child,tokens))
    return lefts

In [19]:
def _get_rights(v,tokens):
    index = _get_verb_index(v,tokens)
    dependency = _get_dependency_parse(tokens)
    sentence = tokens.text
    rights=[]
    for i in dependency:
        relation, parent, child = i
        if parent == index and child > index:
            rights.append(_get_at_index(child,tokens))
    return rights

In [20]:
# find the main verb - or any aux verb if we can't find it
def _find_verbs(tokens):
    print("_find_verbs(tokens)")
    verbs = [tok for tok in tokens if _is_non_aux_verb(tok)]
    print("Verbs found are : ", verbs)
    print(len(verbs))
    if len(verbs) == 0:
        verbs = [tok for tok in tokens if _is_verb(tok)]
    return verbs



In [21]:
# is the token a verb?  (excluding auxiliary verbs)
def _is_non_aux_verb(tok):
    print("_is_non_aux_verb")
    return tok.pos_ == "VERB" and (tok.dep_ != "aux" and tok.dep_ != "auxpass")



In [22]:
# is the token a verb?  (including auxiliary verbs)
def _is_verb(tok):
    print("_is_verb")
    return tok.pos_ == "VERB" or tok.pos_ == "AUX"


In [23]:
# return the verb to the right of this verb in a CCONJ relationship if applicable
# returns a tuple, first part True|False and second part the modified verb if True
def _right_of_verb_is_conj_verb(v, tokens):
    print("_right_of_verb_is_conj_verb")
    # rights is a generator
    rights = _get_rights(v, tokens)

    # VERB CCONJ VERB (e.g. he beat and hurt me)
    if len(rights) > 1 and rights[0].pos_ == 'CCONJ':
        for tok in rights[1:]:
            if _is_non_aux_verb(tok):
                return True, tok

    return False, v


In [24]:
# get all objects for an active/passive sentence
def _get_all_objs(v, is_pas, tokens):
    print("_get_all_objs")
    # rights is a generator
    rights = _get_rights(v, tokens)
    print("rights: ", rights)
    

    objs = [tok for tok in rights if tok.dep_ in OBJECTS or (is_pas and tok.dep_ == 'pobj')]
    print("objs: ", objs)
    if len(objs) == 1 and _get_rights(objs[0], tokens) == []:
        return v, objs
    objs.extend(_get_objs_from_prepositions(rights, is_pas))

    #potentialNewVerb, potentialNewObjs = _get_objs_from_attrs(rights)
    #if potentialNewVerb is not None and potentialNewObjs is not None and len(potentialNewObjs) > 0:
    #    objs.extend(potentialNewObjs)
    #    v = potentialNewVerb
    print("rights: ", rights)
    potential_new_verb, potential_new_objs = _get_obj_from_xcomp(rights, is_pas, tokens)
    if potential_new_verb is not None and potential_new_objs is not None and len(potential_new_objs) > 0:
        objs.extend(potential_new_objs)
        v = potential_new_verb
    if len(objs) > 0:
        objs.extend(_get_objs_from_conjunctions(objs, tokens))
    return v, objs



In [25]:
# return true if the sentence is passive - at the moment a sentence is assumed passive if it has an auxpass verb
def _is_passive(tokens):
    print("_is_passive")
    for tok in tokens:
        if tok.dep_ == "auxpass":
            print("yes")
            return True
    return False


In [26]:
# resolve a 'that' where/if appropriate
def _get_that_resolution(toks):
    print("_get_that_resolution")
    for tok in toks:
        if 'that' in [t.orth_ for t in tok.lefts]:
            return tok.head
    return None


In [27]:
# simple stemmer using lemmas
def _get_lemma(word: str):
    print("_get_lemma")
    tokens = nlp(word)
    if len(tokens) == 1:
        return tokens[0].lemma_
    return word


In [28]:
# print information for displaying all kinds of things of the parse tree
def printDeps(toks):
    """Print one line per token with its parse details.

    Each line shows the token text, dependency label, POS tag, head text, and
    the texts of its left and right children.

    Args:
        toks: tokens to print.
    """
    print("printDeps")
    for tok in toks:
        left_words = [child.orth_ for child in tok.lefts]
        right_words = [child.orth_ for child in tok.rights]
        print(tok.orth_, tok.dep_, tok.pos_, tok.head.orth_, left_words, right_words)


In [29]:
# expand an obj / subj np using its chunk
def expand(item, tokens, visited):
    """Expand a subject/object token into the list of tokens forming its phrase.

    A token spelled 'that' is first swapped for its resolution, when one exists.
    The phrase is the token's left children, the token itself and its right
    children. Collection on either side stops at the first child whose POS is
    in ``BREAKER_POS``; negation words are left out, and so are the left
    children 'a' and 'an'. Finally, only the first right child of the last
    collected token is considered: if it is a noun, or a determiner other than
    'an', and has not been visited yet, its own expansion is appended.

    Args:
        item: the token to expand.
        tokens: the sentence, used to resolve 'that'.
        visited: set of token positions already expanded; updated in place.

    Returns:
        List of tokens making up the expanded phrase.
    """
    print("expand: ", item)
    if item.lower_ == 'that':
        resolved = _get_that_resolution(tokens)
        if resolved is not None:
            item = resolved

    parts = []

    if hasattr(item, 'lefts'):
        for left in item.lefts:
            if left.pos_ in BREAKER_POS:
                print("Breaker lefts")
                break
            if left.lower_ in NEGATIONS or left.text in ('an', 'a'):
                continue
            parts.append(left)

    parts.append(item)

    if hasattr(item, 'rights'):
        for right in item.rights:
            if right.pos_ in BREAKER_POS:
                print("Breaker rights")
                break
            if right.lower_ not in NEGATIONS:
                parts.append(right)
    print(parts)
    tail = parts[-1]
    if hasattr(tail, 'rights'):
        for follower in tail.rights:
            continues_phrase = (follower.pos_ == "DET" and follower.text != "an") or follower.pos_ == "NOUN"
            if continues_phrase and follower.i not in visited:
                visited.add(follower.i)
                parts.extend(expand(follower, tokens, visited))
            # only the first right child is ever considered
            break
    print(parts)
    return parts


In [30]:
# convert a list of tokens to a string
def to_str(tokens):
    """Join the ``text`` of each token with single spaces.

    Args:
        tokens: an iterable of tokens; anything that is not iterable is
            accepted as well.

    Returns:
        The space-joined text, or an empty string for a non-iterable argument.
    """
    if not isinstance(tokens, Iterable):
        return ''
    return ' '.join(item.text for item in tokens)


In [31]:
# find verbs and their subjects / objects to create SVOs, detect passive/active sentences
def findSVOs(tokens):
    """Extract subject-verb-object tuples from a sentence.

    For each verb that has subjects, its objects are looked up (through the
    coordinated verb when a ``VERB CCONJ VERB`` pattern is detected, in which
    case one tuple is emitted per verb). Passive sentences swap subject and
    object and use the verb lemma; active ones use the lower-cased verb. The
    verb is prefixed with ``"!"`` when the verb or the object is negated. A
    verb without objects yields a two-element ``(subject, verb)`` tuple.

    Args:
        tokens: the sentence to analyse.

    Returns:
        List of string tuples, in the order the verbs appear.
    """
    svos = []
    is_pas = _is_passive(tokens)
    verbs = _find_verbs(tokens)
    visited = set()  # shared with expand() for recursion detection

    def phrase(item):
        # text of the expanded noun phrase around `item`
        return to_str(expand(item, tokens, visited))

    def triple(sub, obj, verb, negated):
        # one SVO tuple; the expansion order matters because `visited` is shared
        if is_pas:  # reverse object / subject for passive
            first = phrase(obj)
            label = "!" + verb.lemma_ if negated else verb.lemma_
            last = phrase(sub)
        else:
            first = phrase(sub)
            label = "!" + verb.lower_ if negated else verb.lower_
            last = phrase(obj)
        return first, label, last

    for v in verbs:
        subs, verb_negated = _get_all_subs(v, tokens)
        # without subjects there is nothing to report for this verb
        if len(subs) == 0:
            continue
        has_conj_verb, conj_verb = _right_of_verb_is_conj_verb(v, tokens)
        print("Is conjVerb = ", has_conj_verb, " Conj Verb: ", conj_verb)
        if has_conj_verb:
            second_verb, objs = _get_all_objs(conj_verb, is_pas, tokens)
            for sub in subs:
                for obj in objs:
                    obj_negated = _is_negated(obj, tokens)
                    negated = verb_negated or obj_negated
                    svos.append(triple(sub, obj, v, negated))
                    svos.append(triple(sub, obj, second_verb, negated))
        else:
            v, objs = _get_all_objs(v, is_pas, tokens)
            for sub in subs:
                if len(objs) > 0:
                    for obj in objs:
                        obj_negated = _is_negated(obj, tokens)
                        svos.append(triple(sub, obj, v, verb_negated or obj_negated))
                else:
                    # no obj - just return the SV parts
                    svos.append((phrase(sub),
                                 "!" + v.lower_ if verb_negated else v.lower_,))

    return svos

In [33]:
# Parse the sample requirement sentences with the spaCy pipeline loaded above.
doc = nlp("""As a customer, I can make a purchase. Customer pays for his delicious food. Waiter serves food for the customers. Admin controls the whole website.
Admin adds functions to the website as he wishes.
Admin blocks a user. Users can filter the prices. As a system user, I want to add courses to the website.
Admin can add courses. Users modify their account information. Admin can add and remove users from the website. The USA Government blocked tiktok because it's harmful for the children. As an admin, I want to remove users. As an admin, I want to add courses so that I can access them later. As an admin, I want to add function so that I can access them later. As an admin, I want to add users. As a user, I want to access the entire website. As a user, I want to see my profile. As an admin, I want to add functions to the website as I wish.""")
# NOTE(review): `sentences` is not filled or read anywhere in this cell.
sentences = []
# One entry per sentence: the list of SVO tuples returned by findSVOs.
# The graph-building cell further down reads this list.
vectors = []
for sentence in doc.sents :
    print(sentence)
    # findSVOs and its helpers print their own trace before the result below
    svos = findSVOs(sentence)
    vectors.append(svos)
    print(svos)
    print(type(svos))
    print("-----------------------------------------------------------------")
print(vectors) 
#     _is_passive = false


As a customer, I can make a purchase.
_is_passive
_find_verbs(tokens)
_is_non_aux_verb
_is_non_aux_verb
_is_non_aux_verb
_is_non_aux_verb
_is_non_aux_verb
_is_non_aux_verb
_is_non_aux_verb
_is_non_aux_verb
_is_non_aux_verb
_is_non_aux_verb
Verbs found are :  [make]
1
_get_all_subs
_is_negated
--------
_get_lefts
[('ROOT', 0, 7), ('case', 3, 1), ('det', 3, 2), ('obl', 7, 3), ('punct', 7, 4), ('nsubj', 7, 5), ('aux', 7, 6), ('det', 9, 8), ('obj', 7, 9), ('punct', 7, 10)]
lefts:  [customer, ,, I, can]
Indes:  7
subs are:  [customer]
_get_subs_from_conjunctions
[]
contains_conj
_right_of_verb_is_conj_verb
Is conjVerb =  False  Conj Verb:  make
_get_all_objs
rights:  [purchase, .]
objs:  [purchase]
_is_negated
expand:  customer
[customer]
[customer]
expand:  purchase
[purchase]
[purchase]
[('customer', 'make', 'purchase')]
<class 'list'>
-----------------------------------------------------------------
Customer pays for his delicious food.
_is_passive
_find_verbs(tokens)
_is_non_aux_verb
_i

--------
_get_lefts
[('ROOT', 0, 3), ('nsubj', 3, 1), ('aux', 3, 2), ('cc', 5, 4), ('conj', 3, 5), ('obj', 3, 6), ('case', 9, 7), ('det', 9, 8), ('obl', 3, 9), ('punct', 3, 10)]
lefts:  [Admin, can]
Indes:  3
subs are:  [Admin]
_get_subs_from_conjunctions
[]
contains_conj
_right_of_verb_is_conj_verb
Is conjVerb =  False  Conj Verb:  add
_get_all_objs
rights:  [remove, users, website, .]
objs:  [users]
_is_negated
expand:  Admin
[Admin]
[Admin]
expand:  users
[users]
[users]
_get_all_subs
_is_negated
--------
_get_lefts
[('ROOT', 0, 3), ('nsubj', 3, 1), ('aux', 3, 2), ('cc', 5, 4), ('conj', 3, 5), ('obj', 3, 6), ('case', 9, 7), ('det', 9, 8), ('obl', 3, 9), ('punct', 3, 10)]
lefts:  [and]
Indes:  5
subs are:  []
_find_subs
[('Admin', 'add', 'users')]
<class 'list'>
-----------------------------------------------------------------
The USA Government blocked tiktok because it's harmful for the children.
_is_passive
_find_verbs(tokens)
_is_non_aux_verb
_is_non_aux_verb
_is_non_aux_verb
_is

--------
_get_lefts
[('ROOT', 0, 6), ('case', 3, 1), ('det', 3, 2), ('obl', 6, 3), ('punct', 6, 4), ('nsubj', 6, 5), ('mark', 8, 7), ('xcomp', 6, 8), ('obj', 8, 9), ('punct', 6, 10)]
lefts:  [admin, ,, I]
Indes:  6
subs are:  [admin]
_get_subs_from_conjunctions
[]
contains_conj
_right_of_verb_is_conj_verb
Is conjVerb =  False  Conj Verb:  want
_get_all_objs
rights:  [add, .]
objs:  []
_get_objs_from_prepositions
objs:  []
rights:  [add, .]
_get_obj_from_xcomp
_get_objs_from_prepositions
objs:  []
[users]
_get_objs_from_conjunctions
[users]
[]
_is_negated
expand:  admin
[admin]
[admin]
expand:  users
[users]
[users]
_get_all_subs
_is_negated
--------
_get_lefts
[('ROOT', 0, 6), ('case', 3, 1), ('det', 3, 2), ('obl', 6, 3), ('punct', 6, 4), ('nsubj', 6, 5), ('mark', 8, 7), ('xcomp', 6, 8), ('obj', 8, 9), ('punct', 6, 10)]
lefts:  [to]
Indes:  8
subs are:  []
_find_subs
[('admin', 'add', 'users')]
<class 'list'>
-----------------------------------------------------------------
As a user, 

In [35]:
import graphviz
from graphviz import Digraph
from IPython.display import Image

In [58]:
import re
def check_user_in_sentence(sentence):
    """Return True when ``sentence`` contains "user", ignoring case.

    The match is a substring search, so "Users" and "system user" both count.
    """
    return re.search(r"user", sentence, flags=re.IGNORECASE) is not None

def check_admin_in_sentence(sentence):
    """Return True when ``sentence`` contains "admin", ignoring case.

    The match is a substring search, so "Admin" and "administrator" both count.
    """
    return re.search(r"admin", sentence, flags=re.IGNORECASE) is not None

In [61]:
# Build a use-case style diagram from the SVO tuples collected in `vectors`:
# one actor node per distinct subject, one ellipse per use case, and an
# undirected-looking edge (no arrowhead) between them.
# Create a new graph
graph = graphviz.Digraph('G', format='png')
# lay the graph out left-to-right
graph.attr(rankdir='LR') 

# stickman figure 
import os
# absolute path of 'stick.png', resolved against the current working directory
image_path = os.path.abspath('stick.png')

# actor names that already have a node, so each actor is drawn once
actors = []
# NOTE(review): `pattern` is not read anywhere in this cell.
pattern = r"user"
for case in vectors:
    # sentences for which no SVO tuple was found are skipped
    if case == []:
        continue
    # Only the first tuple of each sentence is used; its first element is the actor.
    actor = case[0][0]
    actor = actor.lower()
    # Collapse any actor text containing "user"/"admin" to that bare role name.
    # The admin check runs last, so it wins when both substrings are present.
    if check_user_in_sentence(actor):
        actor = "user"
    if check_admin_in_sentence(actor):
        actor = "admin"
    if actor not in actors:
        actors.append(actor)
        graph.node(actor, shape='plaintext', image=image_path, height='1', width='1')
    
    # the remaining elements of the tuple (verb and, if present, object) name the use case
    use_case = ' '.join(case[0][1:])
    graph.node(use_case, shape='ellipse', style='solid')
    graph.edge(actor, use_case, arrowhead='none')


# NOTE(review): this node attribute is set after every node has been created;
# confirm it has the intended effect on the layout.
graph.attr('node', rank='same')
# render the graph as PNG under the name 'output' in the current directory
graph.render(filename='output', directory='.', format='png')


'output.png'