In [None]:
#Hearst patterns implementation for Hyponyms

In [44]:
# Hearst pattern: NP0 such as {NP1, NP2, NP3, ... (and | or)} NPn, applied to a single sentence.
# The extractor returns raw (hypernym, hyponym) tuples; both elements of each tuple
# still require preprocessing (the text is not trimmed or reduced to noun phrases).
# Example input for this cell:
sentence= "The bow lute, such as the Bambara ndang, is plucked and has an individual curved neck for each string"

def getSuchAsKeyValues(valueStr):
    """Split the text that follows "such as" into candidate hyponym strings.

    The text is split on commas. When that yields more than one piece, a
    leading "and"/"or" conjunction is removed from the final piece (for
    example the " and Spain" in "France, England, and Spain").

    Args:
        valueStr: Text that follows the "such as" marker.

    Returns:
        The comma-separated pieces in their original order. Pieces are not
        trimmed, so they may carry surrounding whitespace. Text without any
        comma is returned unchanged as a single-element list.
    """
    valueList = valueStr.split(",")
    if len(valueList) == 1:
        return valueList

    # After splitting on "," the last piece normally starts with a space
    # (" and Spain"), so skip leading whitespace before looking for the
    # conjunction.
    trimmed = valueList[-1].lstrip()
    for conjunction in ("and", "or"):
        head = trimmed[:len(conjunction)]
        rest = trimmed[len(conjunction):]
        # Only treat it as a conjunction when it is a whole word; otherwise
        # words such as "android" or "oranges" would be truncated.
        if head.lower() == conjunction and (not rest or rest[0].isspace()):
            valueList[-1] = rest
            break
    return valueList

def getSuchAsHyponym(sentence):
    """Extract raw (hypernym, hyponym) pairs with the "NP0 such as NP1, ..." pattern.

    Everything before the first " such as " marker (matched case-insensitively)
    is used verbatim as the hypernym; the text after it is split into
    hyponym candidates by getSuchAsKeyValues.

    Args:
        sentence: The sentence to scan.

    Returns:
        None when the sentence contains no " such as " marker, otherwise a
        list of (hypernym, hyponym) tuples. Neither element is trimmed or
        otherwise cleaned, so the hypernym may end with a comma and hyponyms
        may start with a space. Empty and whitespace-only candidates are
        skipped.
    """
    import re  # kept local so this cell runs on its own

    # Search the original string rather than sentence.lower(): lower() can
    # change the length of some Unicode text, which would make the indices
    # point at the wrong characters of `sentence`.
    marker = re.search(" such as ", sentence, flags=re.IGNORECASE)
    if marker is None:
        return None

    key = sentence[:marker.start()]
    values = getSuchAsKeyValues(sentence[marker.end():])
    return [(key, value) for value in values if value.strip()]

# Display the raw pairs extracted from the example sentence.
getSuchAsHyponym(sentence)

[('The bow lute,', 'the Bambara ndang'),
 ('The bow lute,',
  ' is plucked and has an individual curved neck for each string')]

In [43]:
# Hearst pattern: NP {, NP}* {,} or other NP
# Input:  Bruises wounds, broken bones or other injuries
# Output: [('injuries', 'Bruises wounds'), ('injuries', ' broken bones')]
#         i.e. raw (hypernym, hyponym) tuples; the hyponym text is not trimmed.
sentence = "Bruises wounds, broken bones or other injuries"
def orOtherHearstPatterns(sentence):
    """Extract raw (hypernym, hyponym) pairs with the "NP, NP or other NP" pattern.

    The text after the first " or other " marker (matched case-insensitively)
    is used verbatim as the hypernym; the text before it is split on commas
    to obtain the hyponyms.

    Args:
        sentence: The sentence to scan.

    Returns:
        None when the sentence contains no " or other " marker, otherwise a
        list of (hypernym, hyponym) tuples. Neither element is trimmed.
        Empty and whitespace-only hyponym candidates are skipped.
    """
    import re  # kept local so this cell runs on its own

    # Search the original string rather than sentence.lower(): lower() can
    # change the length of some Unicode text, which would make the indices
    # point at the wrong characters of `sentence`.
    marker = re.search(" or other ", sentence, flags=re.IGNORECASE)
    if marker is None:
        return None

    key = sentence[marker.end():]
    values = sentence[:marker.start()].split(",")
    return [(key, value) for value in values if value.strip()]

# Display the raw pairs extracted from the example sentence.
orOtherHearstPatterns(sentence)

[('injuries', 'Bruises wounds'), ('injuries', ' broken bones')]

In [45]:
# Hearst pattern: NP {, NP}* {,} and other NP
# Input: temples, treasuries, and other important civic buildings.
# Output before preprocessing: [('important civic buildings.', 'temples'), ('important civic buildings.', ' treasuries')]
# Output after preprocessing (intended, not done in this cell): [hyponym(temple, civic building), hyponym(treasury, civic building)]
sentence = "temples, treasuries, and other important civic buildings."
def andOtherHearstPatterns(sentence):
    """Extract raw (hypernym, hyponym) pairs with the "NP, NP and other NP" pattern.

    The text after the first " and other " marker (matched case-insensitively)
    is used verbatim as the hypernym; the text before it is split on commas
    to obtain the hyponyms.

    Args:
        sentence: The sentence to scan.

    Returns:
        None when the sentence contains no " and other " marker, otherwise a
        list of (hypernym, hyponym) tuples. Neither element is trimmed, so
        the hypernym keeps any trailing punctuation. Empty and
        whitespace-only hyponym candidates are skipped.
    """
    import re  # kept local so this cell runs on its own

    # Search the original string rather than sentence.lower(): lower() can
    # change the length of some Unicode text, which would make the indices
    # point at the wrong characters of `sentence`.
    marker = re.search(" and other ", sentence, flags=re.IGNORECASE)
    if marker is None:
        return None

    key = sentence[marker.end():]
    values = sentence[:marker.start()].split(",")
    return [(key, value) for value in values if value.strip()]
# Display the raw pairs extracted from the example sentence.
andOtherHearstPatterns(sentence)

[('important civic buildings.', 'temples'),
 ('important civic buildings.', ' treasuries')]

In [54]:
# "including" Hearst pattern
# Pattern: NP {,} including {NP, }* {or|and} NP
# Output before preprocessing: [('Canada', 'All common-law countries'), ('England', 'All common-law countries')]
# Output after preprocessing (intended): [('Canada', 'common-law countries'), ('England', 'common-law countries')]
# Note: the tuples here are (hyponym, hypernym).
sentence = "All common-law countries, including Canada and England"

#modify and or value
def modifyAndOrValue(s):
    """Split a phrase in two around its first " or " / " and " conjunction.

    " or " takes precedence: if the phrase contains " or " anywhere, the
    split happens at its first occurrence even when " and " appears earlier.
    Matching is case-insensitive and the conjunction itself is dropped.

    Args:
        s: The phrase to split, typically the last comma-separated item of
            an enumeration (e.g. " and Spain" or "Canada and England").

    Returns:
        [s] when the phrase contains neither conjunction, otherwise
        [text_before, text_after]. Either part may be empty and neither is
        trimmed.
    """
    import re  # kept local so this cell runs on its own

    # Search the original string rather than s.lower(): lower() can change
    # the length of some Unicode text, which would make the indices point at
    # the wrong characters of `s`.
    conjunction = re.search(" or ", s, flags=re.IGNORECASE)
    if conjunction is None:
        conjunction = re.search(" and ", s, flags=re.IGNORECASE)
    if conjunction is None:
        return [s]
    return [s[:conjunction.start()], s[conjunction.end():]]

def includingHearstPattern(sentence):
    """Extract raw pairs with the "NP, including NP, NP and NP" pattern.

    The text before the first " including " marker (matched
    case-insensitively) is split on commas to obtain the hypernym(s). The
    text after it is split on commas, and its last piece is further split
    by modifyAndOrValue, to obtain the hyponyms.

    Note the tuple order: this function returns (hyponym, hypernym), the
    reverse of the other extractors in this notebook.

    Args:
        sentence: The sentence to scan.

    Returns:
        None when the sentence contains no " including " marker, otherwise a
        list with one (hyponym, hypernym) tuple for every combination of a
        non-blank hyponym and a non-blank hypernym. The strings are not
        trimmed.
    """
    import re  # kept local so this cell runs on its own

    # Search the original string rather than sentence.lower(): lower() can
    # change the length of some Unicode text, which would make the indices
    # point at the wrong characters of `sentence`.
    marker = re.search(" including ", sentence, flags=re.IGNORECASE)
    if marker is None:
        return None

    hypernyms = sentence[:marker.start()].split(",")
    hyponymParts = sentence[marker.end():].split(",")
    # The last item may still hold "X and Y" / "X or Y"; replace it with its parts.
    hyponymParts[-1:] = modifyAndOrValue(hyponymParts[-1])

    return [
        (hyponym, hypernym)
        for hyponym in hyponymParts
        if hyponym.strip()
        for hypernym in hypernyms
        if hypernym.strip()
    ]

# Display the raw pairs extracted from the example sentence.
includingHearstPattern(sentence)

[('Canada', 'All common-law countries'),
 ('England', 'All common-law countries')]

In [57]:
# "especially" Hearst pattern
# Pattern: NP {,} especially {NP, }* {or|and} NP
# Output before preprocessing: [('most European countries,', 'France'), ('most European countries,', ' England'), ('most European countries,', 'Spain')]
# Output after preprocessing (intended): [('European country', 'France'), ('European country', 'England'), ('European country', 'Spain')]
sentence = "most European countries, especially France, England, and Spain"
def especiallyHearstPattern(sentence):
    """Extract raw (hypernym, hyponym) pairs with the "NP, especially NP, NP and NP" pattern.

    The text before the first " especially " marker (matched
    case-insensitively) is used verbatim as the hypernym. The text after it
    is split on commas, and its last piece is further split by
    modifyAndOrValue, to obtain the hyponyms.

    Args:
        sentence: The sentence to scan.

    Returns:
        None when the sentence contains no " especially " marker, otherwise
        a list of (hypernym, hyponym) tuples. Neither element is trimmed, so
        the hypernym may end with a comma. Empty and whitespace-only hyponym
        candidates are skipped.
    """
    import re  # kept local so this cell runs on its own

    # Search the original string rather than sentence.lower(): lower() can
    # change the length of some Unicode text, which would make the indices
    # point at the wrong characters of `sentence`.
    marker = re.search(" especially ", sentence, flags=re.IGNORECASE)
    if marker is None:
        return None

    key = sentence[:marker.start()]
    values = sentence[marker.end():].split(",")
    # The last item may still hold "X and Y" / "X or Y"; replace it with its parts.
    values[-1:] = modifyAndOrValue(values[-1])
    return [(key, value) for value in values if value.strip()]

# Display the raw pairs extracted from the example sentence.
especiallyHearstPattern(sentence)

[('most European countries,', 'France'),
 ('most European countries,', ' England'),
 ('most European countries,', 'Spain')]

In [59]:
# "such NP as" Hearst pattern
# Pattern: such NP as {NP,}* {(or|and)} NP
# Output before preprocessing: [('authors', 'Herrick'), ('authors', ' GoldSmith'), ('authors', 'Shakespeare.')]
# Output after preprocessing (intended): [('author', 'Herrick'), ('author', 'GoldSmith'), ('author', 'Shakespeare')]
sentence = "works by such authors as Herrick, GoldSmith, and Shakespeare."
def suchNPasHearstPattern(sentence):
    """Extract raw (hypernym, hyponym) pairs with the "such NP as NP, NP and NP" pattern.

    The hypernym is the text between the first " such " marker and the first
    " as " that follows it (both matched case-insensitively). The text after
    that " as " is split on commas, and its last piece is further split by
    modifyAndOrValue, to obtain the hyponyms.

    Args:
        sentence: The sentence to scan.

    Returns:
        None when the sentence has no " such " marker or no " as " after it,
        otherwise a list of (hypernym, hyponym) tuples. Neither element is
        trimmed, so the last hyponym keeps any trailing punctuation. Empty
        and whitespace-only hyponym candidates are skipped.
    """
    import re  # kept local so this cell runs on its own

    # Search the original strings rather than lower-cased copies: lower()
    # can change the length of some Unicode text, which would make the
    # indices point at the wrong characters.
    suchMarker = re.search(" such ", sentence, flags=re.IGNORECASE)
    if suchMarker is None:
        return None

    afterSuch = sentence[suchMarker.end():]
    asMarker = re.search(" as ", afterSuch, flags=re.IGNORECASE)
    if asMarker is None:
        return None

    key = afterSuch[:asMarker.start()]
    values = afterSuch[asMarker.end():].split(",")
    # The last item may still hold "X and Y" / "X or Y"; replace it with its parts.
    values[-1:] = modifyAndOrValue(values[-1])
    return [(key, value) for value in values if value.strip()]

# Display the raw pairs extracted from the example sentence.
suchNPasHearstPattern(sentence)

[('authors', 'Herrick'),
 ('authors', ' GoldSmith'),
 ('authors', 'Shakespeare.')]