In [1]:
import nltk
import os
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Read one sample book, leaving `contents`, `tokens` and `parts_of_speech`
# available as notebook-level variables.
with open("BooksCleaned/Book46.txt") as book:
    contents = book.read()

# Tokenizing and tagging only need the text, so they run after the file
# handle has been released.
tokens = nltk.word_tokenize(contents)
parts_of_speech = nltk.pos_tag(tokens)


[nltk_data] Downloading package punkt to
[nltk_data]     /home/malvinaclavering/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/malvinaclavering/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [2]:
import nltk
from nltk.corpus import wordnet   #Import wordnet from the NLTK
def find_syn(keyword):
    """Collect the WordNet lemma names for ``keyword``.

    Every synset returned by ``wordnet.synsets(keyword)`` is visited in order
    and the name of each of its lemmas is gathered (duplicates are kept).

    Args:
        keyword: the word to look up in WordNet.

    Returns:
        The ``str()`` rendering of the list of lemma names, i.e. a single
        string such as ``"['car', 'auto']"`` rather than a list.
    """
    lemma_names = [
        lemma.name()
        for synset in wordnet.synsets(keyword)
        for lemma in synset.lemmas()
    ]
    return str(lemma_names)

In [3]:
def find_occurances_of_keyword(keyword,text):
    """Return every position in ``text`` whose item equals ``keyword``.

    Args:
        keyword: the value to look for (compared with ``==``, so the match
            is exact and case-sensitive for strings).
        text: an iterable of tokens.

    Returns:
        A list of zero-based positions, in ascending order; empty when the
        keyword never occurs.
    """
    return [position for position, token in enumerate(text) if keyword == token]

In [4]:
def find_adjectives(indexes, parts_of_speech_list):
    """Collect the adjectives that sit directly before the given positions.

    Args:
        indexes: positions of a keyword inside ``parts_of_speech_list``.
        parts_of_speech_list: sequence of ``(word, tag)`` pairs; element 0 is
            read as the word and element 1 as its part-of-speech tag.

    Returns:
        A list with the word found at ``index - 1`` for every index whose
        preceding token is tagged "JJ", "JJR" or "JJS", in the order the
        indexes were given.
    """
    adjective_tags = ("JJ", "JJR", "JJS")
    list_of_adjectives = []
    for index in indexes:
        if index == 0:
            # The first token has no predecessor. Without this guard,
            # index - 1 == -1 would silently read the LAST token instead.
            continue
        preceding = parts_of_speech_list[index - 1]
        if preceding[1] in adjective_tags:
            list_of_adjectives.append(preceding[0])
    return list_of_adjectives

In [5]:
def find_words_that_describe_keyword(keyword,path):
    """Find adjectives placed directly before ``keyword`` in one text file.

    The file at ``path`` is read, tokenized with ``nltk.word_tokenize`` and
    tagged with ``nltk.pos_tag``; the keyword positions are then handed to
    ``find_adjectives``.

    Args:
        keyword: the token to search for.
        path: path of the text file to analyse.

    Returns:
        Whatever ``find_adjectives`` returns for the keyword positions: a
        list of adjective strings.
    """
    with open(path) as book:
        tokens = nltk.word_tokenize(book.read())
    tagged_tokens = nltk.pos_tag(tokens)
    keyword_positions = find_occurances_of_keyword(keyword, tokens)
    return find_adjectives(keyword_positions, tagged_tokens)
# Example run: adjectives found immediately before "gentleman" in one book.
find_words_that_describe_keyword("gentleman", "BooksCleaned/Book922.txt")

['old', 'old', 'old']

In [6]:
def find_words_in_all_books_that_describe_keyword(keyword):
    """Run ``find_words_that_describe_keyword`` over every entry in BooksCleaned.

    Args:
        keyword: the token to search for in each book.

    Returns:
        One flat list holding the per-book results concatenated in the order
        ``os.listdir("BooksCleaned")`` yields the entries.
    """
    collected = []
    for file_name in os.listdir("BooksCleaned"):
        book_path = f'BooksCleaned/{file_name}'
        collected.extend(find_words_that_describe_keyword(keyword, book_path))
    return collected
#adj_lawyer=find_words_in_all_books_that_describe_keyword("lawyer")

In [7]:
from nltk.sentiment import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')
#def sentiment_countifier(adjectives):
#    sia = SentimentIntensityAnalyzer()
#    str1 = " " 
#    words =str1.join(adjectives)    
#    print(words)
#    sentiments = sia.polarity_scores(words)
#    return sentiments
#print(sentiment_countifier(adj_lawyer))
def sentiment_countifier_individual(adjectives):
    """Average the per-word positive / neutral / negative sentiment scores.

    Each word is scored on its own with
    ``SentimentIntensityAnalyzer.polarity_scores`` and the "pos", "neu" and
    "neg" values are averaged over all words.

    Args:
        adjectives: iterable of words to score individually.

    Returns:
        ``[pos_score, neu_score, neg_score]`` as floats. When ``adjectives``
        is empty there is nothing to average, so ``[0.0, 0.0, 0.0]`` is
        returned instead of raising ``ZeroDivisionError``.
    """
    adjectives = list(adjectives)
    if not adjectives:
        # Guard against dividing by zero when no adjectives were found.
        return [0.0, 0.0, 0.0]

    sia = SentimentIntensityAnalyzer()
    pos_scores = []
    neu_scores = []
    neg_scores = []
    for word in adjectives:
        sentiments = sia.polarity_scores(word)
        # Kept from the original: echoes every per-word score dictionary.
        print(sentiments)
        pos_scores.append(sentiments["pos"])
        neu_scores.append(sentiments["neu"])
        neg_scores.append(sentiments["neg"])

    count = len(adjectives)
    return [sum(pos_scores) / count, sum(neu_scores) / count, sum(neg_scores) / count]
#print(sentiment_countifier_individual(adj_lawyer))


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/malvinaclavering/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [21]:
from nltk.tokenize.treebank import TreebankWordDetokenizer
def find_sentences_with_keyword(keyword, text):
    """
    Find the sentence surrounding each occurrence of a keyword and return
    those sentences as detokenized strings.

    A sentence is taken to run from the token after the previous ".", "!" or
    "?" up to and including the next one. If the text starts or ends before
    such a punctuation mark is found, the sentence is clipped at the start or
    end of the text instead of indexing out of range.

    Args:
        keyword: a string representing a word to contextualize in a sentence.
        text: a tokenized text given as a list of strings of words and punctuation.

    Returns:
        A list of strings that are sentences containing the keyword, one
        entry per keyword occurrence (a sentence containing the keyword twice
        therefore appears twice).
    """
    keyword_locations = find_occurances_of_keyword(keyword, text)
    end_punctuation = (".", "!", "?")
    text_length = len(text)
    # Build the detokenizer once instead of once per sentence.
    detokenizer = TreebankWordDetokenizer()

    sentences = []
    for i in keyword_locations:
        # Walk forward to the closing punctuation, stopping at the end of
        # the text if the final sentence is unterminated.
        next_word = i + 1
        while next_word < text_length and text[next_word] not in end_punctuation:
            next_word += 1
        end_location = min(next_word + 1, text_length)

        # Walk backward to the previous sentence's punctuation. Stopping at
        # index 0 prevents negative indices from wrapping around to the end
        # of the text.
        previous_word = i - 1
        while previous_word >= 0 and text[previous_word] not in end_punctuation:
            previous_word -= 1
        start_location = previous_word + 1

        new_sentence = detokenizer.detokenize(text[start_location:end_location])
        # Re-attach curly apostrophes that detokenizing left surrounded by spaces.
        sentences.append(new_sentence.replace(" ’ ", "’"))
    return sentences

#find_sentences_with_keyword("gentleman", tokens)


["If we were not perfectly convinced that Hamlet's Father died before the play began, there would be nothing more remarkable in his taking a stroll at night, in an easterly wind, upon his own ramparts, than there would be in any other middle-aged gentleman rashly turning out after dark in a breezy spot--say Saint Paul's Churchyard for instance--literally to astonish his son's weak mind.",
 '" "We have no doubt his liberality is well represented by his surviving partner," said the gentleman, presenting his credentials.',
 '"At this festive season of the year, Mr. Scrooge," said the gentleman, taking up a pen, "it is more than usually desirable that we should make some slight provision for the Poor and destitute, who suffer greatly at the present time.',
 '"Plenty of prisons," said the gentleman, laying down the pen again.',
 'Still," returned the gentleman, "I wish I could say they were not.',
 '" "Under the impression that they scarcely furnish Christian cheer of mind or body to the mu

In [9]:
import spacy
# Loaded spaCy pipelines, keyed by model name, so that the (slow) model load
# happens once rather than once per analysed sentence.
_SPACY_MODELS = {}


def _load_spacy_model(name="en_core_web_sm"):
    """Return the spaCy pipeline ``name``, loading it only on first use."""
    if name not in _SPACY_MODELS:
        _SPACY_MODELS[name] = spacy.load(name)
    return _SPACY_MODELS[name]


def look_for_adjectives(word,sentence):
    """Find adjectives in ``sentence`` that hang off ``word`` in the parse tree.

    The sentence is parsed with the "en_core_web_sm" spaCy pipeline. For every
    token tagged "ADJ", the chain of heads is followed upward until a token
    whose dependency label is "ROOT" is reached; the adjective is kept as
    soon as one of the heads on that chain has text equal to ``word``.

    Args:
        word: the exact token text the adjectives must depend on.
        sentence: the sentence to analyse, as a plain string.

    Returns:
        A list of adjective strings in the order they occur in the sentence.
    """
    doc = _load_spacy_model()(sentence)
    adjectives = []
    for token in doc:
        # Only adjectives can ever be collected, so skip the head walk for
        # every other token.
        if token.pos_ != "ADJ":
            continue
        working_token = token
        while working_token.dep_ != "ROOT":
            if working_token.head.text == word:
                adjectives.append(token.text)
                break
            # Stop when a token and its head share the same text (kept from
            # the original; note this compares text, not token identity).
            if working_token.head.text == working_token.text:
                break
            working_token = working_token.head
    return adjectives

In [34]:
def find_adj_in_all_sentences(keyword,path):
    """Gather adjectives tied to ``keyword`` in every sentence of one file.

    The file at ``path`` is read and tokenized with ``nltk.word_tokenize``;
    each sentence returned by ``find_sentences_with_keyword`` is printed and
    then passed to ``look_for_adjectives``.

    Args:
        keyword: the token to search for.
        path: path of the text file to analyse.

    Returns:
        A flat list with the adjectives found in all matching sentences, in
        sentence order.
    """
    with open(path) as book:
        text = nltk.word_tokenize(book.read())
    found = []
    for sentence in find_sentences_with_keyword(keyword, text):
        print(sentence)
        found.extend(look_for_adjectives(keyword, sentence))
    return found


In [20]:
def expand_keywords(keywords_):
    """Extend a keyword list with plural and capitalized variants.

    For every keyword, three variants are appended after the original
    entries: the keyword plus "s", the keyword capitalized, and the
    capitalized plural. The input list is echoed with ``print`` first and
    each keyword is printed as it is processed. The input is not modified.

    Args:
        keywords_: list of keyword strings.

    Returns:
        A new list: the original keywords followed by the three variants of
        each keyword, in input order.
    """
    expanded = keywords_[:]
    print(keywords_)
    for base in keywords_:
        print(base)
        plural = base + "s"
        expanded.extend([plural, base.capitalize(), plural.capitalize()])
    return expanded


In [35]:
def find_adj_in_all_books(keywords):
    """Collect adjectives tied to any of the keywords across all books.

    The keywords are first widened with ``expand_keywords``; then, for every
    resulting keyword, ``find_adj_in_all_sentences`` is run on each entry of
    the "BooksCleaned" directory.

    Args:
        keywords: list of base keyword strings.

    Returns:
        A flat list of adjectives, grouped by keyword variant and, within a
        variant, by the order ``os.listdir("BooksCleaned")`` yields the books.
    """
    keywords = expand_keywords(keywords)
    adj = []
    for word in keywords:
        for book in os.listdir("BooksCleaned"):
            adj += find_adj_in_all_sentences(word, f'BooksCleaned/{book}')
    # Return only after ALL keyword variants were searched; previously the
    # return sat inside the outer loop and stopped after the first keyword.
    return adj
# Run the adjective search for "lawyer" over the books and print the
# collected adjectives.
adj_lawyer=find_adj_in_all_books(["lawyer"])
print(adj_lawyer)


['lawyer']
lawyer
Nell shrank timidly from all the dwarf’s advances towards conversation, and fled from the very sound of his voice; nor were the lawyer’s smiles less terrible to her than Quilp’s grimaces.
‘ You’re a nice lawyer, an’t you?
The farm-yard passed, then came the little inn; the humbler beer-shop; and the village tradesman’s; then the lawyer’s and the parson’s, at whose dread names the beer-shop trembled; the church then peeped out modestly from a clump of trees; then there were a few more cottages; then the cage, and pound, and not unfrequently, on a bank by the way-side, a deep old dusty well.
’ laughed the lawyer in an affected ecstasy.
Be quick and open the door, or if there’s another lawyer near and he should happen to look out of window, he’ll snap him up before your eyes, he will.’It is probable that the loss of the phoenix of clerks, even to a rival practitioner, would not have broken Mr Brass’s heart; but, pretending great alacrity, he rose from his seat, and going