In [1]:
import os
import nltk
from nltk import word_tokenize, pos_tag

# Download NLTK data (if not already downloaded)
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Function to identify adverbs in a text
def identify_adverbs(text):
    words = word_tokenize(text)
    tagged_words = pos_tag(words)
    adverbs = [word for word, pos in tagged_words if pos.startswith('RB')]
    return adverbs

# Function to process files in a folder
def process_folder(folder_path):
    adverbs_list = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".txt"):
            file_path = os.path.join(folder_path, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                text = file.read()
                adverbs = identify_adverbs(text)
                adverbs_list.append(adverbs)
    return adverbs_list

[nltk_data] Downloading package punkt to /home/theo2bel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/theo2bel/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [2]:
# Path to the "neg" and "pos" folders
neg_folder_path = '/home/theo2bel/Downloads/review_polarity/txt_sentoken/neg'
pos_folder_path = '/home/theo2bel/Downloads/review_polarity/txt_sentoken/pos'

# Process negative reviews
neg_adverbs = process_folder(neg_folder_path)
print("Adverbs in negative reviews:")
print(neg_adverbs)

# Process positive reviews
pos_adverbs = process_folder(pos_folder_path)
print("\nAdverbs in positive reviews:")
print(pos_adverbs)


Adverbs in negative reviews:
[['billy', 'usually', 'well', 'lately', 'tellingly', 'very', 'unfortunately', 'away', 'not', 'progressively', 'more', 'finally', 'billy', 'finally', 'very', 'well', 'never', 'especially', 'also', 'about', 'however', 'suddenly', 'accidentally', 'subsequently', 'desperately', 'even', "n't", 'even', 'mostly', 'more', 'not', 'away', 'more', 'potentially', 'mostly', 'always', 'lightly', 'billy', 'then', 'tiresomely', 'admittedly', 'very', 'here', "n't", 'down', 'directly', 'else', 'not', 'not', 'together', 'noticably', 'most', "n't", 'even', 'down', 'ago', 'meanwhile', 'mostly', 'just', 'even', 'finally', 'molly', 'rousingly', 'then', 'completely', 'too', 'surely', 'once', 'alone', 'once', 'again', 'thoroughly', 'mostly', 'most', 'certainly', "n't", 'up', 'virtually'], ['unfortunately', 'absolutely', 'whatsoever', 'aside', 'out', 'so', 'basically', 'so', 'here', 'as', 'basically', 'much', 'not', 'especially', 'not', 'more', 'bafflingly', 'not', 'partly', 'even',


Adverbs in positive reviews:
[['about', 'just', 'precisely', 'so', 'accurately', 'never', 'also', 'largely', 'just', 'alone', 'up', 'finally', 'most', 'especially', 'too', 'literally', 'not', 'as', 'well', 'alone', "n't", 'just', 'immediately', 'only', 'together', 'together', 'never', 'really', "n't", 'however', 'not', 'never', 'instead', 'just', "n't", 'completely', 'never'], ['still', 'now', 'then', 'almost', 'later', 'soon', 'forever', 'upside', 'down', 'very', 'very', 'extremely', 'well', 'outstandingly', 'mentally', 'actually', 'once', 'brilliantly', 'also', 'as', 'perfectly', 'skillfully', 'markedly', 'finally', 'deftly', 'not', 'around', 'begrudgingly', 'close', 'already', 'now', 'then', 'similarly', 'later', 'pretty', 'close'], ['rarely', 'somewhat', 'not', 'not', 'very', 'very', 'not', 'nearly', 'not', 'so', 'amazingly', 'just', 'never', 'perhaps', 'comparatively', 'certainly', 'then', 'considerably', 'more', 'so', 'only', 'immediately', 'already', 'not', 'almost', 'however',

In [3]:
import nltk
from nltk.corpus import sentiwordnet as swn

# Download the necessary NLTK data (if not already downloaded)
nltk.download("sentiwordnet")

# Function to score adverbs using SentiWordNet
def score_adverbs_with_swn(adverbs):
    adverb_scores = []
    for text in adverbs:
        score_text=[]
        for adverb in text:
            synsets = list(swn.senti_synsets(adverb))
            if synsets:
                # Take the first synset as a simple example (you can combine multiple synsets)
                synset = synsets[0]
                positive_score = synset.pos_score()
                negative_score = synset.neg_score()
                objective_score = synset.obj_score()
                score_text.append((positive_score,negative_score,objective_score))
        adverb_scores.append(score_text)
    return adverb_scores


# Score the adverbs
adverb_scores_pos = score_adverbs_with_swn(pos_adverbs)
adverb_scores_neg = score_adverbs_with_swn(neg_adverbs)


# Now we make a sum of the negative and positive scores for a text


[nltk_data] Downloading package sentiwordnet to
[nltk_data]     /home/theo2bel/nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!


In [4]:
scores_pos = [sum([float(i[0]) - float(i[1]) for i in j])/len(j) for j in adverb_scores_pos ]
# We divide by the length because if the text is much longer than others, it is more equilibrated so it does not mean anything
# And here there are 1000 texts in both positive and negative reviews so we don't have to divide by the number of texts

In [None]:
scores_neg = [sum([float(i[0]) - float(i[1]) for i in j])/len(j) for j in adverb_scores_neg ]

In [13]:
len(scores)

1000

In [8]:
sum(scores_neg)

16.88124874118352

In [5]:
sum(scores_pos)

21.71405661495686

The scores are quite near, so it is not sufficient to score as we did.