In [118]:
# Constant variable definitions
# DIR_PATH: directory holding the text files this notebook reads and lists.
# Keep the trailing slash so the value can be used as a plain string prefix.
DIR_PATH = "sample_texts/Fullmetal Alchemist: Brotherhood/"


In [2]:
import os

In [3]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np

In [4]:
import nltk
import nltk.data
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [5]:
import re

In [27]:
# split_into_sentences()
# Parameters: text - string of text
# Returns: list of sentences, each stripped of surrounding whitespace
# Description: Split text into sentences while keeping the sentence-ending
# punctuation and without breaking on periods that do not end a sentence.
# (Note: This function was adapted from StackOverflow:
#  https://stackoverflow.com/questions/4576077/python-split-text-on-sentences)

# Handle cases where periods are used but don't note the end of a sentence.
# Raw strings keep "\s" as a regex escape instead of an (invalid) string escape.
caps = r"([A-Z])"
prefixes = r"(Mr|St|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|Mt)[.]"
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
starters = r"(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = r"[.](com|net|org|io|gov)"
digits = r"([0-9])"

def split_into_sentences(text):
    """Split ``text`` into a list of sentences.

    Periods that belong to a token (titles, initials, acronyms, decimals,
    web domains, ellipses) are temporarily rewritten as ``<prd>`` so that only
    genuine sentence terminators receive a ``<stop>`` marker, on which the
    text is finally split.

    Parameters:
        text: the string to split; newlines are treated as spaces.

    Returns:
        A list of stripped sentence strings. Text after the last terminator
        (a final sentence with no closing punctuation) is kept as the last
        element; an empty or whitespace-only input yields an empty list.
    """
    # Pad with spaces so patterns that require surrounding spaces also match
    # at the very start and end of the text.
    text = " " + text + "  "
    text = text.replace("\n", " ")

    # Protect periods that are not sentence terminators.
    text = re.sub(prefixes, "\\1<prd>", text)
    text = re.sub(websites, "<prd>\\1", text)
    text = text.replace("Ph.D.", "Ph<prd>D<prd>")
    text = re.sub(r"\s" + caps + "[.] ", " \\1<prd> ", text)
    # An acronym or suffix followed by a typical sentence starter does end a sentence.
    text = re.sub(acronyms + " " + starters, "\\1<stop> \\2", text)
    text = re.sub(caps + "[.]" + caps + "[.]" + caps + "[.]", "\\1<prd>\\2<prd>\\3<prd>", text)
    text = re.sub(caps + "[.]" + caps + "[.]", "\\1<prd>\\2<prd>", text)
    text = re.sub(digits + "[.]" + digits, "\\1<prd>\\2", text)
    text = re.sub(" " + suffixes + "[.] " + starters, " \\1<stop> \\2", text)
    text = re.sub(" " + suffixes + "[.]", " \\1<prd>", text)
    text = re.sub(" " + caps + "[.]", " \\1<prd>", text)

    # Move terminators outside closing quotes so the quote stays with its sentence.
    text = text.replace(".”", "”.")
    text = text.replace(".\"", "\".")
    text = text.replace("!\"", "\"!")
    text = text.replace("?\"", "\"?")
    text = text.replace("...", "<prd><prd><prd>")

    # Mark the real sentence boundaries, then restore the protected periods.
    text = text.replace(".", ".<stop>")
    text = text.replace("?", "?<stop>")
    text = text.replace("!", "!<stop>")
    text = text.replace("<prd>", ".")

    sentences = [s.strip() for s in text.split("<stop>")]
    # Only discard the trailing piece when it is empty; dropping it
    # unconditionally would lose a final sentence that has no terminator.
    if sentences and not sentences[-1]:
        sentences.pop()
    return sentences

In [138]:
# read_file()
# Parameters: filename - name of a text file located inside DIR_PATH
# Description: Read a text file and return its whole content as one string
def read_file(filename):
    """Return the full content of ``filename`` (looked up in DIR_PATH) as a string.

    Parameters:
        filename: file name relative to DIR_PATH.

    Returns:
        The file content as a single string, line breaks included.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
    """
    file_path = os.path.join(DIR_PATH, filename)

    # Open once, read-only; the context manager guarantees the handle is
    # closed even if reading fails.
    with open(file_path, "r") as file:
        return file.read()

In [153]:
# analyze_sentence_sentiment()
# Parameters: text - the review text to analyze
# Description: Score every sentence of the text with VADER, print each sentence
# next to its scores, and return the sum of the per-sentence 'compound' scores
def analyze_sentence_sentiment(text):
    """Print per-sentence VADER scores for ``text`` and return the summed 'compound' score.

    Parameters:
        text: string holding the review to analyze.

    Returns:
        The sum of the 'compound' values over all sentences (0.0 when there are none).
    """
    pieces = split_into_sentences(text)
    vader = SentimentIntensityAnalyzer()

    # First pass: score and report each sentence as it is processed.
    per_sentence = []
    for piece in pieces:
        polarity = vader.polarity_scores(piece)
        per_sentence.append(polarity)
        print("{:-<65} {}".format(piece, str(polarity)))

    # Second pass: accumulate the 'compound' values in sentence order.
    compound_total = 0.0
    for polarity in per_sentence:
        if 'compound' in polarity:
            compound_total += polarity.get('compound')

    print("\nAggregated Review Score: {}".format(compound_total))
    print("\n--------------------------------------------------------------------------------------------------------\n")
    return compound_total

In [154]:
# analyze_review_sentiment()
# Parameters: filename - name of a review file located inside DIR_PATH
# Description: Read the review and aggregate VADER's per-sentence 'compound'
# scores to get an overall score
def analyze_review_sentiment(filename):
    """Return the aggregated 'compound' sentiment score of the review in ``filename``.

    Parameters:
        filename: review file name, resolved by read_file().

    Returns:
        The summed per-sentence 'compound' score, as computed by
        analyze_sentence_sentiment().
    """
    # analyze_sentence_sentiment() expects the review text (not a file name)
    # and already returns the aggregated score, so read the file first and
    # return its result directly instead of iterating over it.
    text = read_file(filename)
    return analyze_sentence_sentiment(text)

In [155]:
def normalize_review_sentiment(review_scores):
    """Min-max normalize a collection of review scores into the range [0, 1].

    Parameters:
        review_scores: iterable of numeric scores.

    Returns:
        A list with one value per input score, computed as
        (score - min) / (max - min). An empty input gives an empty list.
        When all scores are equal the range is zero, so every score maps
        to 0.0 instead of raising ZeroDivisionError.
    """
    review_scores = list(review_scores)
    if not review_scores:
        return []

    # Compute the bounds once rather than on every iteration.
    lowest = min(review_scores)
    spread = max(review_scores) - lowest
    if spread == 0:
        return [0.0 for _ in review_scores]

    return [(score - lowest) / spread for score in review_scores]

In [156]:
def main():
    """Score review1.txt through review8.txt and print the average aggregated score."""

    # Input files are named review1.txt ... review8.txt.
    filenames = ["review" + str(number) + ".txt" for number in range(1, 9)]

    # Read and score the reviews one at a time, in file order.
    review_scores = [analyze_sentence_sentiment(read_file(name)) for name in filenames]

    # Plain left-to-right accumulation of the per-review scores.
    running_total = 0.0
    for review_score in review_scores:
        running_total = running_total + review_score

    avg_review_score = running_total / len(review_scores)
    # Optional follow-up: normalize_review_sentiment(review_scores) rescales the scores.
    print("Average Review Score: {}".format(avg_review_score))

main()

Adaptations have long been a thorn in the side of anime viewers, but not because they are inherently bad. {'neg': 0.218, 'neu': 0.782, 'compound': -0.6956, 'pos': 0.0}
No, the main problem has been that many studios have regarded the original work almost as an afterthought, and there are a number of shows that could have been wonderful if the writers had simply stuck to the original story. {'neg': 0.142, 'neu': 0.66, 'compound': 0.4019, 'pos': 0.198}
One of the issues at hand seems to be ownership as producers, writers and directors all seem to want the work to be reflective of their style and perception, and in order to stamp their mark on a show they will makes numerous unnecessary changes or additions. {'neg': 0.0, 'neu': 0.907, 'compound': 0.5423, 'pos': 0.093}
Admittedly there are times when the adaptation supersedes the original work, but more often than not the result is at best a decent anime, and at worst utter twaddle. {'neg': 0.148, 'neu': 0.656, 'compound': 0.2023, 'pos': 0

One of my favorite anime of all time!---------------------------- {'neg': 0.0, 'neu': 0.68, 'compound': 0.5093, 'pos': 0.32}
I was one of those people who thought that FMA: Brotherhood is the second season of Fullmetal Alchemist (2003). {'neg': 0.0, 'neu': 1.0, 'compound': 0.0, 'pos': 0.0}
FMA Brotherhood has been popping up in Best Anime Lists on Youtube so it piqued my interest. {'neg': 0.0, 'neu': 0.62, 'compound': 0.8214, 'pos': 0.38}
It follows, or rather it is more parallel with the story in the manga. {'neg': 0.0, 'neu': 1.0, 'compound': 0.0, 'pos': 0.0}
The art and animation is magnificent, it stayed true to the characters' design but there was a more polished look. {'neg': 0.0, 'neu': 0.796, 'compound': 0.5187, 'pos': 0.204}
The fluidity of the movements during fight scenes are also a feast for the eyes. {'neg': 0.167, 'neu': 0.833, 'compound': -0.3818, 'pos': 0.0}
I could not find a single flaw in this series (except I that I wanted more episodes). {'neg': 0.0, 'neu': 1.0, 'c

Now, this is a prime example of how to adapt an manga into an anime the RIGHT way. {'neg': 0.0, 'neu': 1.0, 'compound': 0.0, 'pos': 0.0}
The original Fullmetal Alchemist, though well made and very popular, went into a downward spiral as soon as it diverted from the manga storyline and never seemed to recover. {'neg': 0.0, 'neu': 0.769, 'compound': 0.7574, 'pos': 0.231}
This however, shows that instead of trying to add their own story elements when adapting manga/visual novels etc. {'neg': 0.0, 'neu': 1.0, 'compound': 0.0, 'pos': 0.0}
, sometimes anime producers should just sit back and retell the great story that has already been written in the original. {'neg': 0.0, 'neu': 0.738, 'compound': 0.7506, 'pos': 0.262}
This anime starts off at a breakneck pace.----------------------- {'neg': 0.0, 'neu': 1.0, 'compound': 0.0, 'pos': 0.0}
It shoots entire volumes of the manga in a couple of episodes.--- {'neg': 0.0, 'neu': 1.0, 'compound': 0.0, 'pos': 0.0}
It was to be expected though, as Bon

That's a really hard thing to do for a 64 episode anime, but this show pulls it off very well. {'neg': 0.066, 'neu': 0.783, 'compound': 0.4104, 'pos': 0.151}
The character's have there own unique features that stand out among the rest. {'neg': 0.0, 'neu': 1.0, 'compound': 0.0, 'pos': 0.0}
Now let's go to the sound.--------------------------------------- {'neg': 0.0, 'neu': 1.0, 'compound': 0.0, 'pos': 0.0}
Sound 10/10 Standout(s) The dub  The soundtrack really fit in with the anime. {'neg': 0.0, 'neu': 0.811, 'compound': 0.4201, 'pos': 0.189}
I mostly loved the music they played in the action scenes or when there was something bad going on. {'neg': 0.141, 'neu': 0.605, 'compound': 0.4215, 'pos': 0.254}
I found it very unique and weirdly fit with the tension of a scene. {'neg': 0.293, 'neu': 0.553, 'compound': -0.3102, 'pos': 0.154}
For the dub, most people consider it one of the best dubs out there and I don't blame them. {'neg': 0.0, 'neu': 0.72, 'compound': 0.738, 'pos': 0.28}
The on

In [50]:
# The entries of DIR_PATH listed below are the files that will be tested.

files = os.listdir(DIR_PATH)  # names of the entries in DIR_PATH (not the current working directory)
files  # bare last expression so the notebook displays the list


['sample4.txt', 'sample3.txt', 'sample1.txt', 'sample2.txt']

In [51]:
# nltk.download('stopwords')
# nltk.download('punkt')

In [52]:
# Stop-word list: NLTK's English stop words extended with punctuation tokens.
# NOTE(review): presumably needs the NLTK 'stopwords' corpus to be downloaded first — confirm.
eng_stops = nltk.corpus.stopwords.words('english')
symbolic_stops = ['.', ',', '!', '?', ';', ':', '[', ']']

# Append the punctuation tokens to the stop-word list.
eng_stops += symbolic_stops

# TODO: 
# Implement stop words reduction.


In [45]:

# sample_index = 0
# file_chosen = files[sample_index]

# words = process_file(file_chosen)

# nltk.word_tokenize(words)


# nltk.stem.snowball.SnowballStemmer("english")




In [4]:
# VADER demonstration: example sentences, each annotated with the feature it illustrates.
sentences = [ "VADER is smart, handsome, and funny.",  # positive sentence example
              "VADER is smart, handsome, and funny!",  # punctuation emphasis handled correctly (sentiment intensity adjusted)
              "VADER is very smart, handsome, and funny.", # booster words handled correctly (sentiment intensity adjusted)
              "VADER is VERY SMART, handsome, and FUNNY.",  # emphasis for ALLCAPS handled
              "VADER is VERY SMART, handsome, and FUNNY!!!", # combination of signals - VADER appropriately adjusts intensity
              "VADER is VERY SMART, uber handsome, and FRIGGIN FUNNY!!!", # booster words & punctuation make this close to ceiling for score
              "VADER is not smart, handsome, nor funny.",  # negation sentence example
              "The book was good.",  # positive sentence
              "At least it isn't a horrible book.",  # negated negative sentence with contraction
              "The book was only kind of good.", # qualified positive sentence is handled correctly (intensity adjusted)
              "The plot was good, but the characters are uncompelling and the dialog is not great.", # mixed negation sentence
              "Today SUX!",  # negative slang with capitalization emphasis
              "Today only kinda sux! But I'll get by, lol", # mixed sentiment example with slang and contrastive conjunction "but"
              "Make sure you :) or :D today!",  # emoticons handled
              "Catch utf-8 emoji such as such as 💘 and 💋 and 😁",  # emojis handled
              "Not bad at all"  # Capitalized negation
            ]

# Score every example and print it, padded with '-' to 65 columns, next to its score dict.
analyzer = SentimentIntensityAnalyzer()
for sentence in sentences:
    vs = analyzer.polarity_scores(sentence)
    print("{:-<65} {}".format(sentence, str(vs)))

VADER is smart, handsome, and funny.----------------------------- {'pos': 0.746, 'compound': 0.8316, 'neu': 0.254, 'neg': 0.0}
VADER is smart, handsome, and funny!----------------------------- {'pos': 0.752, 'compound': 0.8439, 'neu': 0.248, 'neg': 0.0}
VADER is very smart, handsome, and funny.------------------------ {'pos': 0.701, 'compound': 0.8545, 'neu': 0.299, 'neg': 0.0}
VADER is VERY SMART, handsome, and FUNNY.------------------------ {'pos': 0.754, 'compound': 0.9227, 'neu': 0.246, 'neg': 0.0}
VADER is VERY SMART, handsome, and FUNNY!!!---------------------- {'pos': 0.767, 'compound': 0.9342, 'neu': 0.233, 'neg': 0.0}
VADER is VERY SMART, uber handsome, and FRIGGIN FUNNY!!!--------- {'pos': 0.706, 'compound': 0.9469, 'neu': 0.294, 'neg': 0.0}
VADER is not smart, handsome, nor funny.------------------------- {'pos': 0.0, 'compound': -0.7424, 'neu': 0.354, 'neg': 0.646}
The book was good.----------------------------------------------- {'pos': 0.492, 'compound': 0.4404, 'neu': 0.