In [3]:
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
import nltk
nltk.download('movie_reviews')
nltk.download('punkt')

with open('./data/2020_BUR_02.txt', 'r') as reader:
    burbank_text = reader.read()

[nltk_data] Downloading package movie_reviews to
[nltk_data]     /home/jorgenv/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.
[nltk_data] Downloading package punkt to /home/jorgenv/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Exercise 1

In [4]:
blob = TextBlob(burbank_text, analyzer=NaiveBayesAnalyzer())
blob.sentiment

Sentiment(classification='pos', p_pos=1.0, p_neg=1.0487590501191368e-20)

Probability of sentiment being negative is nearly zero.

# Exercise 2

In [2]:
testimonial = TextBlob(burbank_text)
testimonial.sentiment

Sentiment(polarity=0.09869334480780263, subjectivity=0.3790877796901893)

We can see that the text is neutral (polarity between -0.5 and 0.5), and is more objective (subjectivity < 0.5). 

# Exercise 3

In [109]:
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd

vectorizer = TfidfVectorizer(max_features=2000, min_df=1, stop_words='english')

with open('./data/2020_BUR_02.txt', 'r') as reader:
    X = vectorizer.fit_transform(reader)

def perplexity_by_ntopic(data, ntopics):
    output_dict = {
        "Number Of Topics": [], 
        "Perplexity Score": []
    }
    for t in ntopics:
        lda = LatentDirichletAllocation(
            n_components=t,
            learning_method="online",
            random_state=0
        )
        lda.fit(data)
        output_dict["Number Of Topics"].append(t)
        output_dict["Perplexity Score"].append(lda.perplexity(data))
    output_df = pd.DataFrame(output_dict)
    index_min_perplexity = output_df["Perplexity Score"].idxmin()
    output_num_topics = output_df.loc[
        index_min_perplexity,  # index
        "Number Of Topics"  # column
    ]
    return (output_df, output_num_topics)


df_perplexity, optimal_num_topics = perplexity_by_ntopic(
    X, 
    ntopics=[i for i in range(1, 21) if i % 2 == 0]
)

In [110]:
burbank_lda = LatentDirichletAllocation(n_components=optimal_num_topics, n_jobs=-1)
burbank_lda.fit(X)

LatentDirichletAllocation(n_components=2, n_jobs=-1)

In [112]:
def print_topics(model, count_vectorizer, n_top_words):
    words = count_vectorizer.get_feature_names()
    for topic_idx, topic in enumerate(model.components_):
        print("\nTopic #%d:" % topic_idx)
        print(" ".join([words[i]
                        for i in topic.argsort()[:-n_top_words - 1:-1]]))

print_topics(burbank_lda, vectorizer, 20)


Topic #0:
valley noise force task burbank california san community faa southern senators fernando worked said air recommendations representatives groups issues traffic

Topic #1:
controllers study traffic air consulting recommended turns northbound procedures make flights departing groups faa training solve analyzed manage resources refresh


# Exercise 4

In [59]:
import pandas as pd

amazon_df = pd.read_csv('./data/amazon_cells_labelled.txt', sep='\t', names=['Review', 'Label'])
amazon_df['Company'] = 'Amazon'
imdb_df = pd.read_csv('./data/imdb_labelled.txt', sep='\t', names=['Review', 'Label'])
imdb_df['Company'] = 'imdb'
yelp_df = pd.read_csv('./data/yelp_labelled.txt', sep='\t', names=['Review', 'Label'])
yelp_df['Company'] = 'yelp'

print(amazon_df['Label'].value_counts())

0    500
1    500
Name: Label, dtype: int64


In [60]:
comb_data = pd.concat([amazon_df, imdb_df, yelp_df], ignore_index=True)

print(comb_data['Label'].value_counts())
print(comb_data['Company'].value_counts())

1    1386
0    1362
Name: Label, dtype: int64
yelp      1000
Amazon    1000
imdb       748
Name: Company, dtype: int64


In [61]:
comb_data.to_csv('./data/Sentiment_Analysis_Dataset.csv')

In [62]:
print(comb_data.columns.values)

['Review' 'Label' 'Company']


In [63]:
print(comb_data.isna().sum())

Review     0
Label      0
Company    0
dtype: int64


In [64]:
import spacy
from string import punctuation
from spacy.lang.en.stop_words import STOP_WORDS
from spacy.lang.en import English

extra_words=list(STOP_WORDS)+['\n']
nlp=spacy.load('en_core_web_sm')

In [79]:
def clean_text(text):
    text = text.translate(str.maketrans('', '', punctuation))
    text = ' '.join([word for word in text.split(' ') if word not in extra_words])
    return text.lower()

In [73]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.base import TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

In [97]:
parser = spacy.load("en_core_web_sm")

def text_tokenizer(text):
    text_tokens = parser(text)
    print(text_tokens)
    text_tokens = [word.lemma_.lower().strip() if word.lemma_ != "-PRON-" else word.lower_ for word in text_tokens]
    return text_tokens

In [100]:
class predictors(TransformerMixin):
    def transform(self, X, **transform_params):
        return [clean_text(text) for text in X]
    
    def fit(self, X, y=None, **fit_params):
        return self
    
    def get_params(self, deep=True):
        return {}

In [101]:
tfidf_vector = TfidfVectorizer(tokenizer=text_tokenizer)

In [102]:
X = comb_data['Review'].values
y = comb_data['Label'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [103]:
classifier = LinearSVC()

In [104]:
pipe = Pipeline([("cleaner",predictors()),('vectorizer',tfidf_vector),('classifier',classifier)])

In [105]:
pipe.fit(X_train, y_train)

the phone gets extremely hot
if reading dont
ill drivng headset starts ringing reason
not what i wanted
the lighting dark set mood
great price
be sure order dessert need pack togo  tiramisu cannoli die
before i i gave 1 star rating know time eating bachi burger writing review
i bother contacting company dollar product i learned lesson i bought form online
even women finally sign improvement expected things happen time film far asleep  
it year reminded huston game evinced faithful adaptation james joyces acclaimed novella the dead  
it fun experience
also music mark snow possibly best score ive heard  
the service leaves lot desired
the fish badly underwater shots repeated thousand times film  
i think robert ryans best film portrayed like father schizophrenic real lifemy father murdered affected second world war worse  
top line dont waste time money bad comes  
i recommend place wrong donut place
i agree jessica movie pretty bad  
very bad experience
would recommend item
these certai

i verizon 2 years ago liked service
the food good
dont waste money
must night place
the waitresses friendly
everything movie stupid  
has working great
you know pushed hard right number times function want
i store bought new nokia phone working great
anyways the food definitely filling price pay expect
i insulted
i asked multiple times wine list time ignored i went hostess got
all broke months use
movie makes lot mistakes  
i dont understand garbage got shelves movie store real movie  
i love camera pretty good quality
its long time i entertained movie  
watching washing machine twirling wouldnt hurt eyes  
sucked stuff work phone
yet plantronincs continues use flawed charger design
almost songs cover girl oldfashioned tuneful  
i hope place sticks
very good lunch spot
i phone years best
it feels poorly constructed menus difficult navigate buttons recessed difficult push
perabo nice energy level obviously comfortable camera  
a pretty good product
a bit predictable  
yes bad  
its well

linked phone effort
i recently tried caballeros i week
the place fairly clean food simply wasnt worth
it presents idyllic portrayal ups downs characters lives  
very nice relaxing late night viewing  
im happy
plus 8 bucks
it blew  
battery life real good
muddy low quality sound casing wires insert poorly super glued slid
movie littered overt racial slurs black cast members return whites depicted morons boobs  
the food delicious bartender attentive personable and got great deal
as service i thought good
at time film animation dominated disneypixars cgi masterpieces refreshing comforting know miyazaki relying traditional handdrawn animation tell charming enchanting stories  
not loud doesnt turn like
portable works
shrimp when i unwrapped i live 12 mile brushfire literally ice cold
the chicken wings contained driest chicken meat i eaten
best rotating feature helpful
happy far
it extremely crumby pretty tasteless
its uncomfortable sound quality poor compared phone razr previous wired he

as id like i cant passed atrocious service return
what film lacks convincing script  
this particular model work motorola q smartphone
great product
the food terrible
people like european films art movies like movie  
it lasts 3o minutes i actually try use phonemy wife phone problem
i like armand assante  cable companys summary sounded interesting i watched twice probably  
still waiting im sure item work i recieve
crisp clear
this place amazing
def coming bowl time
i wouldnt return
predictable bad watch  
my colleague  i great receptiona little expensive performance great
its wild stuff highly recommended fans giallo cinema  
phenomenal food service ambiance
not sure lost  flat characters audience nearly half walked  
however bt headsets currently good real time games like firstperson shooters audio delay messes
very disappointing thing no speakerphone
 comes strong light use light camera shots flash sos signals seriously
please stay away shrimp stir fried noodles
think film like drea

the bt headset disapoinment
will order
also phone doesnt accept cbr mp3s preferably ripped windows media player
a study interested worst sins industrial design
if movie needed wordofmouth promote  
that screams legit booksomethats pretty rare vegas
i work hospitality industry paradise valley refrained recommending cibo longer
this place awesome want light healthy summer
the service bit lacking
otherwise dont waste time  
i wasnt impressed strip steak
im terribly disappointed film receive awards accolades especially far deserving works film  
just reading specs makes wow
he deserves 5 stars
even allowing poor production values time 1971 format kind miniseries baaaaaad  
the design good4
it acted tv movie  
i heart place
 happy wonderful feel good ending  
other leather nice soft fit tight cut face good shape
i think food flavor texture lacking
the rest movie lacks art charm meaning if emptiness works i guess  
there pathetic attempts characters depth didnt work rest plot  
its shame goo

jimmy stewart great hero hitchcock story rips cool climax embassy function lacks brooding menace hitchcocks black white lowbudget original  
thanks amazon having things i need good price
it defeats purpose bluetooth headset
pretty awesome place
the warmth generates contrast austere backdrop  
later i found lost power film  
this hole wall great mexican street tacos friendly staff
their network coverage los angeles horrible
cinematography noteworthy including fine views barcelona famed gaudi towers  
it shouldnt 30 min pancakes eggs
dont buy product  it fails
real sushi lovers lets honest  yama good
i contacted company told unit warranty i couldnt produce receipt i luck
excellent wallet type phone case
 both hot  sour  egg flower soups absolutely 5 stars
the steak amazingrge fillet relleno best seafood plate
this best italian thrillers early 70s  
i given star i able
the worst phone ive only months
loved itfriendly servers great food wonderful imaginative menu
the steak shrimp opinion b

on cafe serves good food
con spotty service
do not buy d807wrongly advertised d807
i highly recommend encourage people try
as people complained i found headsets microphone weak
by far best cheesecurds
poor quality service
after bite i hooked
you need 3 mins phone book time turn phonebattery life short
unreliable  im giving
i came verizon cingulair nicer cell phones thing i noticed bad service
i happy product
and i forgot the casting superb trond fausa aurvã¥g perfect role bothersome man doesnt understand  
the best place tasty bowl pho
i posted detailed comments grey black phone fire red great color
watch preparing delicious food
disapointing results
they cool
how stupid
the burrittos blah
the 12 mega pixel camera phone reasonably good7
cast great  
i dont words place pretty
otherwise easy install use clear sound
i wont try going
i absolutely horrible reception apartment phones i problem
20th century foxs road house 1948 silly noir implausible unmitigated bore movie  
couldnt ask satis

im sorry i cant recommend  
like good quick place grab bite familiar pub food favor look
the volume ringer real good choices loud
the chains im fan beat place easily
so way plug us i converter
it pale color instead nice char no flavor
this greedy corporation never dime
i wanted plantronics 510 right issues methe good
not screamy masculine right  
the macarons insanely good
all tapas dishes delicious
bad purchase
wont work right atleast
the stories unbelievable actors  
its aggravating
these good ordered twice
i definitely recommend wings pizza
the waitress manager friendly
nice blanket moz feel like cover subpar food
they know
the igo chargers tips great
this product high quality chinese crap
bad quality
almost involved return school acting utterly predictable bad script pile garbage round  
one boringpointless movies i seen  
the calls drop phone comes screen goes black worst stops ringing intermittently
the movie little slow  
a lot websites rating good phone i
the result film dont l

the servers pleasant deal dont honor pizza hut coupons
superb phone great network
id hardest decision honestly ms dishes taste supposed taste amazing
and red curry bamboo shoots wasnt tasty
pretty cool i
felt insulted disrespected talk judge human like
the seafood fresh generous portion
their regular toasted bread equally satisfying occasional pats butter mmmm
jawbone era awesome
the directing cinematography arent good  
it want suspense drama comedy confusing subplots native americans brain eating if youre looking beall endall brainsucking movies look  
the chicken dishes ok beef like shoe leather
bad cause i know family owned i wanted like place
would reccommend
each day week different deal delicious
low quality
we group 70 claimed 40 handled beautifully
the servers went forth times are helped
treo tmobile refused replace forced buy phone kind upgrade discount
the service extremely slow
i able voice dialing car problem
this moviemaking  
some highlights  great quality nigiri
paying 7

i connected wifes bluetoothmotorola hs850 phone worked like charm phone pocket case
excellent service
like reviewer said couldnt pay eat place
no buyers remorse
sending
we sending
this movie awesome  
this stunning movie  
i ordered voodoo pasta time id excellent pasta going gluten free years ago
using earpieces left right thing stay ear
so far so good
the seller understanding patient i definitely buy
the heart attack grill downtown vegas absolutely flatlined excuse restaurant
delicious nyc bagels good selections cream cheese real lox capers
it northern humour positive community represents  
i recommend
the movie terribly boring places  
end days worst bigbudget action movies ive seen  
this utterly confusing caused lose couple important contacts
ordered appetizer took 40 minutes pizza 10 minutes
spaghetti special whatsoever
we waited thirty minutes seated 8 vacant tables folks waiting
thank wasting money
provides good protection looks classy
the cashier care i ended wayyy overpriced
i

nothing short magnificent photographycinematography film  
i totally absolutely recommend movie likes good wholesome family movies exactly  
tom wilkinson broke heart end elses judging fumbling hankies hands going faces males females alike  
this best bars food vegas
it feels comfortable headsets i wear glasses gets way
bela lugosi totally extraneous intoning odd lines  
great product fast shipping
2 thumbs seller
it good
we asked bill leave eating didnt bring
good product  incredible value
horrible  dont waste time money
she ordered toasted english muffin came untoasted
i gotten defect i risk buying built quality
then i exchanged phone problem
no table thought food average worth wait
in span hour i people exclaim whoa  new phone tv
in fact stinker smells like directtovideo release  
to summarize food incredible nay transcendant brings joy like memory pneumatic condiment dispenser
charger worked week completely stopped charging phone
del taco pretty nasty avoided possible
the ngage lac

Pipeline(steps=[('cleaner', <__main__.predictors object at 0x7ff573eaaaf0>),
                ('vectorizer',
                 TfidfVectorizer(tokenizer=<function text_tokenizer at 0x7ff5708f4820>)),
                ('classifier', LinearSVC())])

In [106]:
predicted = pipe.predict(X_test)

the nokia ca42 usb cable work phone
voice recognition tremendous
for product costs i expect work far better greater ease thing
 in fact hard remember ray charles acted played man  	1
ray charles legendary  	1
ray charles life provided excellent biographical material film goes movie musician  	1
hitchcock great director  	1
ironically i find films total waste time watch  	0
secondly hitchcock pretty perfected thriller chase movie  	1
its pandering audience sabotages films  	0
hence story lacks certain energy  	0
the plot simply rumbles like machine desperately depending addition new scenes  	0
there usual hitchcock logic flaws  	0
mishima extremely uninteresting  	0
this chilly unremarkable movie author livingworking chilly abstruse culture  	0
the flat reenactments dont hold attention emotionally adrift stagy  	0
and rest sits awful soldiers singing songs masculinity pledge hairsplitting purity admiration swords etc  	0
he bore pieces kill momentum movie quicker  	0
schrader resume lou

it handles tough issues dignity grace course shocking spoiler  
worst foodservice ive
good transmit audio
great pizza salads
it equally awful
its user friendly
it doesnt look cool
the opening sequence gem classic cat n mouse games follow delight watch  
price good
my visit hiro delight
the food came good pace
i come
i watched prices inflate portions smaller management attitudes grow rapidly
its case bad laughable  
i sorry i purchase
the biggest complaint i battery drains superfast
brilliance  
i uneasy bad movie scared  
seafood limited boiled shrimp crab legs crab legs definitely taste fresh
good beer  drink selection good food selection
i 23 bars cell phone i home cant hear
very disappointed wondered oscar shortlist  
not volume
lame best way describe  
we wont returning
wasted hours  
an instant classic great soundtrack catchy song ending credits  
if razr owneryou
very friendly staff
and sound quality great
if seen movie i definitely recommend  
car charger ac charger included sur

this witty delightful adaptation dr seuss book brilliantly animated upas finest thoroughly deserving academy award  
it sure beat nachos movies i expect little bit coming restaurant
great case price
works like charm work i got phone
if look authentic thai food
a couple months later i returned amazing meal
5 stars brick oven bread app
first  bathrooms location dirty seat covers replenished  plain yucky
couldnt figure
pretty good beer selection
and drinks weak people
thats rightthe red velvet cakeohhh stuff good
do favor stay away dish
it horrendous  
appetite instantly gone
the annoying thing cover girl way rita hayworth pedestal  
the real disappointment waiter
the poor batter meat ratio chicken tenders unsatisfying
its adorable seeing mickey playing turkey straw highly imaginative occasionally cruel way  
it pretty gross
it kept getting worse worse im officially
the film deserves strong kudos taking stand having exceptional acting lesserknown cast superintelligent script doesnt insult

a great touch
feelings thoughtsgabriels discomfort danceall intangibles leap life come viewers grasp hustons portrayal  
i paid  
i ate twice visit especially enjoyed salmon salad
plus i seriously believe worth steep price point
great audio
hands favorite italian restaurant
good samsung
what great double cheeseburger
just spicy perfect actually
in conclusion i bother movie volcano los angeles nonsense  
cant store phone numbers sim
when im town definitely spot ill hit
elias koteasjack palance play good roles angelina hot gets nakedbilly drago appears cool usual  cameo sven ole thorsen helps enjoyable movie good acting decent budget  
hated  
i known errol flynn brilliant actor dads favourite actor i grew watching films child  
bluetooth range good  days ago i left phone trunk got carried conversation hitch
i dont think hold securly belt
this little device transformed organizational capability life lot easier
what bother slow service
regardless film fails levels  
i love place
despite h

In [107]:
score = accuracy_score(y_test,predicted)

print(score)

0.8054545454545454
