In [12]:
import gensim
from sklearn.manifold import TSNE
from gensim.models import Word2Vec
from gensim.models import KeyedVectors
import gensim.downloader as api
import numpy as np
import re
import csv

import pandas as pd
import pprint

import string
import nltk


from IPython.display import HTML
from nltk.corpus import wordnet 


In [23]:
pathToDatasets = '../datasets/'
filePath = '../datasets/GoogleNews-vectors-negative300.bin'
word_vectors = api.load("glove-wiki-gigaword-100")
nltk.download('vader_lexicon')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/ubuntu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /home/ubuntu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/ubuntu/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /home/ubuntu/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

In [91]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize



senty = SentimentIntensityAnalyzer()
vocabulary = word_vectors.vocab;


NUMBER_OF_ALTERNATIVES = 7
TWEET_START = 75
NUM_OF_TWEETS = 15

punctuation = r"\"#$%&'()+-/:;<=>?@[\]^_`{|}~"

In [4]:
def cstr(s, color='black'):
    return "<text style=color:{}>{}</text>".format(color, s)

## Utility Code

In [89]:
def cleanAndTokenizeText(text):
    text = text.lower()
    newString = ""
    for char in text:
        if char not in punctuation:
            newString += char
    text = word_tokenize(newString)
    return text;

def listReplacements(word):
    if(word not in vocabulary):
        return []
    possibleReplacements = [word[0] for word in word_vectors.most_similar(word,topn=NUMBER_OF_ALTERNATIVES)]
    antonyms = getAntonymsOfWords(word)
    if(antonyms != []):
        print("For word [{1}]: antonyms - {0}".format(antonyms, word))
        possibleReplacements.extend(antonyms)
    return possibleReplacements

def getPOSTags(tweet):
    tags = nltk.pos_tag(tweet)
    return(tags)

def getAntonymsOfWords(word):
    if (word not in vocabulary):
        return []
    listOfAntonyms = []
    for syn in wordnet.synsets(word):
        for l in syn.lemmas():
            anton = l.antonyms()
            if(anton != []):
                listOfAntonyms.append(anton[0].name())
    return list(set(listOfAntonyms))

In [83]:
specificWord = "good"
def testOneWord(word = ""):
    if(word == ""):
        return
    else:
        print(getAntonymsOfWords(word))

testOneWord(specificWord)

['badness', 'bad', 'evilness', 'ill', 'evil']


## NLP Utility Code

In [51]:
def posApprovedReplacements(alternativeWords, userTokens, indexOfToken):
    if(alternativeWords == []):
        print("--- No alternative words! ---")
        return []
    tempTokens = userTokens[:]
    truePOSTokens = getPOSTags(tempTokens)
    validWords = []
    
    mainTag = truePOSTokens[indexOfToken][1]
    mainWord = userTokens[indexOfToken]
    
    for ind,word in enumerate(alternativeWords):
        tempTokens[indexOfToken] = word
        posTags = getPOSTags(tempTokens)
        newTag = (posTags[indexOfToken])[1]

        if(str(newTag) == str(mainTag)):
            print("Word {0}[{1}] replaced with {2}[{3}]".format(mainWord, mainTag, word,newTag))
            validWords.append(word)
    return validWords
        

In [52]:
def getAlternativeSentences(tweet, sentimentOfTweet):
    userInputTokens = cleanAndTokenizeText(tweet)
    
   
    alternativeStrings = []
    for ind,word in enumerate(userInputTokens):
        
        score = senty.polarity_scores(word)['compound'] # get the aggregated score!
        newUserTokens = userInputTokens[:]
        
        if(score != 0.0):
            replacements = listReplacements(word)       
            replacements = posApprovedReplacements(replacements[:], newUserTokens[:], ind)
            
            if(replacements == []):
                print("--- No pos approved words! ---")
                continue          
            for newWord in replacements:
                
                htmlFriendlyTokens = newUserTokens[:]
                newUserTokens[ind] = newWord
                htmlFriendlyTokens[ind] = cstr(" <i>[{0}]</i> ".format(newWord), 'blue');
                newString = ' '.join(newUserTokens)
                sentimentOfNewString = senty.polarity_scores(newString)['compound']
                
#                 if(sentimentOfNewString == 0):
#                     continue
                
                alternativeStrings.append(' '.join(htmlFriendlyTokens))
    return alternativeStrings;

## Test Cells

In [95]:
specificString = ""
def specificString(textString=""):
    if(textString == "" or textString == None):
        return
    mainSentiment = senty.polarity_scores(textString)['compound']
    if(mainSentiment == 0):
        print(" Sentiment of String shows completely neutral ");
        return
    else:
        print("{0}: {1}\n".format(textString, mainSentiment))
        newStrings = getAlternativeSentences(textString, mainSentiment)
        if(newStrings == [] or newStrings == None):
            print("--- No new strings generated ---")
            return
        for newStr in newStrings:
            sentimentOfNewString = senty.polarity_scores(newStr)['compound']
            if(sentimentOfNewString == mainSentiment or sentimentOfNewString == 0):
                display(HTML(cstr("{0}: {1}".format(newStr,sentimentOfNewString),'DarkGray')))
            elif(sentimentOfNewString > mainSentiment):
                display(HTML(cstr("{0}: {1}".format(newStr,sentimentOfNewString),'green')))
            else:
                display(HTML(cstr("{0}: {1}".format(newStr,sentimentOfNewString),'red')))

                
                

# specificString = "I bought this for my husband who plays the piano.  \
# He is having a wonderful time playing these old hymns.  \
# The music  is at times hard to read because we think the book was \
# published for singing from more than playing from.  Great purchase though!"
specificString("just got ur newsletter those fares really are [fantastic] shame i already booked and paid for mine")


just got ur newsletter those fares really are [fantastic] shame i already booked and paid for mine: -0.5209

Word fantastic[JJ] replaced with incredible[JJ]
Word fantastic[JJ] replaced with wonderful[JJ]
Word fantastic[JJ] replaced with terrific[JJ]
Word fantastic[JJ] replaced with marvelous[JJ]
Word fantastic[JJ] replaced with unbelievable[JJ]
For word [shame]: antonyms - ['honor']
Word shame[NN] replaced with disgrace[NN]
Word shame[NN] replaced with pity[NN]
Word shame[NN] replaced with embarrassment[NN]
Word shame[NN] replaced with sorrow[NN]
Word shame[NN] replaced with sadness[NN]
Word shame[NN] replaced with disgust[NN]
Word shame[NN] replaced with despair[NN]
Word shame[NN] replaced with honor[NN]


## Main Cells

In [92]:
def runThroughTweets():

    tweets_df = pd.read_csv( pathToDatasets + 'cleanedTweets.csv' , nrows=NUM_OF_TWEETS, skiprows=TWEET_START)

    tweets = tweets_df.values

    
    for counter,tweet in enumerate(tweets):
        tweet = tweet[0]
        
        mainSentiment = senty.polarity_scores(tweet)['compound']
        if(mainSentiment == 0):
            continue
        print("\nNUM({2}) {0}:{1}\n".format(tweet,mainSentiment, counter))       
        newStrings = getAlternativeSentences(tweet, mainSentiment)
        if(newStrings == [] or newStrings == None):
            print("--- No new strings generated ---\n\n")
            continue
        for alteredTweet in newStrings:
            sentimentOfNewString = senty.polarity_scores(alteredTweet)['compound']
            if(sentimentOfNewString == mainSentiment or sentimentOfNewString == 0):
                display(HTML(cstr("{0}: {1}".format(alteredTweet,sentimentOfNewString),'DarkGray')))
            elif(sentimentOfNewString > mainSentiment):
                display(HTML(cstr("{0}: {1}".format(alteredTweet,sentimentOfNewString),'green')))
            else:
                display(HTML(cstr("{0}: {1}".format(alteredTweet,sentimentOfNewString),'red')))
    
    
runThroughTweets()


NUM(0) no picnic  my phone smells like citrus.:0.0772

For word [no]: antonyms - ['yes', 'all']
Word no[DT] replaced with any[DT]
Word no[DT] replaced with all[DT]
For word [like]: antonyms - ['unlike', 'unalike', 'dislike']
Word like[IN] replaced with unlike[IN]



NUM(1)  my donkey is sensitive about such comments. nevertheless he would and me would be glad to see your mug asap. charger is still awol. :0.1779

For word [glad]: antonyms - ['sad']
Word glad[JJ] replaced with thankful[JJ]
Word glad[JJ] replaced with happy[JJ]
Word glad[JJ] replaced with i[JJ]
Word glad[JJ] replaced with sad[JJ]
Word awol[JJ] replaced with unmentioned[JJ]
Word awol[JJ] replaced with unreported[JJ]
Word awol[JJ] replaced with unrewarded[JJ]



NUM(2) no new csi tonight.  fml:-0.296

For word [no]: antonyms - ['yes', 'all']
Word no[DT] replaced with any[DT]
Word no[DT] replaced with all[DT]



NUM(3) i think my arms are sore from tennis :-0.3612

--- No pos approved words! ---
--- No new strings generated ---



NUM(4) wonders why someone that u like so much can make you so unhappy in a split seccond . depressed . :-0.6615

For word [like]: antonyms - ['unlike', 'unalike', 'dislike']
Word like[IN] replaced with unlike[IN]
Word like[IN] replaced with unalike[IN]
For word [unhappy]: antonyms - ['euphoric', 'happy']
Word unhappy[JJ] replaced with disappointed[JJ]
Word unhappy[JJ] replaced with anxious[JJ]
Word unhappy[JJ] replaced with happy[JJ]
Word unhappy[JJ] replaced with euphoric[JJ]
Word unhappy[JJ] replaced with happy[JJ]
For word [depressed]: antonyms - ['elate']
Word depressed[VBN] replaced with distressed[VBN]
Word depressed[VBN] replaced with worried[VBN]



NUM(5) sleep soon... i just hate saying bye and see you tomorrow for the night. :-0.5719

For word [hate]: antonyms - ['love']
Word hate[VB] replaced with love[VB]



NUM(6)  just got ur newsletter those fares really are unbelievable shame i already booked and paid for mine :-0.315

For word [unbelievable]: antonyms - ['credible']
Word unbelievable[JJ] replaced with incredible[JJ]
Word unbelievable[JJ] replaced with awesome[JJ]
Word unbelievable[JJ] replaced with fantastic[JJ]
Word unbelievable[JJ] replaced with awful[JJ]
Word unbelievable[JJ] replaced with phenomenal[JJ]
Word unbelievable[JJ] replaced with credible[JJ]
For word [shame]: antonyms - ['honor']
Word shame[NN] replaced with disgrace[NN]
Word shame[NN] replaced with pity[NN]
Word shame[NN] replaced with sorrow[NN]
Word shame[NN] replaced with disgust[NN]
Word shame[NN] replaced with despair[NN]
Word shame[NN] replaced with honor[NN]



NUM(9) damn... i do not have any chalk! my chalkboard is useless :-0.4753

For word [damn]: antonyms - ['bless']
Word damn[NN] replaced with darn[NN]
Word damn[NN] replaced with yeah[NN]
Word damn[NN] replaced with heck[NN]
Word damn[NN] replaced with bless[NN]
For word [useless]: antonyms - ['useful']
Word useless[JJ] replaced with irrelevant[JJ]
Word useless[JJ] replaced with meaningless[JJ]
Word useless[JJ] replaced with superfluous[JJ]
Word useless[JJ] replaced with impractical[JJ]
Word useless[JJ] replaced with pointless[JJ]
Word useless[JJ] replaced with worthless[JJ]
Word useless[JJ] replaced with harmless[JJ]
Word useless[JJ] replaced with useful[JJ]



NUM(10) had a blast at the getty villa but hates that she is had a sore throat all day. it is just getting worse too :-0.9052

For word [hates]: antonyms - ['love']
Word hates[VBZ] replaced with loves[VBZ]
Word hates[VBZ] replaced with thinks[VBZ]
Word hates[VBZ] replaced with cares[VBZ]
Word hates[VBZ] replaced with despises[VBZ]
Word hates[VBZ] replaced with likes[VBZ]
Word hates[VBZ] replaced with adores[VBZ]
Word sore[NN] replaced with shoulder[NN]
Word sore[NN] replaced with tendinitis[NN]
Word sore[NN] replaced with knee[NN]
Word sore[NN] replaced with groin[NN]
For word [worse]: antonyms - ['good', 'unregretful', 'better']
--- No pos approved words! ---



NUM(11)  hey missed ya at the meeting  sup mama:-0.296

For word [missed]: antonyms - ['have', 'hit', 'attend_to', 'attend']
Word missed[VBD] replaced with scored[VBD]
Word missed[VBD] replaced with shot[VBD]
Word missed[VBD] replaced with got[VBD]
Word missed[VBD] replaced with hit[VBD]
Word missed[VBD] replaced with attend_to[VBD]



NUM(12) my tummy hurts.  i wonder if the hypnosis has anything to do with it if so it is working i get it stop smoking!!!:-0.7332

For word [hurts]: antonyms - ['be_well']
Word hurts[VBZ] replaced with weakens[VBZ]
For word [stop]: antonyms - ['start', 'continuant_consonant', 'continue', 'begin']
Word stop[VB] replaced with start[VB]
Word stop[VB] replaced with continue[VB]
Word stop[VB] replaced with begin[VB]



NUM(14)  sorry babe!!  my fam annoys me too. thankfully they are asleep right now. muahaha. *evil laugh*:-0.2225

For word [sorry]: antonyms - ['unregretful']
Word sorry[JJ] replaced with glad[JJ]
Word sorry[JJ] replaced with sad[JJ]
Word sorry[JJ] replaced with disappointed[JJ]
Word sorry[JJ] replaced with happy[JJ]
Word sorry[JJ] replaced with regret[JJ]
Word sorry[JJ] replaced with unregretful[JJ]
--- No pos approved words! ---
Word thankfully[RB] replaced with luckily[RB]
Word thankfully[RB] replaced with fortunately[RB]
Word thankfully[RB] replaced with sadly[RB]
Word thankfully[RB] replaced with regrettably[RB]
Word thankfully[RB] replaced with admittedly[RB]
