## Import Statements

In [1]:
import gensim.downloader as api
import numpy as np
import re
import csv
import pandas as pd
import pprint
import string
import nltk
import sys

from IPython.display import HTML
from nltk.corpus import wordnet 
from sklearn.manifold import TSNE
from gensim.models import Word2Vec
from gensim.models import KeyedVectors

from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize

pathToDatasets = '../datasets/'
pathToDataScripts = '../datasets/scripts/'
filePath = '../datasets/GoogleNews-vectors-negative300.bin'

sys.path.insert(0, pathToDataScripts)
from cleanDataset import tokenize_words 



## Downloading binaries and models


In [2]:
word_vectors = api.load("glove-wiki-gigaword-100")
nltk.download('vader_lexicon')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/ubuntu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /home/ubuntu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/ubuntu/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /home/ubuntu/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

## Global Variables and Global Objects

In [3]:
senty = SentimentIntensityAnalyzer()
vocabulary = word_vectors.vocab;


NUMBER_OF_ALTERNATIVES = 7
TWEET_START = 50
NUM_OF_TWEETS = 25

punctuation = r"\"#$%&'()+-/:;<=>?@[\]*^_`{|}~"

## Class for Sentences


In [4]:
class Sentence:
    
    def __init__(self, sentence, sentiment):
        self.ogSentence = sentence;
        self.ogSentiment = sentiment;
        self.indexToSetOfWords = {}
        self.alternateSentences = [];
        self.alternateHTMLs = [];
        self.finalShiftSentences = [];

    def addAlternativesByIndex(self, index, listOfAlternatives):
        '''
            Adds the list of possible alternative words that 
            can be used per word based on the index of the word in the tokenized 
            sentence. (from cleanAndTokenizeText())
        '''
        if(self.indexToSetOfWords.get(index)):
            self.indexToSetOfWords[index] = self.indexToSetOfWords.union(set(listOfAlternatives))
        else:
            self.indexToSetOfWords[index] = set(listOfAlternatives)
        
    def addAlternativeStrings(self, strings):
        if(isinstance(strings,str)):
            self.alternateSentences.append(strings)
        else:
            self.alternateSentences.extend(strings)
    
    def addHTMLStrings(self, htmls):
        if(isinstance(htmls, str)):
            self.alternateHTMLS.append(htmls)
        else:
            self.alternateHTMLs.extend(htmls)
    
    def addFinalSentences(self, sentences):
        self.finalShiftSentences.extend(sentences)
        if(isinstance(sentences, str)):
            self.finalShiftSentences.append(sentences)
        else:
            self.finalShiftSentences.extend(sentences)
            

## Utility Code

In [35]:
def cstr(s, color='black'):
    return "<text style=color:{}>{}</text>".format(color, s)

def cleanAndTokenizeText(text):
    text = text.lower();
    newString = ""
    for char in text:
        if char not in punctuation:
            newString += char
    text = word_tokenize(newString)
    return text;

def getPOSTags(tweet):
    tags = nltk.pos_tag(tweet)
    return tags;

def getAntonymsOfWords(word):
    if(word not in vocabulary):
        return []
    setOfAntonyms = set()
    for syn in wordnet.synsets(word):
        for l in syn.lemmas():
            anton = l.antonyms()
            if(anton!=[]):
                setOfAntonyms.add(anton[0].name())
    if(len(setOfAntonyms) == 0):
        print("No antonyms found for word {0}".format(word))
    return list(setOfAntonyms)

def listReplacements(word):
    if(word not in vocabulary):
        print(" --- {0} not in vocabulary ---".format(word))
        return []
    possibleReplacements = [word[0] for word in word_vectors.most_similar(word,topn=NUMBER_OF_ALTERNATIVES)]
    if(possibleReplacements == []):
        print(" --- No replacements for word {0} ---".format(word))
    antonyms = getAntonymsOfWords(word)
    if(antonyms != []):
        possibleReplacements.extend(antonyms)
        print("Some antonyms for word {0} are {1}".format(word, antonyms[:3]))
        return possibleReplacements
    return possibleReplacements
    
def posApprovedReplacements(alternativeWords, userTokens, indexOfToken):
    if(alternativeWords == []):
        return []
    tempTokens = userTokens[:]
    POSTokens = getPOSTags(tempTokens)
    validWords = []
    
    mainTag = POSTokens[indexOfToken][1]
    mainWord = userTokens[indexOfToken]
    
    for ind,word in enumerate(alternativeWords):
        tempTokens[indexOfToken] = word
        posTags = getPOSTags(tempTokens)
        newTag = (posTags[indexOfToken])[1]
        
        if(str(newTag) == str(mainTag)):
            print("Word {0}[{1}] replaced with {2}[{3}]".format(mainWord, mainTag, word,newTag))
            validWords.append(word)
    return validWords
        
    
    
def getAlternativeSentences(sentenceObj):
    mainSentence = sentenceObj.ogSentence;
    mainSentiment = sentenceObj.ogSentiment;
    
    sentenceTokens = cleanAndTokenizeText(mainSentence)
   
    for ind, word in enumerate(sentenceTokens):
        alternativeStrings = []
        alternativeHTMLs = []
        
        score = senty.polarity_scores(word)['compound']
        copyOfTokens = sentenceTokens[:]
        replacements = []
        if(score != 0.0):
            tempReplacements = listReplacements(word) # get embedding based relations
            if(tempReplacements == []):
                print("No replacements found at all for word {0}".format(word))
                continue
            replacements = posApprovedReplacements(tempReplacements[:], copyOfTokens[:], ind)
            if(replacements == []):
                print(" -- No POS approved words! -- for word {0}\n some non-POS:{1}".format(word, tempReplacements[:4]))
                continue
            sentenceObj.addAlternativesByIndex(ind, replacements)
            
            ## Generate new sentences by switching that word
            for newWord in replacements:
                htmlFriendlyTokens = copyOfTokens[:]
                copyOfTokens[ind] = newWord
                htmlFriendlyTokens[ind] = cstr(" <i>[{0}]</i>".format(newWord), "blue");
                newString = ' '.join(copyOfTokens)
                alternativeStrings.append(newString);
                alternativeHTMLs.append(' '.join(htmlFriendlyTokens))
        sentenceObj.addAlternativeStrings(alternativeStrings)
        sentenceObj.addHTMLStrings(alternativeHTMLs)
    return sentenceObj



## Testing Code


In [31]:
specificWord = "good"
def testOneWord(word=""):
    if(word==""):
        return
    print(word)


In [34]:
specificString = ""
def specificString(textString=""):
    if(textString == "" or textString == None):
        return
    mainSentiment = senty.polarity_scores(textString)['compound']
    if(mainSentiment == 0):
        print("No sentiment found in sentence");
        return;
    print("\n {0}:{1}\n".format(textString,mainSentiment))   
    sentenceObj = Sentence(textString, mainSentiment)
    sentenceObj = getAlternativeSentences(sentenceObj)
    
    alternateTweets = (sentenceObj.alternateSentences)[:]
    if(alternateTweets == [] or alternateTweets == None):
        print(" -- No new Strings generated ---\n\n")
        return
        #         sentenceObj = shiftSentiment(sentenceObj, True);
#         printStrings(sentenceObj)
    sentenceObj = shiftSentiment(sentenceObj, False);
    printStrings(sentenceObj)
    
specificString("You are such a wonderful person for looking at this poster!")
specificString("You are such a horrible person for looking at this poster")
specificString("You are such a good and nice person for looking at this poster!")


 You are such a wonderful person for looking at this poster!:0.6114

No antonyms found for word wonderful
Word wonderful[JJ] replaced with amazing[JJ]
Word wonderful[JJ] replaced with terrific[JJ]
Word wonderful[JJ] replaced with lovely[JJ]
Word wonderful[JJ] replaced with marvelous[JJ]
Word wonderful[JJ] replaced with beautiful[JJ]
Word wonderful[JJ] replaced with fantastic[JJ]
Word wonderful[JJ] replaced with fabulous[JJ]



 You are such a horrible person for looking at this poster:-0.5423

No antonyms found for word horrible
Word horrible[JJ] replaced with awful[JJ]
Word horrible[JJ] replaced with terrible[JJ]
Word horrible[JJ] replaced with horrendous[JJ]
Word horrible[JJ] replaced with dreadful[JJ]
Word horrible[JJ] replaced with horrific[JJ]
Word horrible[JJ] replaced with appalling[JJ]



 You are such a good and nice person for looking at this poster!:0.7177

Some antonyms for word good are ['badness', 'bad']
Word good[JJ] replaced with sure[JJ]
Word good[JJ] replaced with bad[JJ]
Word good[JJ] replaced with evil[JJ]
Some antonyms for word nice are []
Word nice[JJ] replaced with good[JJ]
Word nice[JJ] replaced with happy[JJ]
Word nice[JJ] replaced with perfect[JJ]
Word nice[JJ] replaced with pretty[JJ]
Word nice[JJ] replaced with wonderful[JJ]
Word nice[JJ] replaced with lovely[JJ]
Word nice[JJ] replaced with guy[JJ]
Word nice[JJ] replaced with nasty[JJ]


## Main Cell


In [6]:
def shiftSentiment(sentenceObj, positive=True):
    
    actualTweet = sentenceObj.ogSentence;
    mainSentiment = sentenceObj.ogSentiment;
    alternateTweets = sentenceObj.alternateSentences;
    
    
    happiestTweet = ""
    saddestTweet = ""
    happiestScore = -sys.maxsize - 1
    saddestScore = sys.maxsize
    correctTweets = []
    
    for tweet in alternateTweets:
        newSenty = senty.polarity_scores(tweet)['compound']
        
        if(newSenty < saddestScore):
            saddestTweet = tweet
            saddestScore = newSenty
        if(newSenty > happiestScore):
            happiestTweet = tweet
            happiestScore = newSenty
            
        if(positive):
            if(newSenty > mainSentiment):
                correctTweets.append(tweet)
                continue
            
            elif(newSenty < mainSentiment):
                continue
                # Grab happiest tweet and if it's not "", then generate more happy tweets from it
        if(positive == False):
            if(newSenty < mainSentiment):
                correctTweets.append(tweet)
                continue
            
            elif(newSenty > mainSentiment):
                continue
                # grab happiest tweet, and if it's not "", then generate more happy tweets from it
                
#     if(correctTweets == []):
#         print("\n\nNo tweets found when trying to do Positive={}\n\n".format(positive))
    
    sentenceObj.addFinalSentences(correctTweets);
    return sentenceObj;
        
def printStrings(sentenceObj):
    newStringsHTML = sentenceObj.alternateHTMLs;
    newStrings = sentenceObj.alternateSentences;
    mainSentiment = sentenceObj.ogSentiment;    
    for ind,alteredTweet in enumerate(newStrings):
            sentimentOfNewString = senty.polarity_scores(alteredTweet)['compound']
            htmlText = newStringsHTML[ind]
            if(sentimentOfNewString == mainSentiment or sentimentOfNewString == 0):
                display(HTML(cstr("{0}: {1}".format(htmlText,sentimentOfNewString),'DarkGray')))
            elif(sentimentOfNewString > mainSentiment):
                display(HTML(cstr("{0}: {1}".format(htmlText,sentimentOfNewString),'green')))
            else:
                display(HTML(cstr("{0}: {1}".format(htmlText,sentimentOfNewString),'red')))

In [27]:
def runThroughTweets():
    
    tweets_df = pd.read_csv( pathToDatasets + 'cleanedTweets.csv' , nrows=NUM_OF_TWEETS, skiprows=TWEET_START)

    tweets = tweets_df.values

    listOfObjects = []
    for counter,tweet in enumerate(tweets):
        tweet = tweet[0]
        mainSentiment = senty.polarity_scores(tweet)['compound']
        if(mainSentiment == 0):
            continue
        print("\n {0}:{1}\n".format(tweet,mainSentiment))   
        sentenceObj = Sentence(tweet, mainSentiment)
        sentenceObj = getAlternativeSentences(sentenceObj)
        alternateTweets = (sentenceObj.alternateSentences)[:]
        if(alternateTweets == [] or alternateTweets == None):
            print(" -- No new Strings generated ---\n\n")
            continue
#         sentenceObj = shiftSentiment(sentenceObj, True);
#         printStrings(sentenceObj)
        sentenceObj = shiftSentiment(sentenceObj, False);
        printStrings(sentenceObj)
    
runThroughTweets()
        
        
    


 broadband plan a massive broken promise  via  still waiting for broadband we are :-0.2023

Some antonyms for word broken are ['keep', 'repair', 'make']
Word broken[NN] replaced with breaking[NN]
Word broken[NN] replaced with broke[NN]
Word broken[NN] replaced with apart[NN]
Word broken[NN] replaced with neck[NN]
Word broken[NN] replaced with keep[NN]
Word broken[NN] replaced with repair[NN]
Word broken[NN] replaced with make[NN]
Word broken[NN] replaced with promote[NN]
Word broken[NN] replaced with conform_to[NN]
No antonyms found for word promise
Word promise[NN] replaced with pledge[NN]
Word promise[NN] replaced with commitment[NN]
Word promise[NN] replaced with hope[NN]
Word promise[NN] replaced with give[NN]
Word promise[NN] replaced with desire[NN]



  wow tons of replies from you may have to unfollow so i can see my friends tweets you are scrolling the feed a lot. :0.7845

No antonyms found for word wow
Word wow[JJ] replaced with c'mon[JJ]
Word wow[JJ] replaced with hey[JJ]
Word wow[JJ] replaced with gosh[JJ]
Word wow[JJ] replaced with whew[JJ]
Some antonyms for word friends are ['stranger', 'foe']
Word friends[NNS] replaced with parents[NNS]
Word friends[NNS] replaced with relatives[NNS]
Word friends[NNS] replaced with others[NNS]



 put vacation photos online a few yrs ago. pc crashed and now i forget the name of the site. :-0.2263

Some antonyms for word forget are ['mind', 'remember']
Word forget[VBP] replaced with remember[VBP]
Word forget[VBP] replaced with tell[VBP]
Word forget[VBP] replaced with imagine[VBP]
Word forget[VBP] replaced with know[VBP]
Word forget[VBP] replaced with mind[VBP]
Word forget[VBP] replaced with remember[VBP]



 i need a hug :0.4767

No antonyms found for word hug
Word hug[NN] replaced with hugs[NN]
Word hug[NN] replaced with kiss[NN]
Word hug[NN] replaced with hugged[NN]
Word hug[NN] replaced with goodbye[NN]



  not sure what they are only that they are pos! as much as i want to i dont think can trade away company assets sorry andy! :-0.2134

Some antonyms for word sure are ['unsure', 'uncertain']
Word sure[JJ] replaced with unsure[JJ]
Word sure[JJ] replaced with uncertain[JJ]
No antonyms found for word want
Word want[VBP] replaced with do[VBP]
Word want[VBP] replaced with know[VBP]
Word want[VBP] replaced with get[VBP]
Some antonyms for word assets are ['liability']
Word assets[NNS] replaced with investments[NNS]
Word assets[NNS] replaced with funds[NNS]
Word assets[NNS] replaced with debts[NNS]
Some antonyms for word sorry are ['unregretful']
Word sorry[VBP] replaced with 'm[VBP]
Word sorry[VBP] replaced with glad[VBP]
Word sorry[VBP] replaced with regret[VBP]



  i hate when that happens... :-0.5719

Some antonyms for word hate are ['love']
Word hate[NN] replaced with anyone[NN]
Word hate[NN] replaced with crime[NN]



 i have a sad feeling that dallas is not going to show up  i gotta say though you would think more shows would use music from the game. mmm:-0.3818

Some antonyms for word sad are ['glad']
Word sad[JJ] replaced with awful[JJ]
Word sad[JJ] replaced with tragic[JJ]
Word sad[JJ] replaced with horrible[JJ]
Word sad[JJ] replaced with happy[JJ]
Word sad[JJ] replaced with poignant[JJ]
Word sad[JJ] replaced with glad[JJ]
No antonyms found for word feeling
Word feeling[NN] replaced with feel[NN]
Word feeling[NN] replaced with felt[NN]
Word feeling[NN] replaced with sense[NN]



 where did u move to  i thought u were already in sd.  hmmm. random u found me. glad to hear yer doing well.:0.6249

Some antonyms for word glad are ['sad']
Word glad[NN] replaced with i[NN]
Some antonyms for word well are ['disadvantageously', 'badly', 'ill']
Word well[RB] replaced with so[RB]
Word well[RB] replaced with even[RB]
Word well[RB] replaced with disadvantageously[RB]
Word well[RB] replaced with badly[RB]



  i miss my ps3 it is out of commission  wutcha playing have you copped blood on the sand:0.0516

Some antonyms for word miss are ['have', 'attend_to', 'attend']
Word miss[VBP] replaced with play[VBP]
Word miss[VBP] replaced with have[VBP]
Word miss[VBP] replaced with attend_to[VBP]
Word miss[VBP] replaced with attend[VBP]
No antonyms found for word playing
Word playing[VBG] replaced with having[VBG]



 the life is cool. but not for me. :0.1655

Some antonyms for word cool are ['warm', 'heat']
Word cool[JJ] replaced with hot[JJ]
Word cool[JJ] replaced with warm[JJ]
Word cool[JJ] replaced with dry[JJ]
Word cool[JJ] replaced with cold[JJ]
Word cool[JJ] replaced with chill[JJ]
Word cool[JJ] replaced with cooler[JJ]
Word cool[JJ] replaced with heat[JJ]
Word cool[JJ] replaced with warm[JJ]
Word cool[JJ] replaced with heat[JJ]



 sadly though i have never gotten to experience the post coitus cigarette before and now i never will. :-0.4215

Some antonyms for word sadly are ['happily']
Word sadly[RB] replaced with thankfully[RB]
Word sadly[RB] replaced with terribly[RB]
Word sadly[RB] replaced with frankly[RB]
Word sadly[RB] replaced with curiously[RB]
Word sadly[RB] replaced with painfully[RB]
Word sadly[RB] replaced with strangely[RB]
Word sadly[RB] replaced with happily[RB]



 i had such a nice day. too bad the rain comes in tomorrow at 5am :-0.1779

Some antonyms for word nice are ['nasty']
Word nice[JJ] replaced with good[JJ]
Word nice[JJ] replaced with happy[JJ]
Word nice[JJ] replaced with perfect[JJ]
Word nice[JJ] replaced with pretty[JJ]
Word nice[JJ] replaced with wonderful[JJ]
Word nice[JJ] replaced with lovely[JJ]
Word nice[JJ] replaced with guy[JJ]
Word nice[JJ] replaced with nasty[JJ]
Some antonyms for word bad are ['unregretful', 'good', 'goodness']
Word bad[JJ] replaced with good[JJ]
Word bad[JJ] replaced with unregretful[JJ]
Word bad[JJ] replaced with good[JJ]
Word bad[JJ] replaced with goodness[JJ]



  too bad i will not be around i lost my job and can not even pay my phone bill lmao aw shucks :0.3935

Some antonyms for word bad are ['unregretful', 'good', 'goodness']
Word bad[JJ] replaced with good[JJ]
Word bad[JJ] replaced with unregretful[JJ]
Word bad[JJ] replaced with good[JJ]
Word bad[JJ] replaced with goodness[JJ]
Some antonyms for word lost are ['profit', 'gain', 'won']
Word lost[VBD] replaced with won[VBD]
Word lost[VBD] replaced with won[VBD]
Word lost[VBD] replaced with found[VBD]
Word lost[VBD] replaced with saved[VBD]
Some antonyms for word pay are ['default']
Word pay[VB] replaced with cost[VB]
Word pay[VB] replaced with receive[VB]
Word pay[VB] replaced with default[VB]
 --- lmao not in vocabulary ---
No replacements found at all for word lmao



 mo jobs no money.  how in the hell is min wage here 4 fn clams an hour:-0.7783

Some antonyms for word no are ['yes', 'all']
Word no[DT] replaced with any[DT]
Word no[DT] replaced with all[DT]
Some antonyms for word hell are ['Heaven']
Word hell[NN] replaced with heaven[NN]
Word hell[NN] replaced with crazy[NN]



  agreed i saw the failwhale allllll day today. :0.2732

Some antonyms for word agreed are ['disagree']
 -- No POS approved words! -- for word agreed
 some non-POS:['agree', 'agreement', 'decided', 'agreeing']
 -- No new Strings generated ---



  oh! haha... dude i dont really look at em unless someone says hey i added you. sorry  i am so terrible at that. i need a pop up!:-0.6459

No antonyms found for word haha
Word haha[NN] replaced with prude[NN]
Word haha[NN] replaced with porpoise[NN]
Word haha[NN] replaced with será[NN]
Word haha[NN] replaced with clavaria[NN]
Word haha[NN] replaced with thunderer[NN]
Some antonyms for word sorry are ['unregretful']
Word sorry[VB] replaced with glad[VB]
Word sorry[VB] replaced with sad[VB]
Word sorry[VB] replaced with regret[VB]
No antonyms found for word terrible
Word terrible[JJ] replaced with horrible[JJ]
Word terrible[JJ] replaced with awful[JJ]
Word terrible[JJ] replaced with dreadful[JJ]
Word terrible[JJ] replaced with horrendous[JJ]
Wor


  i am sure you are right...    i need to start working out with you and the nikster... or jared at least!:0.3802

Some antonyms for word sure are ['unsure', 'uncertain']
Word sure[JJ] replaced with uncertain[JJ]



 i really hate how people diss my bands!  trace is clearly not ugly!:0.2484

Some antonyms for word hate are ['love']
Word hate[VB] replaced with racist[VB]
Word hate[VB] replaced with racism[VB]
Word hate[VB] replaced with fear[VB]
Word hate[VB] replaced with bigotry[VB]
Word hate[VB] replaced with love[VB]
Some antonyms for word clearly are ['unintelligibly']
Word clearly[RB] replaced with obviously[RB]
Word clearly[RB] replaced with certainly[RB]
Word clearly[RB] replaced with indeed[RB]
Word clearly[RB] replaced with nonetheless[RB]
Word clearly[RB] replaced with unintelligibly[RB]
Some antonyms for word ugly are ['beautiful']
 -- No POS approved words! -- for word ugly
 some non-POS:['nasty', 'awful', 'horrible', 'stupid']



 gym attire today was puma singlet adidas shorts.......and black business socks and leather shoes  lucky did not run into any cute girls.:0.7003

Some antonyms for word lucky are ['unlucky']
Word lucky[RB] replaced with unlucky[RB]
Word lucky[RB] replaced with maybe[RB]
Word lucky[RB] replaced with unlucky[RB]
No antonyms found for word cute
Word cute[JJ] replaced with adorable[JJ]
Word cute[JJ] replaced with goofy[JJ]
Word cute[JJ] replaced with sexy[JJ]
Word cute[JJ] replaced with cuddly[JJ]
Word cute[JJ] replaced with funny[JJ]
Word cute[JJ] replaced with perky[JJ]
Word cute[JJ] replaced with naughty[JJ]
