In [1]:
import gensim
from sklearn.manifold import TSNE
from gensim.models import Word2Vec
from gensim.models import KeyedVectors
import gensim.downloader as api
import numpy as np
import re
import csv

import pandas as pd
import pprint

import string
import nltk


from IPython.display import HTML


In [2]:
# Directory holding the input datasets, relative to the notebook's working directory.
pathToDatasets = '../datasets/'
# Path to a GoogleNews word2vec binary; no other cell shown in this notebook reads it.
filePath = '../datasets/GoogleNews-vectors-negative300.bin'
# Load the "glove-wiki-gigaword-100" vectors through gensim's downloader API.
word_vectors = api.load("glove-wiki-gigaword-100")
# Fetch the NLTK resources used later: the VADER lexicon, the punkt tokenizer
# data, and the averaged-perceptron POS tagger.
nltk.download('vader_lexicon')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/ubuntu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /home/ubuntu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/ubuntu/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [9]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize



# VADER analyzer shared by every sentiment-scoring call in this notebook.
senty = SentimentIntensityAnalyzer()

# Mapping used only for `word in vocabulary` membership tests.
# Gensim 4 removed `KeyedVectors.vocab` (accessing it raises AttributeError) in
# favour of `key_to_index`; fall back to `.vocab` so Gensim 3.x keeps working.
vocabulary = (
    word_vectors.key_to_index
    if hasattr(word_vectors, "key_to_index")
    else word_vectors.vocab
)


NUMBER_OF_ALTERNATIVES = 7  # nearest neighbours requested per sentiment-bearing word
TWEET_START = 50            # passed to pandas.read_csv as `skiprows`
NUM_OF_TWEETS = 50          # passed to pandas.read_csv as `nrows`

In [4]:
def cstr(s, color='black'):
    """Wrap ``s`` in a ``<text>`` element whose inline style sets ``color``.

    ``s`` is inserted verbatim (no HTML escaping), so it may itself contain markup.
    """
    openingTag = "<text style=color:{}>".format(color)
    return openingTag + "{}</text>".format(s)

## Utility Code

In [5]:
def cleanAndTokenizeText(text):
    """Lower-case ``text``, drop every ``string.punctuation`` character, and tokenize.

    Returns whatever ``word_tokenize`` produces for the cleaned string.
    """
    # A translation table that maps each punctuation character to "deleted".
    dropPunctuation = str.maketrans('', '', string.punctuation)
    cleaned = text.lower().translate(dropPunctuation)
    return word_tokenize(cleaned)

def listReplacements(word, topn=None):
    """Return the words most similar to ``word`` according to ``word_vectors``.

    NOTE(review): a later cell defines ``listReplacements`` again; that later
    definition shadows this one once both cells have run. Consider keeping one.

    Parameters
    ----------
    word : str
        Token to look up.
    topn : int, optional
        Number of neighbours to request. Defaults to ``NUMBER_OF_ALTERNATIVES``
        (read at call time).

    Returns
    -------
    list
        Neighbour words in the order ``most_similar`` returns them, or ``[]``
        when ``word`` is not in ``vocabulary``.
    """
    if word not in vocabulary:
        return []
    if topn is None:
        topn = NUMBER_OF_ALTERNATIVES
    # most_similar yields (word, similarity) pairs; keep only the word.
    # A distinct loop name avoids shadowing the ``word`` parameter.
    neighbours = word_vectors.most_similar(word, topn=topn)
    return [neighbour[0] for neighbour in neighbours]

def getPOSTags(tweet):
    """Run ``nltk.pos_tag`` over the token sequence ``tweet`` and return its result."""
    return nltk.pos_tag(tweet)

## NLP Utility Code

In [11]:
def listReplacements(word, topn=None):
    """Return the words most similar to ``word`` according to ``word_vectors``.

    NOTE(review): an earlier cell also defines ``listReplacements``; this later
    definition is the one in effect after a top-to-bottom run. Consider
    keeping only one copy.

    Parameters
    ----------
    word : str
        Token to look up.
    topn : int, optional
        Number of neighbours to request. Defaults to ``NUMBER_OF_ALTERNATIVES``
        (read at call time).

    Returns
    -------
    list
        Neighbour words in the order ``most_similar`` returns them, or ``[]``
        when ``word`` is not in ``vocabulary``.
    """
    if word not in vocabulary:
        return []
    if topn is None:
        topn = NUMBER_OF_ALTERNATIVES
    # most_similar yields (word, similarity) pairs; keep only the word.
    # A distinct loop name avoids shadowing the ``word`` parameter.
    neighbours = word_vectors.most_similar(word, topn=topn)
    return [neighbour[0] for neighbour in neighbours]


def posApprovedReplacements(alternativeWords, userTokens, indexOfToken):
    """Keep the candidates that receive the same POS tag as the word they replace.

    Each candidate is substituted at ``indexOfToken`` in a copy of
    ``userTokens``, the whole sequence is re-tagged with ``getPOSTags``, and the
    candidate is kept when the tag at that position equals the tag the original
    word received. ``userTokens`` itself is not modified.

    Returns the accepted candidates in their input order; returns ``[]`` (after
    printing a notice) when ``alternativeWords`` is an empty list.
    """
    if alternativeWords == []:
        print("--- No alternative words! ---")
        return []

    trialTokens = userTokens[:]
    referenceTag = getPOSTags(trialTokens)[indexOfToken][1]
    originalWord = userTokens[indexOfToken]

    accepted = []
    for candidate in alternativeWords:
        # Re-tag the full sentence so the candidate is judged in context.
        trialTokens[indexOfToken] = candidate
        candidateTag = getPOSTags(trialTokens)[indexOfToken][1]
        if str(candidateTag) != str(referenceTag):
            continue
        print("Word {0}[{1}] replaced with {2}[{3}]".format(originalWord, referenceTag, candidate, candidateTag))
        accepted.append(candidate)
    return accepted
        

In [18]:
def getAlternativeSentences(tweet, sentimentOfTweet):
    """Build variants of ``tweet`` with one sentiment-bearing word replaced.

    The tweet is cleaned and tokenized with ``cleanAndTokenizeText``. Every
    token whose own VADER compound score is non-zero is replaced, one
    candidate at a time, by each word that ``listReplacements`` proposes and
    ``posApprovedReplacements`` accepts.

    Parameters
    ----------
    tweet : str
        Raw tweet text.
    sentimentOfTweet : float
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    list of str
        One space-joined sentence per accepted replacement. The replacement is
        embedded as blue, italic, bracketed HTML markup produced by ``cstr``,
        so these strings are meant for display.
    """
    userInputTokens = cleanAndTokenizeText(tweet)

    alternativeStrings = []
    for ind, word in enumerate(userInputTokens):
        # Only words that carry sentiment on their own are worth swapping.
        score = senty.polarity_scores(word)['compound']
        if score == 0.0:
            continue

        replacements = posApprovedReplacements(listReplacements(word), userInputTokens[:], ind)
        if not replacements:
            print("--- No pos approved words! ---")
            continue

        for newWord in replacements:
            # Fresh copy per candidate so exactly one position differs from the input.
            newUserTokens = userInputTokens[:]
            newUserTokens[ind] = cstr(" <i>[{0}]</i> ".format(newWord), 'blue')
            alternativeStrings.append(' '.join(newUserTokens))
    return alternativeStrings

## Main Cells

In [22]:
def _stripDisplayMarkup(alteredTweet):
    """Remove HTML tags and the square brackets wrapped around a replacement word."""
    return re.sub(r'<[^>]*>|[\[\]]', '', alteredTweet)


def _colorForChange(newSentiment, mainSentiment):
    """Pick the display color: gray if unchanged or neutral, green if higher, red if lower."""
    if newSentiment == mainSentiment or newSentiment == 0:
        return 'DarkGray'
    if newSentiment > mainSentiment:
        return 'green'
    return 'red'


def runThroughTweets():
    """Score a slice of tweets and display each one-word alternative, color-coded.

    Reads ``NUM_OF_TWEETS`` rows of ``cleanedTweets.csv`` from
    ``pathToDatasets``, skips tweets whose VADER compound score is 0, and for
    every alternative from ``getAlternativeSentences`` displays the sentence
    with its compound score, colored by how it compares with the original.
    """
    # NOTE(review): an integer `skiprows` counts lines from the top of the file, so
    # with TWEET_START > 0 the first remaining line is parsed as the header and is
    # not returned as data. Confirm this is intended for cleanedTweets.csv.
    tweets_df = pd.read_csv(pathToDatasets + 'cleanedTweets.csv', nrows=NUM_OF_TWEETS, skiprows=TWEET_START)

    for counter, row in enumerate(tweets_df.values):
        tweet = row[0]
        if not isinstance(tweet, str):
            # Empty cells are read as NaN; VADER can only score text.
            continue
        mainSentiment = senty.polarity_scores(tweet)['compound']
        if mainSentiment == 0:
            continue
        print("\nNUM({2}) {0}:{1}\n".format(tweet, mainSentiment, counter))
        newStrings = getAlternativeSentences(tweet, mainSentiment)
        if not newStrings:
            print("--- No new strings generated ---\n\n")
            continue
        for alteredTweet in newStrings:
            # Score the plain text, not the HTML: with the markup left in, the
            # replacement token never matches the VADER lexicon and the tag
            # fragments count as extra words.
            sentimentOfNewString = senty.polarity_scores(_stripDisplayMarkup(alteredTweet))['compound']
            color = _colorForChange(sentimentOfNewString, mainSentiment)
            display(HTML(cstr("{0}: {1}".format(alteredTweet, sentimentOfNewString), color)))
    
    
runThroughTweets()


NUM(0) broadband plan 'a massive broken promise'  via  still waiting for broadband we are :-0.2023

Word broken[NN] replaced with breaking[NN]
Word broken[NN] replaced with broke[NN]
Word broken[NN] replaced with apart[NN]
Word broken[NN] replaced with neck[NN]
Word promise[NN] replaced with pledge[NN]
Word promise[NN] replaced with commitment[NN]
Word promise[NN] replaced with hope[NN]
Word promise[NN] replaced with give[NN]
Word promise[NN] replaced with desire[NN]



NUM(1)  wow tons of replies from you may have to unfollow so i can see my friends' tweets you're scrolling the feed a lot. :0.7845

Word wow[JJ] replaced with c'mon[JJ]
Word wow[JJ] replaced with hey[JJ]
Word wow[JJ] replaced with gosh[JJ]
Word wow[JJ] replaced with whew[JJ]
Word friends[NNS] replaced with parents[NNS]
Word friends[NNS] replaced with relatives[NNS]
Word friends[NNS] replaced with others[NNS]



NUM(3) put vacation photos online a few yrs ago. pc crashed and now i forget the name of the site. :-0.2263

Word forget[VBP] replaced with remember[VBP]
Word forget[VBP] replaced with tell[VBP]
Word forget[VBP] replaced with imagine[VBP]
Word forget[VBP] replaced with know[VBP]



NUM(4) i need a hug :0.4767

Word hug[NN] replaced with hugs[NN]
Word hug[NN] replaced with kiss[NN]
Word hug[NN] replaced with hugged[NN]
Word hug[NN] replaced with goodbye[NN]



NUM(5)  not sure what they are only that they are pos! as much as i want to i dont think can trade away company assets sorry andy! :-0.2134

--- No pos approved words! ---
Word want[VBP] replaced with do[VBP]
Word want[VBP] replaced with know[VBP]
Word want[VBP] replaced with get[VBP]
Word assets[NNS] replaced with investments[NNS]
Word assets[NNS] replaced with funds[NNS]
Word assets[NNS] replaced with debts[NNS]
Word sorry[VBP] replaced with 'm[VBP]
Word sorry[VBP] replaced with glad[VBP]
Word sorry[VBP] replaced with sad[VBP]
Word sorry[VBP] replaced with regret[VBP]



NUM(6)  i hate when that happens... :-0.5719

Word hate[NN] replaced with anyone[NN]
Word hate[NN] replaced with crime[NN]



NUM(7) i have a sad feeling that dallas is not going to show up  i gotta say though you'd think more shows would use music from the game. mmm:-0.3818

Word sad[JJ] replaced with awful[JJ]
Word sad[JJ] replaced with tragic[JJ]
Word sad[JJ] replaced with horrible[JJ]
Word sad[JJ] replaced with happy[JJ]
Word sad[JJ] replaced with poignant[JJ]
Word feeling[NN] replaced with feel[NN]
Word feeling[NN] replaced with felt[NN]
Word feeling[NN] replaced with sense[NN]



NUM(9) where did u move to?  i thought u were already in sd. ?? hmmm. random u found me. glad to hear yer doing well.:0.6848

Word glad[JJ] replaced with thankful[JJ]
Word glad[JJ] replaced with pleased[JJ]
Word glad[JJ] replaced with happy[JJ]
Word well[RB] replaced with so[RB]
Word well[RB] replaced with even[RB]



NUM(10)  i miss my ps3 it's out of commission  wutcha playing? have you copped 'blood on the sand'?:0.1431

Word miss[VBP] replaced with play[VBP]
Word playing[VBG] replaced with having[VBG]



NUM(12) the life is cool. but not for me. :0.1655

Word cool[JJ] replaced with hot[JJ]
Word cool[JJ] replaced with warm[JJ]
Word cool[JJ] replaced with dry[JJ]
Word cool[JJ] replaced with cold[JJ]
Word cool[JJ] replaced with chill[JJ]
Word cool[JJ] replaced with heat[JJ]



NUM(13) sadly though i've never gotten to experience the post coitus cigarette before and now i never will. :-0.4215

Word sadly[RB] replaced with thankfully[RB]
Word sadly[RB] replaced with terribly[RB]
Word sadly[RB] replaced with frankly[RB]
Word sadly[RB] replaced with curiously[RB]
Word sadly[RB] replaced with painfully[RB]
Word sadly[RB] replaced with strangely[RB]



NUM(14) i had such a nice day. too bad the rain comes in tomorrow at 5am :-0.1779

Word nice[JJ] replaced with good[JJ]
Word nice[JJ] replaced with happy[JJ]
Word nice[JJ] replaced with perfect[JJ]
Word nice[JJ] replaced with pretty[JJ]
Word nice[JJ] replaced with wonderful[JJ]
Word nice[JJ] replaced with lovely[JJ]
Word nice[JJ] replaced with guy[JJ]
Word bad[JJ] replaced with good[JJ]



NUM(15)  too bad i won't be around i lost my job and can't even pay my phone bill lmao aw shucks :0.3935

Word bad[JJ] replaced with good[JJ]
Word lost[VBD] replaced with won[VBD]
Word pay[VB] replaced with cost[VB]
Word pay[VB] replaced with receive[VB]
--- No alternative words! ---
--- No pos approved words! ---



NUM(17) mo jobs no money.  how in the hell is min wage here 4 f'n clams an hour?:-0.7783

Word no[DT] replaced with any[DT]
Word hell[NN] replaced with heaven[NN]
Word hell[NN] replaced with crazy[NN]



NUM(19)  agreed i saw the failwhale allllll day today. :0.2732

--- No pos approved words! ---
--- No new strings generated ---



NUM(20)  oh! haha... dude i dont really look at em unless someone says hey i added you. sorry  i'm so terrible at that. i need a pop up!:-0.6459

Word haha[JJ] replaced with será[JJ]
Word sorry[VBP] replaced with 'm[VBP]
Word sorry[VBP] replaced with glad[VBP]
Word sorry[VBP] replaced with sad[VBP]
Word sorry[VBP] replaced with regret[VBP]
Word terrible[JJ] replaced with horrible[JJ]
Word terrible[JJ] replaced with awful[JJ]
Word terrible[JJ] replaced with dreadful[JJ]
Word terrible[JJ] replaced with horrendous[JJ]
Word terrible[JJ] replaced with horrific[JJ]
Word terrible[JJ] replaced with tragic[JJ]



NUM(21)  i'm sure you're right...    i need to start working out with you and the nikster... or jared at least!:0.3802

--- No pos approved words! ---
--- No new strings generated ---



NUM(22) i really hate how people diss my bands!  trace is clearly not ugly!:0.2484

Word hate[VB] replaced with racist[VB]
Word hate[VB] replaced with racism[VB]
Word hate[VB] replaced with fear[VB]
Word hate[VB] replaced with bigotry[VB]
Word clearly[RB] replaced with obviously[RB]
Word clearly[RB] replaced with certainly[RB]
Word clearly[RB] replaced with indeed[RB]
Word clearly[RB] replaced with nonetheless[RB]
--- No pos approved words! ---



NUM(23) gym attire today was: puma singlet adidas shorts.......and black business socks and leather shoes  lucky did not run into any cute girls.:0.7003

Word lucky[RB] replaced with unlucky[RB]
Word lucky[RB] replaced with maybe[RB]
Word cute[JJ] replaced with adorable[JJ]
Word cute[JJ] replaced with goofy[JJ]
Word cute[JJ] replaced with sexy[JJ]
Word cute[JJ] replaced with cuddly[JJ]
Word cute[JJ] replaced with funny[JJ]
Word cute[JJ] replaced with perky[JJ]
Word cute[JJ] replaced with naughty[JJ]



NUM(25) no picnic  my phone smells like citrus.:0.0772

Word no[DT] replaced with any[DT]
--- No pos approved words! ---



NUM(26)  my donkey is sensitive about such comments. nevertheless he'd (and me'd) be glad to see your mug asap. charger is still awol. :0.1779

Word glad[JJ] replaced with thankful[JJ]
Word glad[JJ] replaced with happy[JJ]
Word glad[JJ] replaced with i[JJ]
Word awol[JJ] replaced with unacknowledged[JJ]
Word awol[JJ] replaced with unmentioned[JJ]
Word awol[JJ] replaced with unreported[JJ]
Word awol[JJ] replaced with berserk[JJ]
Word awol[JJ] replaced with unrewarded[JJ]



NUM(27) no new csi tonight.  fml:-0.296

Word no[DT] replaced with any[DT]



NUM(28) i think my arms are sore from tennis :-0.3612

--- No pos approved words! ---
--- No new strings generated ---



NUM(29) wonders why someone that u like so much can make you so unhappy in a split seccond . depressed . :-0.6615

--- No pos approved words! ---
Word unhappy[JJ] replaced with disappointed[JJ]
Word unhappy[JJ] replaced with anxious[JJ]
Word unhappy[JJ] replaced with happy[JJ]
Word depressed[VBD] replaced with distressed[VBD]
Word depressed[VBD] replaced with worried[VBD]



NUM(30) sleep soon... i just hate saying bye and see you tomorrow for the night. :-0.5719

--- No pos approved words! ---
--- No new strings generated ---



NUM(31)  just got ur newsletter those fares really are unbelievable shame i already booked and paid for mine :-0.315

Word unbelievable[JJ] replaced with incredible[JJ]
Word unbelievable[JJ] replaced with awesome[JJ]
Word unbelievable[JJ] replaced with fantastic[JJ]
Word unbelievable[JJ] replaced with awful[JJ]
Word unbelievable[JJ] replaced with phenomenal[JJ]
Word shame[NN] replaced with disgrace[NN]
Word shame[NN] replaced with pity[NN]
Word shame[NN] replaced with sorrow[NN]
Word shame[NN] replaced with disgust[NN]
Word shame[NN] replaced with despair[NN]



NUM(34) damn... i don't have any chalk! my chalkboard is useless :-0.4753

Word damn[NN] replaced with darn[NN]
Word damn[NN] replaced with yeah[NN]
Word damn[NN] replaced with heck[NN]
Word useless[JJ] replaced with irrelevant[JJ]
Word useless[JJ] replaced with meaningless[JJ]
Word useless[JJ] replaced with superfluous[JJ]
Word useless[JJ] replaced with impractical[JJ]
Word useless[JJ] replaced with pointless[JJ]
Word useless[JJ] replaced with worthless[JJ]
Word useless[JJ] replaced with harmless[JJ]



NUM(35) had a blast at the getty villa but hates that she's had a sore throat all day. it's just getting worse too :-0.9052

Word hates[VBZ] replaced with loves[VBZ]
Word hates[VBZ] replaced with thinks[VBZ]
Word hates[VBZ] replaced with cares[VBZ]
Word hates[VBZ] replaced with despises[VBZ]
Word hates[VBZ] replaced with likes[VBZ]
Word hates[VBZ] replaced with adores[VBZ]
Word sore[NN] replaced with shoulder[NN]
Word sore[NN] replaced with tendinitis[NN]
Word sore[NN] replaced with knee[NN]
Word sore[NN] replaced with groin[NN]
--- No pos approved words! ---



NUM(36)  hey missed ya at the meeting  sup mama:-0.296

Word missed[VBD] replaced with scored[VBD]
Word missed[VBD] replaced with shot[VBD]
Word missed[VBD] replaced with got[VBD]



NUM(37) my tummy hurts.  i wonder if the hypnosis has anything to do with it? if so it's working i get it stop smoking!!!:-0.7332

Word hurts[VBZ] replaced with weakens[VBZ]
Word hurts[VBZ] replaced with pleases[VBZ]
--- No pos approved words! ---



NUM(39)  sorry babe!!  my fam annoys me too. thankfully they're asleep right now. muahaha. *evil laugh*:-0.2225

Word sorry[NN] replaced with glad[NN]
Word sorry[NN] replaced with regret[NN]
--- No pos approved words! ---
Word thankfully[RB] replaced with luckily[RB]
Word thankfully[RB] replaced with fortunately[RB]
Word thankfully[RB] replaced with sadly[RB]
Word thankfully[RB] replaced with regrettably[RB]
Word thankfully[RB] replaced with admittedly[RB]
Word thankfully[RB] replaced with oftentimes[RB]
Word evil[NNS] replaced with enemies[NNS]
Word evil[NNS] replaced with god[NNS]
Word evil[NNS] replaced with demons[NNS]
Word evil[NNS] replaced with beings[NNS]
--- No pos approved words! ---



NUM(42) poor cameron (the hills) :-0.4767

Word poor[JJ] replaced with bad[JJ]
Word poor[JJ] replaced with low[JJ]



NUM(43) pray for me please the ex is threatening to start sh** at my/our babies 1st birthday party. what a jerk. and i still have a headache :0.128

Word pray[NN] replaced with prayed[NN]
Word pray[NN] replaced with god[NN]
Word pray[NN] replaced with mourn[NN]
Word pray[NN] replaced with prayer[NN]
Word please[VB] replaced with wish[VB]
Word please[VB] replaced with call[VB]
Word please[VB] replaced with forget[VB]
Word threatening[VBG] replaced with causing[VBG]
Word party[NN] replaced with opposition[NN]
Word party[NN] replaced with coalition[NN]
Word party[NN] replaced with leader[NN]
Word party[NN] replaced with election[NN]
Word jerk[NN] replaced with snatch[NN]
Word jerk[NN] replaced with kgs[NN]
Word jerk[NN] replaced with lifter[NN]
Word jerk[NN] replaced with goetschl[NN]



NUM(44)  hmm   do u really enjoy being with him ? if the problems are too constants u should think things more  find someone ulike:0.2006

Word enjoy[VB] replaced with prefer[VB]
Word enjoy[VB] replaced with feel[VB]
Word problems[NNS] replaced with difficulties[NNS]
Word problems[NNS] replaced with troubles[NNS]
Word problems[NNS] replaced with concerns[NNS]



NUM(45) strider is a sick little puppy  :-0.5106

Word sick[JJ] replaced with ill[JJ]
Word sick[JJ] replaced with pregnant[JJ]
Word sick[JJ] replaced with elderly[JJ]
Word sick[JJ] replaced with infected[JJ]
Word sick[JJ] replaced with tired[JJ]



NUM(46) so ryleegrace...wana go steve's party or not?? sadly since its easter i wnt b able 2 do much  but ohh well.....:0.4359

Word party[NN] replaced with opposition[NN]
Word party[NN] replaced with coalition[NN]
Word party[NN] replaced with leader[NN]
Word party[NN] replaced with election[NN]
Word party[NN] replaced with candidate[NN]
Word sadly[RB] replaced with thankfully[RB]
Word sadly[RB] replaced with terribly[RB]
Word sadly[RB] replaced with frankly[RB]
Word sadly[RB] replaced with curiously[RB]
Word sadly[RB] replaced with painfully[RB]
Word sadly[RB] replaced with strangely[RB]
Word well[RB] replaced with so[RB]
Word well[RB] replaced with even[RB]



NUM(47) hey i actually won one of my bracket pools! too bad it wasn't the one for money :0.126

--- No pos approved words! ---
Word bad[JJ] replaced with good[JJ]



NUM(49) a bad nite for the favorite teams: astros and spartans lose.  the nite out with t.w. was good.:-0.0772

Word bad[JJ] replaced with good[JJ]
Word favorite[JJ] replaced with favourite[JJ]
Word favorite[JJ] replaced with popular[JJ]
Word favorite[JJ] replaced with famous[JJ]
Word lose[VBP] replaced with get[VBP]
Word lose[VBP] replaced with want[VBP]
Word good[JJ] replaced with sure[JJ]
