In [2]:
import re
import nltk
from nltk.stem import WordNetLemmatizer
import pandas as pd
lemmatizer = WordNetLemmatizer()

In [3]:
# NOTE(review): this repeats the `lemmatizer = WordNetLemmatizer()` assignment made in the
# imports cell above; re-running it only rebinds the name. PreprocessText._clean_text
# builds its own WordNetLemmatizer and does not read this module-level instance.
lemmatizer = WordNetLemmatizer()

In [4]:
# Build the token -> weight lookup from the first two columns of the lexicon file.
# The file is read once; `.values.T` yields two rows (column 0, column 1) which
# `zip(*...)` pairs up element by element.
lexicon = dict(
    zip(*pd.read_csv('vader_lexicon.csv', encoding='cp437', header=None)[[0, 1]].values.T)
)

In [5]:
# Negation words: when one of these directly precedes a scored word, we invert the sign of that word's score.
# e.g. "I like Apple" has a score of 1.5, but "I don't like Apple" would have a score of -1.5 because of the negation.
negate = \
    ["aint", "arent", "cannot", "cant", "couldnt", "darent", "didnt", "doesnt",
     "ain't", "aren't", "can't", "couldn't", "daren't", "didn't", "doesn't",
     "dont", "hadnt", "hasnt", "havent", "isnt", "mightnt", "mustnt", "neither",
     "don't", "hadn't", "hasn't", "haven't", "isn't", "mightn't", "mustn't",
     "neednt", "needn't", "never", "none", "nope", "nor", "not", "nothing", "nowhere",
     "oughtnt", "shant", "shouldnt", "uhuh", "wasnt", "werent",
     "oughtn't", "shan't", "shouldn't", "uh-uh", "wasn't", "weren't",
     "without", "wont", "wouldnt", "won't", "wouldn't", "rarely", "seldom", "despite"]

# NOTE(review): `boost` is not referenced by any code visible in this notebook.
# Presumably these are intensifier words meant to scale a neighbouring word's score — confirm before relying on it.
boost = \
    ['absolutely', 'amazingly', 'awfully', 'completely', 'considerably', 'decidedly', 'deeply', 'effing', 'enormously',
     'entirely', 'especially', 'exceptionally', 'extremely', 'fabulously', 'flipping', 'flippin', 'fricking', 'frickin',
     'frigging', 'friggin', 'fully', 'fucking', 'greatly', 'hella', 'highly', 'hugely', 'incredibly', 'intensely', 'majorly',
     'more', 'most', 'particularly', 'purely', 'quite', 'really', 'remarkably', 'so', 'substantially',
     'thoroughly', 'totally', 'tremendously', 'uber', 'unbelievably', 'unusually', 'utterly', 'very', 'almost', 'barely']

In [6]:
class PreprocessText(object):
    """
    Wraps a raw text and exposes its lemmatized tokens.

    Attributes:
        text: the string passed to the constructor, unchanged.
        clean_text: list of tokens from `text.split()`, each passed through
            `WordNetLemmatizer.lemmatize`.
    """

    def __init__(self, text):
        self.text = text
        self.clean_text = self._clean_text()

    def _clean_text(self):
        """Split `self.text` on whitespace and lemmatize every token, in order."""
        wordnet = WordNetLemmatizer()
        return [wordnet.lemmatize(token) for token in self.text.split()]

In [7]:
class Sentiment(object):
    """
    Lexicon-based sentiment analyzer.

    Every word of the input that is found in the lexicon contributes its weight;
    the sign of that weight is flipped when the word directly before it is a
    negation word. The final score is the mean of the collected weights divided by 5.
    """

    def __init__(self):
        # Both attributes reference the module-level objects (no copy), so later
        # additions to `lexicon` are visible to analyzers that already exist.
        self.lexicon = lexicon
        self.negate = negate

    def score(self, text):
        """
        Return the sentiment score of `text`.

        The text is tokenized and lemmatized by PreprocessText; a text with no
        word in the lexicon scores 0.0.
        """
        words = PreprocessText(text).clean_text
        sentiments = []
        # Pass each word's own position so repeated words are checked against
        # their own predecessor, not the predecessor of the first occurrence.
        for index, item in enumerate(words):
            sentiments = self.sentiment_polarity(item, sentiments, words, index)
        return self.final_calculation(sentiments)

    def sentiment_polarity(self, item, sentiments, words, index=None):
        """
        Append the (possibly negated) lexicon weight of `item` to `sentiments`.

        Args:
            item: the word to look up; the lookup is done on its lower-cased form.
            sentiments: list of weights collected so far; mutated and returned.
            words: the full token list that `item` comes from.
            index: position of `item` in `words`. When omitted, the first
                occurrence of `item` in `words` is used.

        Returns:
            The same `sentiments` list, unchanged if `item` is not in the lexicon.
        """
        item_lowercase = item.lower()
        if item_lowercase in self.lexicon:
            weight = self.lexicon[item_lowercase]
            sentiments.append(self.negation_check(words, item, weight, index))
        return sentiments

    def final_calculation(self, sentiments):
        """
        Average the collected weights and scale the result by 1/5.

        Returns 0.0 when `sentiments` is empty (no lexicon word was found)
        instead of raising ZeroDivisionError.
        """
        if not sentiments:
            return 0.0
        final_calc = sum(sentiments) / len(sentiments)
        # NOTE(review): lexicon weights appear to lie within about [-5, 5], so this
        # presumably maps the mean into roughly [-1, 1] — confirm against the lexicon.
        return final_calc / 5

    def negation_check(self, words, item, weight, index=None):
        """
        Flip the sign of `weight` when the word right before `item` is a negation.

        "I love apples"       ->  3.2
        "I don't love apples" -> -3.2

        Args:
            words: the full token list.
            item: the scored word.
            weight: lexicon weight of `item`.
            index: position of `item` in `words`. When omitted, the first
                occurrence of `item` is used (raises ValueError if absent).

        Returns:
            `-weight` if the preceding word (lower-cased) is in `self.negate`,
            otherwise `weight`.
        """
        if index is None:
            index = words.index(item)
        # A word at position 0 has no predecessor; without this guard
        # words[-1] would wrap around to the last word of the text.
        if index > 0 and words[index - 1].lower() in self.negate:
            return -weight
        return weight


In [178]:
"""Examples"""

'Examples'

In [15]:
analyzer = Sentiment()

In [16]:
analyzer.score('Apple is fine')

0.16

In [17]:
analyzer.score("apple isn't fine")

-0.16

In [18]:
analyzer.score('Google is an amazing stock!')

0.5599999999999999

In [19]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()

sid.polarity_scores('i loved tesla')['compound']



0.5994

In [20]:
analyzer.score('i loved tesla') 

0.58

In [25]:
# Dividing the mean lexicon weight by 5 gives scores close to VADER's compound score
# on the examples above.
# NOTE(review): the original note says "this is how vader does it" — confirm against the
# VADER implementation; its compound score may use a different normalization.
# TODO: we are still reading up on a better way to standardize these scores.

analyzer.score(text = 'i love tesla')

0.64

In [33]:
sid.polarity_scores('apple is fine %')['compound']

0.2023

In [34]:
sid.polarity_scores('apple is fine')['compound']

0.2023

In [13]:
lexicon

{'$:': -1.5,
 '%)': -0.4,
 '%-)': -1.5,
 '&-:': -0.4,
 '&:': -0.7,
 "( '}{' )": 1.6,
 '(%': -0.9,
 "('-:": 2.2,
 "(':": 2.3,
 '((-:': 2.1,
 '(*': 1.1,
 '(-%': -0.7,
 '(-*': 1.3,
 '(-:': 1.6,
 '(-:0': 2.8,
 '(-:<': -0.4,
 '(-:o': 1.5,
 '(-:O': 1.5,
 '(-:{': -0.1,
 '(-:|>*': 1.9,
 '(-;': 1.3,
 '(-;|': 2.1,
 '(8': 2.6,
 '(:': 2.2,
 '(:0': 2.4,
 '(:<': -0.2,
 '(:o': 2.5,
 '(:O': 2.5,
 '(;': 1.1,
 '(;<': 0.3,
 '(=': 2.2,
 '(?:': 2.1,
 '(^:': 1.5,
 '(^;': 1.5,
 '(^;0': 2.0,
 '(^;o': 1.9,
 '(o:': 1.6,
 ")':": -2.0,
 ")-':": -2.1,
 ')-:': -2.1,
 ')-:<': -2.2,
 ')-:{': -2.1,
 '):': -1.8,
 '):<': -1.9,
 '):{': -2.3,
 ');<': -2.6,
 '*)': 0.6,
 '*-)': 0.3,
 '*-:': 2.1,
 '*-;': 2.4,
 '*:': 1.9,
 '*<|:-)': 1.6,
 '*\\0/*': 2.3,
 '*^:': 1.6,
 ',-:': 1.2,
 "---'-;-{@": 2.3,
 '--<--<@': 2.2,
 '.-:': -1.2,
 '..###-:': -1.7,
 '..###:': -1.9,
 '/-:': -1.3,
 '/:': -1.3,
 '/:<': -1.4,
 '/=': -0.9,
 '/^:': -1.0,
 '/o:': -1.4,
 '0-8': 0.1,
 '0-|': -1.2,
 '0:)': 1.9,
 '0:-)': 1.4,
 '0:-3': 1.5,
 '0:03': 1.9,
 '

In [35]:
from gensim.models.word2vec import Word2Vec

paramiko missing, opening SSH/SCP/SFTP paths will be disabled.  `pip install paramiko` to suppress


In [37]:
#Word2Vec(df.tokenized).most_similar('twitter')

In [47]:
from nltk.tokenize import word_tokenize

df = pd.read_csv('trading-tweets.csv')

In [49]:
df['text_lower'] = df.text.str.lower() 

In [51]:
df.head()

Unnamed: 0.1,Unnamed: 0,text,time,text_lower
0,0,"Hoy nos toca irnos en negativo 1,25. Un mercad...",6/13/2019 17:04,"hoy nos toca irnos en negativo 1,25. un mercad..."
1,1,🏄 #GBPUSD Head &amp; Shoulders Pattern Challe...,6/13/2019 17:04,🏄 #gbpusd head &amp; shoulders pattern challe...
2,2,$SPX #stock $AVGO highlights the very short li...,6/13/2019 17:03,$spx #stock $avgo highlights the very short li...
3,3,"As we always said, there are traders and then ...",6/13/2019 17:03,"as we always said, there are traders and then ..."
4,4,RT @EarnsBit: Hello Community!\r\nWe are happy...,6/13/2019 17:03,rt @earnsbit: hello community!\r\nwe are happy...


In [60]:
# TODO: the tweet text still needs cleaning (e.g. HTML entities such as "&amp;" show up in df.head()).
# Trying BeautifulSoup's get_text() on a single row first.

from bs4 import BeautifulSoup

BeautifulSoup(df.text_lower[0], 'lxml').get_text()

'hoy nos toca irnos en negativo 1,25. un mercado extraño con movimientos alcista y bajistas durante toda la sesión.… https://t.co/cuxsn4taj5'

In [68]:
def clean_text(text):
    """
    Return `text` with markup removed via BeautifulSoup's lxml parser.

    Best-effort: if parsing raises for any reason, the input is returned unchanged.
    """
    try:
        return BeautifulSoup(text, 'lxml').get_text()
    except Exception:
        # Was a bare `except:`, which would also swallow KeyboardInterrupt and
        # SystemExit; ordinary parsing errors still fall back to the raw input.
        return text

In [74]:
# Add trading-specific terms to the shared lexicon dict. Sentiment.__init__ stores a
# reference to this same dict, so the already-created `analyzer` picks these up too.
# A weight of +/-5 yields a score of +/-1.0 after the divide-by-5 step in final_calculation.
lexicon['bullish'] = 5
lexicon['bearish'] = -5

In [82]:
analyzer.score('im bullish tesla!')

1.0

In [81]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()

sid.polarity_scores('im bullish tesla')['compound']

0.0

In [83]:
"""
bullish
bearish
rally/rallying
buy
sell
support
resistance
flying
rocket
up
down
over
under
long
short
soaring
"""

'"RT @FXDD: #EURUSD Huge Bearish M Pattern Looming Above Important Support  https://t.co/zNhsbmmgxz
#trading #forex #PriceAction #FXDD #Elite…"
'

'Sold NZDJPY 71.377 #StrongWeak #Forex #Trend #Trading'
"We should be able to add context for the fact that this is a very negative tweet with respect to NZDJPY."



KeyError: 'buy'