In [None]:
# for Sentiment Analysis
from textblob import TextBlob
from sklearn.preprocessing import MinMaxScaler

import pandas as pd
pd.options.mode.chained_assignment = None

%load_ext google.colab.data_table 

In [None]:
# Sentiment analysis with TextBlob
# polarity is a float in the range [-1.0, 1.0]: -1.0 is very negative and 1.0 is very positive
# subjectivity is a float in the range [0.0, 1.0]: 0.0 is very objective and 1.0 is very subjective
# Subjective sentences generally express personal opinion, emotion or judgment, whereas objective ones state factual information

sentence = 'This class is interesting'
tb = TextBlob(sentence)

print(sentence)
print('polarity=', tb.polarity)
print('subjectivity=', tb.subjectivity)

In [None]:
# TextBlob is rule and pattern based.
# https://planspace.org/20150607-textblob_sentiment/
# https://github.com/sloria/TextBlob/blob/eb08c120d364e908646731d60b4e4c6c1712ff63/textblob/en/en-sentiment.xml
# Lexicon entry quoted from the file linked above:
# <word form="interesting" wordnet_id="a-01343918" pos="JJ" sense="arousing or holding the attention" polarity="0.5" subjectivity="0.5" intensity="1.0" confidence="0.9" />
# pos="JJ" (adjective)

# Display the sentiment assessments of `tb`, the blob created in an earlier cell
tb.sentiment_assessments

In [None]:
# Modifiers such as "very" are handled
# <word form="very" wordnet_id="r-00031899" pos="RB" sense="used as intensifier" polarity="0.2" subjectivity="0.3" intensity="1.3" confidence="0.9" />
# Note the intensity="1.3" attribute on "very"
sentence = 'This class is very interesting'
tb = TextBlob(sentence)

print(sentence)
print('polarity=', tb.polarity)
tb.sentiment_assessments

In [None]:
# Negations such as "not" are handled
# Default negation list quoted from the TextBlob source:
# self.negations   = kwargs.get("negations", ("no", "not", "n't", "never"))
sentence = 'This class is not interesting'
tb = TextBlob(sentence)

print(sentence)
print('polarity=', tb.polarity)
tb.sentiment_assessments

In [None]:
# Mood expressed through emoticons is handled
# https://github.com/sloria/TextBlob/blob/dev/textblob/_text.py#L223
# ("smile", +0.50): set((">:)", ":-)", ":)", "=)", "=]", ":]", ":}", ":>", ":3", "8)", "8-)")),
sentence = 'oh :)'
tb = TextBlob(sentence)

print(sentence)
print('polarity=', tb.polarity)
tb.sentiment_assessments

In [None]:
# Irony (sarcasm) marked with "(!)" is handled
sentence = "You're really good (!)"
tb = TextBlob(sentence)

print(sentence)
print('polarity=', tb.polarity)
tb.sentiment_assessments

In [None]:
# Profanity (dirty words) is understood
# Punctuation is not (probably the punctuation is removed in the text pre-processing step)
for phrase in ("he is a moron", "who is the moron?"):
    print(TextBlob(phrase).sentiment_assessments)

In [None]:
# The overall polarity averages the scores of the scored words: "interesting" and "tough"
sentence = "This class is interesting but the content is too tough"
tb = TextBlob(sentence)

print(sentence)
print('polarity=', tb.polarity)
tb.sentiment_assessments

In [None]:
# When a word (such as "mild") has different meanings depending on context, averaging is used again
# Lexicon entries for "mild" (three senses with different polarity):
#<word form="mild" cornetto_synset_id="n_a-518871" wordnet_id="a-01893510" pos="JJ" sense="humble in spirit or manner" polarity="0.5" subjectivity="0.5" intensity="1.0" confidence="0.9" />
#<word form="mild" cornetto_synset_id="n_a-535263" wordnet_id="a-00438332" pos="JJ" sense="mild and pleasant" polarity="0.5" subjectivity="0.5" intensity="1.0" confidence="0.9" />
#<word form="mild" wordnet_id="a-01508719" pos="JJ" sense="moderate in type or degree or effect or force" polarity="0.0" subjectivity="0.5" intensity="1.0" confidence="0.9" />

# The sentence must spell "mild" correctly, otherwise the entries quoted above are never matched
sentence = 'The effect of the drug is very mild'

print(sentence)
tb = TextBlob(sentence)
print('polarity=', tb.polarity)
tb.sentiment_assessments

In [None]:
# Use the Apple Tweets to predict the Apple stock price daily change
# The workbook is read straight from a GitHub URL, so this cell needs network access
url = 'https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/data/appleTweets.xlsx'
appleTweets = pd.read_excel(url)

# (rows, columns) of the loaded table
appleTweets.shape

In [None]:
# Cap on how many rows the preview cells display
SHOW_NUMS = 5000
appleTweets.head(SHOW_NUMS)

In [None]:
# List the available column names before choosing the ones to keep
appleTweets.columns

In [None]:
# Keep only the columns that are useful for the sentiment trading
COLUMNS = ['Date', 'User Name', 'Tweet content', 'Following', 'Hashtags']
appleTweetsSubset = appleTweets.loc[:, COLUMNS]

appleTweetsSubset.head(SHOW_NUMS)

In [None]:
# How many tweets repeat the text of an earlier tweet?
print('Duplicate content count=', appleTweetsSubset['Tweet content'].duplicated().sum())

# Keep only the first occurrence of each tweet text
print("Original row# :", len(appleTweetsSubset))
appleTweetsSubset = appleTweetsSubset.drop_duplicates(subset=['Tweet content'])
print("Row after removed duplicates# :", len(appleTweetsSubset))

In [None]:
# Keep only rows whose 'Following' value is at least MIN_FOLLOWING
MIN_FOLLOWING = 1

# Filter and renumber in one chained expression. reset_index() returns a new frame,
# so the result is an independent DataFrame rather than a slice of appleTweetsSubset
# mutated in place; later cells can add columns to it safely.
appleTweetsFiltered = (
    appleTweetsSubset[appleTweetsSubset['Following'] >= MIN_FOLLOWING]
    .reset_index(drop=True)
)

print("Original row# :", appleTweetsSubset.shape[0])
print("Filtered row# :", appleTweetsFiltered.shape[0])

In [None]:
# Preview the filtered tweets (at most SHOW_NUMS rows)
appleTweetsFiltered.head(SHOW_NUMS)

In [None]:
# Score every tweet with TextBlob's polarity
appleTweetsFiltered['sentiment'] = appleTweetsFiltered['Tweet content'].map(
    lambda tweet: TextBlob(tweet).polarity
)

# Weight each tweet's sentiment by its 'Following' count
appleTweetsFiltered['sentiment_weighted'] = appleTweetsFiltered['sentiment'].mul(
    appleTweetsFiltered['Following']
)

appleTweetsFiltered.head(SHOW_NUMS)

In [None]:
# Spot-check the sentiment assessment of a single tweet, picked by row position
INDEX = 3763

text = appleTweetsFiltered['Tweet content'].iloc[INDEX]
print(text)

TextBlob(text).sentiment_assessments

In [None]:
# Plot the sentiment_weighted
# One point per tweet row, in row order (this is before the per-date aggregation)
appleTweetsFiltered['sentiment_weighted'].plot(figsize=(12, 8))

In [None]:
# Group the weighted sentiment by Date for matching the stock daily change

# Convert Date to datetime so it can be matched with the stock daily change later
appleTweetsFiltered['Date'] = pd.to_datetime(appleTweetsFiltered['Date'])

# Select the numeric column BEFORE aggregating. Summing the whole frame would also try to
# "sum" the text columns (User Name, Tweet content, Hashtags), which is wasted work and,
# in pandas versions that no longer drop such columns silently, can raise.
aggregateSentiments = appleTweetsFiltered.groupby('Date')[['sentiment_weighted']].sum()
aggregateSentiments

In [None]:
# Fetch the daily OHLCV data for AAPL from Yahoo over a fixed date window
import pandas_datareader.data as web
from datetime import datetime

start, end = datetime(2016, 4, 2), datetime(2016, 4, 30)
stock = web.DataReader('AAPL', data_source='yahoo', start=start, end=end)
stock

In [None]:
# Daily change: open-to-close move expressed as a fraction of the open
stock['change'] = stock['Close'].sub(stock['Open']).div(stock['Open'])
stock.loc[:, ['Open', 'Close', 'change']]

In [None]:
# Join the daily aggregated sentiment onto the daily stock price change.
# A left join keeps every stock row, so dates with no sentiment row get NaN.
# The daily sentiment is then used to predict the daily price change.
merged = pd.merge(stock, aggregateSentiments, how='left', on='Date').loc[
    :, ['change', 'sentiment_weighted']
]
merged

In [None]:
# Scale both series to the range -1 to 1 so they share one unit
scaler = MinMaxScaler((-1, 1))
merged['changes'] = scaler.fit_transform(merged[['change']])
# fit_transform refits the scaler, so after this line `scaler` holds the sentiment fit only
merged['sentiments'] = scaler.fit_transform(merged[['sentiment_weighted']])

# Take an explicit copy: later cells add lagged columns to `scaled`, and those writes
# should target an independent frame rather than a slice of `merged`
scaled = merged[['changes', 'sentiments']].copy()
scaled

In [None]:
# Plot the columns of `scaled` together on one chart
scaled.plot(figsize=(15, 8))

In [None]:
# Show the pairwise correlation between the columns of `scaled`
scaled.corr()

In [None]:
# Try sentiments with different date lags
# (assumes rows are in ascending date order, so shift(1) means "one day later")

# 'sentiment-1' = sentiment from the previous day, aligned with today's price change.
# shift(1) moves each value one row later, so yesterday's sentiment sits next to
# today's change -> current day sentiment predicts next day stock price change (predictive)
scaled['sentiment-1'] = merged['sentiments'].shift(1)

# 'sentiment+1' = sentiment from the next day, aligned with today's price change.
# shift(-1) moves each value one row earlier, so tomorrow's sentiment sits next to
# today's change -> current day sentiment reflects yesterday's price change (reactive)
scaled['sentiment+1'] = merged['sentiments'].shift(-1)
scaled

In [None]:
# Recompute the correlations now that `scaled` also holds the lagged sentiment columns
scaled.corr()

In [None]:
# Filter contents with different conditions to get more "relevant" tweets
# Use different weightings apart from the Followings
# Take sentiment subjectivity into account (maybe more subjective is more important)
# Is the coverage enough? Add different data source of sentiment
# Use a better sentiment analysis engine (e.g. one tailor-made for social media content)
# Try different date lags and compare the close price changes rather than the open-close changes
# Use sentiment moving average or long/short term sentiment cross over
# Combine with other technical indicators such as stock price moving average
# Combine with other Machine Learned signals or trends