In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in files):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import re
from multiprocessing.pool import Pool
import datetime
import matplotlib.pyplot as plt
import scipy.stats as stat

In [None]:
# file1: Company_Tweet.csv, merged with file2 on tweet_id below.
file1 = "/kaggle/input/tweets-about-the-top-companies-from-2015-to-2020/Company_Tweet.csv"
# file2: Tweet.csv, the other side of the tweet_id merge.
file2 = "/kaggle/input/tweets-about-the-top-companies-from-2015-to-2020/Tweet.csv"

In [None]:
# Inner-join the two CSVs on tweet_id, keeping only tweets present in both files.
twt = pd.read_csv(file1).merge(pd.read_csv(file2), how='inner', on='tweet_id')
twt.shape

# Classify The Tweets

In order to go beyond just calculating the correlation between the market value and the number of tweets, we need a way to quantify each of the tweets. The simplest way to do this is to classify them as either positive or negative. To do so, we make the following assumptions:
* Every tweet about a top company is either negative or non-negative.
* The length of the tweet is not as important as the tone of the tweet.
* If a tweet is not negative towards the company, then it is positive towards the company.

This last assumption is based on the idea that any non-negative publicity of a company is actually good publicity (whether or not the tweeter intended it to be so).

To identify which tweets are negative we check if any of the phrases in the following (obviously incomplete) list are present in the tweet:

In [None]:
# Lower-case phrases that mark a tweet as negative. They are matched as plain
# substrings, so the leading/trailing spaces inside some entries are significant.
# The order is kept fixed: downstream per-phrase results are aligned with it.
negs = np.array([
    "if only ",
    "assholes",
    "negative",
    "conspiracy",
    "hate",
    "fuck",
    "flip",
    "don't understand",
    "doesn't understand",
    "upset",
    "plummet",
    "misinformation",
    " lies",
    " rip ",
    "bankruptcy",
    "bizarre",
    "damn",
    "deception",
    "fraud",
    " idiot",
    " crap ",
    "stupid",
    "losing focus",
    "angry",
    " ass ",
    "bitch",
    " shit",
    "disaster",
    "drop",
    "dumb",
    "despise",
    "racist",
    "sexist",
    "illegal",
    "bias",
])

In [None]:
def wordpresent2(s):
    """Flag which negative phrases occur in a piece of text.

    Parameters
    ----------
    s : str
        Text to scan. It is lower-cased before matching, so the check is
        case-insensitive with respect to ``s``.

    Returns
    -------
    list of bool
        One entry per element of the module-level ``negs`` array, in the same
        order; ``True`` where that phrase is a substring of the lower-cased text.
    """
    # Lower-case once up front rather than once per phrase.
    lowered = s.lower()
    return [phrase in lowered for phrase in negs]

This next cell takes about 1.5 minutes to run with such a large dataset.

In [None]:
%%time
# Classify every tweet body in parallel across 8 worker processes; the result
# is one list of per-phrase flags per tweet, in the same order as twt.
with Pool(processes=8) as workers:
    outcome = workers.map(wordpresent2, twt["body"].to_numpy())

In [None]:
# Stack the per-tweet flag lists into a 2-D array (rows: tweets, columns: phrases).
outcome = np.asarray(outcome)
outcome.shape

In [None]:
# How many tweets did each negative phrase appear in?
tweets_per_phrase = outcome.sum(axis=0)
for phrase, n_tweets in zip(negs, tweets_per_phrase):
    print(phrase, " ", n_tweets)

In [None]:
# How many tweets classify as negative, i.e. contain at least one phrase?
outcome.any(axis=1).sum()

In [None]:
# A tweet is negative when any of the phrases matched it.
twt["NegativeTweet"] = outcome.any(axis=1)

In [None]:
# Every tweet that is not flagged negative is treated as positive.
twt["PositiveTweet"] = np.logical_not(twt["NegativeTweet"].to_numpy())

In [None]:
# Inspect the columns now that the NegativeTweet / PositiveTweet flags were added.
twt.columns

# Reduce Date to Day Number

In [None]:
# Whole days elapsed since 2015-01-01 00:00:00, so that tweet day numbers use the
# same origin as the market-data Daynum and the "Days after Jan. 1, 2015" plots.
# Anchoring to a fixed instant (instead of the first row's timestamp) makes the
# result independent of row order, and floor division keeps day boundaries
# correct even for timestamps before the origin.
# NOTE(review): assumes post_date holds Unix timestamps in seconds and that days
# should be cut at UTC midnight -- confirm against the dataset.
DAY_ZERO_EPOCH = int(pd.Timestamp("2015-01-01", tz="UTC").timestamp())
SECONDS_PER_DAY = 60 * 60 * 24
twt["Daynum"] = ((twt.post_date.values - DAY_ZERO_EPOCH) // SECONDS_PER_DAY).astype(int)

In [None]:
# Load the company market values; later cells read its day_date, ticker_symbol
# and close_value columns.
marketvals = pd.read_csv("/kaggle/input/values-of-top-nasdaq-copanies-from-2010-to-2020/CompanyValues.csv")

In [None]:
# Inspect the available market-data columns.
marketvals.columns

In [None]:
# Parse the ISO dates and express each one as whole days since 2015-01-01
# (2015-01-01 -> 0; earlier dates are negative). A real date difference handles
# every leap year, unlike a fixed 365-days-per-year formula.
dt = pd.to_datetime(marketvals.day_date.values, format="%Y-%m-%d")
marketvals["Daynum"] = (dt - pd.Timestamp("2015-01-01")).days

In [None]:
# Order rows chronologically; the per-company cells compute close-value
# differences between consecutive rows and therefore rely on this ordering.
marketvals = marketvals.sort_values("Daynum")

# Tesla

In [None]:
# Tesla's market rows. .copy() yields an independent frame so adding a column
# below does not raise SettingWithCopyWarning.
teslavals = marketvals.loc[marketvals.ticker_symbol.values == "TSLA"].copy()
# Change in close value from the previous row; the first row has no predecessor
# and gets 0. Relies on marketvals already being sorted by Daynum.
tesla_close = teslavals.close_value.values
teslavals["delta_close_value"] = np.diff(tesla_close, prepend=tesla_close[:1])
# Drop rows dated before day 0 (2015-01-01).
teslavals = teslavals.loc[teslavals.Daynum.values >= 0]
#teslavals.iloc[:20]

In [None]:
# Tweets tagged with Tesla's ticker symbol.
teslatwt = twt[twt["ticker_symbol"].to_numpy() == "TSLA"]

In [None]:
# Number of negative Tesla tweets per day, plotted against the day number.
teslatwtcnt1 = teslatwt.groupby("Daynum")[["NegativeTweet"]].sum()
fig, ax = plt.subplots()
ax.scatter(teslatwtcnt1.index, teslatwtcnt1["NegativeTweet"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Negative Tweets", fontsize=12)
ax.set_title("Tesla", fontsize=14)
plt.show()

In [None]:
# Number of positive Tesla tweets per day, plotted against the day number.
teslatwtcnt2 = teslatwt.groupby("Daynum")[["PositiveTweet"]].sum()
fig, ax = plt.subplots()
ax.scatter(teslatwtcnt2.index, teslatwtcnt2["PositiveTweet"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Positive Tweets", fontsize=12)
ax.set_title("Tesla", fontsize=14)
plt.show()

In [None]:
# Tesla close value over time.
fig, ax = plt.subplots()
ax.scatter(teslavals["Daynum"].to_numpy(), teslavals["close_value"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Market Value", fontsize=12)
ax.set_title("Tesla", fontsize=14)
plt.show()

In [None]:
# Row-to-row change in Tesla's close value over time.
fig, ax = plt.subplots()
ax.scatter(teslavals["Daynum"].to_numpy(), teslavals["delta_close_value"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Market Value Change", fontsize=12)
ax.set_title("Tesla", fontsize=14)
plt.show()

In [None]:
# For each pair of consecutive market-data rows, average the daily negative-tweet
# counts over the days in [earlier row's day, later row's day).
meannegtwts = []
market_days = teslavals.Daynum.values
for day_from, day_to in zip(market_days[:-1], market_days[1:]):
    in_window = (teslatwtcnt1.index >= day_from) & (teslatwtcnt1.index < day_to)
    window_counts = teslatwtcnt1.NegativeTweet.values[in_window]
    # Windows without any tweet rows contribute 0.
    meannegtwts.append(np.mean(window_counts) if window_counts.size > 0 else 0)

In [None]:
# Mean negative tweets per window against the matching change in close value
# (the first delta is skipped so both series have the same length).
fig, ax = plt.subplots()
ax.scatter(meannegtwts, teslavals["delta_close_value"].to_numpy()[1:])
ax.set_xlabel("Neg. Tweets Per Day Since Last Close")
ax.set_ylabel("Market Value Change of Next Day", fontsize=12)
ax.set_title("Tesla", fontsize=14)
plt.show()

In [None]:
# Spearman rank correlation between mean negative tweets per window and the
# matching close-value change; [1:] drops the first delta to align the lengths.
stat.spearmanr(meannegtwts, teslavals.delta_close_value.values[1:])

In [None]:
# Pearson (linear) correlation for the same two series.
stat.pearsonr(meannegtwts, teslavals.delta_close_value.values[1:])

### So it appears that negative tweets are not significantly correlated with the change in Tesla's market value.

In [None]:
# For each pair of consecutive market-data rows, average the daily positive-tweet
# counts over the days in [earlier row's day, later row's day).
meanpostwts = []
market_days = teslavals.Daynum.values
for day_from, day_to in zip(market_days[:-1], market_days[1:]):
    in_window = (teslatwtcnt2.index >= day_from) & (teslatwtcnt2.index < day_to)
    window_counts = teslatwtcnt2.PositiveTweet.values[in_window]
    # Windows without any tweet rows contribute 0.
    meanpostwts.append(np.mean(window_counts) if window_counts.size > 0 else 0)

In [None]:
# Mean positive tweets per window against the matching change in close value
# (the first delta is skipped so both series have the same length).
fig, ax = plt.subplots()
ax.scatter(meanpostwts, teslavals["delta_close_value"].to_numpy()[1:])
ax.set_xlabel("Pos. Tweets Per Day Since Last Close")
ax.set_ylabel("Market Value Change of Next Day", fontsize=12)
ax.set_title("Tesla", fontsize=14)
plt.show()

In [None]:
# Spearman rank correlation between mean positive tweets per window and the
# matching close-value change; [1:] drops the first delta to align the lengths.
stat.spearmanr(meanpostwts, teslavals.delta_close_value.values[1:])

In [None]:
# Pearson (linear) correlation for the same two series.
stat.pearsonr(meanpostwts, teslavals.delta_close_value.values[1:])

### It also appears that the number of positive tweets has no significant impact on the market value of Tesla.

# Apple

In [None]:
# Apple's market rows. .copy() yields an independent frame so adding a column
# below does not raise SettingWithCopyWarning.
aplvals = marketvals.loc[marketvals.ticker_symbol.values == "AAPL"].copy()
# Change in close value from the previous row; the first row has no predecessor
# and gets 0. Relies on marketvals already being sorted by Daynum.
apl_close = aplvals.close_value.values
aplvals["delta_close_value"] = np.diff(apl_close, prepend=apl_close[:1])
# Drop rows dated before day 0 (2015-01-01).
aplvals = aplvals.loc[aplvals.Daynum.values >= 0]
#aplvals.iloc[:20]

In [None]:
# Tweets tagged with Apple's ticker symbol.
apltwt = twt[twt["ticker_symbol"].to_numpy() == "AAPL"]

In [None]:
# Number of negative Apple tweets per day, plotted against the day number.
apltwtcnt1 = apltwt.groupby("Daynum")[["NegativeTweet"]].sum()
fig, ax = plt.subplots()
ax.scatter(apltwtcnt1.index, apltwtcnt1["NegativeTweet"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Negative Tweets", fontsize=12)
ax.set_title("Apple", fontsize=14)
plt.show()

In [None]:
# Number of positive Apple tweets per day, plotted against the day number.
apltwtcnt2 = apltwt.groupby("Daynum")[["PositiveTweet"]].sum()
fig, ax = plt.subplots()
ax.scatter(apltwtcnt2.index, apltwtcnt2["PositiveTweet"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Positive Tweets", fontsize=12)
ax.set_title("Apple", fontsize=14)
plt.show()

In [None]:
# Apple close value over time.
fig, ax = plt.subplots()
ax.scatter(aplvals["Daynum"].to_numpy(), aplvals["close_value"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Market Value", fontsize=12)
ax.set_title("Apple", fontsize=14)
plt.show()

In [None]:
# Row-to-row change in Apple's close value over time.
fig, ax = plt.subplots()
ax.scatter(aplvals["Daynum"].to_numpy(), aplvals["delta_close_value"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Market Value Change", fontsize=12)
ax.set_title("Apple", fontsize=14)
plt.show()

In [None]:
# For each pair of consecutive market-data rows, average the daily negative-tweet
# counts over the days in [earlier row's day, later row's day).
meannegtwts = []
market_days = aplvals.Daynum.values
for day_from, day_to in zip(market_days[:-1], market_days[1:]):
    in_window = (apltwtcnt1.index >= day_from) & (apltwtcnt1.index < day_to)
    window_counts = apltwtcnt1.NegativeTweet.values[in_window]
    # Windows without any tweet rows contribute 0.
    meannegtwts.append(np.mean(window_counts) if window_counts.size > 0 else 0)

In [None]:
# Mean negative tweets per window against the matching change in close value
# (the first delta is skipped so both series have the same length).
fig, ax = plt.subplots()
ax.scatter(meannegtwts, aplvals["delta_close_value"].to_numpy()[1:])
ax.set_xlabel("Neg. Tweets Per Day Since Last Close")
ax.set_ylabel("Market Value Change of Next Day", fontsize=12)
ax.set_title("Apple", fontsize=14)
plt.show()

In [None]:
# Spearman rank correlation between mean negative tweets per window and the
# matching close-value change; [1:] drops the first delta to align the lengths.
stat.spearmanr(meannegtwts, aplvals.delta_close_value.values[1:])

In [None]:
# Pearson (linear) correlation for the same two series.
stat.pearsonr(meannegtwts, aplvals.delta_close_value.values[1:])

### So it appears that, when using Pearson's correlation coefficient, negative tweets do have a statistically significant impact on the next day's market value change for Apple. However, since Spearman's correlation coefficient does not produce this same result, this is largely driven by strong outliers. In other words, the number of negative tweets is significantly correlated with the next day's change in Apple's market value only on days when an unusually large number of negative tweets are made.

We note that this does not imply that the negative tweets caused the change in the market value.

In [None]:
# For each pair of consecutive market-data rows, average the daily positive-tweet
# counts over the days in [earlier row's day, later row's day).
meanpostwts = []
market_days = aplvals.Daynum.values
for day_from, day_to in zip(market_days[:-1], market_days[1:]):
    in_window = (apltwtcnt2.index >= day_from) & (apltwtcnt2.index < day_to)
    window_counts = apltwtcnt2.PositiveTweet.values[in_window]
    # Windows without any tweet rows contribute 0.
    meanpostwts.append(np.mean(window_counts) if window_counts.size > 0 else 0)

In [None]:
# Mean positive tweets per window against the matching change in close value
# (the first delta is skipped so both series have the same length).
fig, ax = plt.subplots()
ax.scatter(meanpostwts, aplvals["delta_close_value"].to_numpy()[1:])
ax.set_xlabel("Pos. Tweets Per Day Since Last Close")
ax.set_ylabel("Market Value Change of Next Day", fontsize=12)
ax.set_title("Apple", fontsize=14)
plt.show()

In [None]:
# Spearman rank correlation between mean positive tweets per window and the
# matching close-value change; [1:] drops the first delta to align the lengths.
stat.spearmanr(meanpostwts, aplvals.delta_close_value.values[1:])

In [None]:
# Pearson (linear) correlation for the same two series.
stat.pearsonr(meanpostwts, aplvals.delta_close_value.values[1:])

### On the other hand, it does not appear that the number of positive tweets is correlated with the change in the market value for Apple.

# Microsoft

In [None]:
# Microsoft's market rows. .copy() yields an independent frame so adding a
# column below does not raise SettingWithCopyWarning.
msftvals = marketvals.loc[marketvals.ticker_symbol.values == "MSFT"].copy()
# Change in close value from the previous row; the first row has no predecessor
# and gets 0. Relies on marketvals already being sorted by Daynum.
msft_close = msftvals.close_value.values
msftvals["delta_close_value"] = np.diff(msft_close, prepend=msft_close[:1])
# Drop rows dated before day 0 (2015-01-01).
msftvals = msftvals.loc[msftvals.Daynum.values >= 0]
#msftvals.iloc[:20]

In [None]:
# Tweets tagged with Microsoft's ticker symbol.
msfttwt = twt[twt["ticker_symbol"].to_numpy() == "MSFT"]

In [None]:
# Number of negative Microsoft tweets per day, plotted against the day number.
msfttwtcnt1 = msfttwt.groupby("Daynum")[["NegativeTweet"]].sum()
fig, ax = plt.subplots()
ax.scatter(msfttwtcnt1.index, msfttwtcnt1["NegativeTweet"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Negative Tweets", fontsize=12)
ax.set_title("Microsoft", fontsize=14)
plt.show()

In [None]:
# Number of positive Microsoft tweets per day, plotted against the day number.
msfttwtcnt2 = msfttwt.groupby("Daynum")[["PositiveTweet"]].sum()
fig, ax = plt.subplots()
ax.scatter(msfttwtcnt2.index, msfttwtcnt2["PositiveTweet"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Positive Tweets", fontsize=12)
ax.set_title("Microsoft", fontsize=14)
plt.show()

In [None]:
# Microsoft close value over time.
fig, ax = plt.subplots()
ax.scatter(msftvals["Daynum"].to_numpy(), msftvals["close_value"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Market Value", fontsize=12)
ax.set_title("Microsoft", fontsize=14)
plt.show()

In [None]:
# Row-to-row change in Microsoft's close value over time.
fig, ax = plt.subplots()
ax.scatter(msftvals["Daynum"].to_numpy(), msftvals["delta_close_value"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Market Value Change", fontsize=12)
ax.set_title("Microsoft", fontsize=14)
plt.show()

In [None]:
# For each pair of consecutive market-data rows, average the daily negative-tweet
# counts over the days in [earlier row's day, later row's day).
meannegtwts = []
market_days = msftvals.Daynum.values
for day_from, day_to in zip(market_days[:-1], market_days[1:]):
    in_window = (msfttwtcnt1.index >= day_from) & (msfttwtcnt1.index < day_to)
    window_counts = msfttwtcnt1.NegativeTweet.values[in_window]
    # Windows without any tweet rows contribute 0.
    meannegtwts.append(np.mean(window_counts) if window_counts.size > 0 else 0)

In [None]:
# Mean negative tweets per window against the matching change in close value
# (the first delta is skipped so both series have the same length).
fig, ax = plt.subplots()
ax.scatter(meannegtwts, msftvals["delta_close_value"].to_numpy()[1:])
ax.set_xlabel("Neg. Tweets Per Day Since Last Close")
ax.set_ylabel("Market Value Change of Next Day", fontsize=12)
ax.set_title("Microsoft", fontsize=14)
plt.show()

In [None]:
# Spearman rank correlation between mean negative tweets per window and the
# matching close-value change; [1:] drops the first delta to align the lengths.
stat.spearmanr(meannegtwts, msftvals.delta_close_value.values[1:])

In [None]:
# Pearson (linear) correlation for the same two series.
stat.pearsonr(meannegtwts, msftvals.delta_close_value.values[1:])

### No significant correlation between negative tweet counts and the change in the market value for Microsoft.

In [None]:
# For each pair of consecutive market-data rows, average the daily positive-tweet
# counts over the days in [earlier row's day, later row's day).
meanpostwts = []
market_days = msftvals.Daynum.values
for day_from, day_to in zip(market_days[:-1], market_days[1:]):
    in_window = (msfttwtcnt2.index >= day_from) & (msfttwtcnt2.index < day_to)
    window_counts = msfttwtcnt2.PositiveTweet.values[in_window]
    # Windows without any tweet rows contribute 0.
    meanpostwts.append(np.mean(window_counts) if window_counts.size > 0 else 0)

In [None]:
# Mean positive tweets per window against the matching change in close value
# (the first delta is skipped so both series have the same length).
fig, ax = plt.subplots()
ax.scatter(meanpostwts, msftvals["delta_close_value"].to_numpy()[1:])
ax.set_xlabel("Pos. Tweets Per Day Since Last Close")
ax.set_ylabel("Market Value Change of Next Day", fontsize=12)
ax.set_title("Microsoft", fontsize=14)
plt.show()

In [None]:
# Spearman rank correlation between mean positive tweets per window and the
# matching close-value change; [1:] drops the first delta to align the lengths.
stat.spearmanr(meanpostwts, msftvals.delta_close_value.values[1:])

In [None]:
# Pearson (linear) correlation for the same two series.
stat.pearsonr(meanpostwts, msftvals.delta_close_value.values[1:])

### No clear correlation between positive tweets and market value change either for Microsoft.

# Amazon

In [None]:
# Amazon's market rows. .copy() yields an independent frame so adding a column
# below does not raise SettingWithCopyWarning.
amzvals = marketvals.loc[marketvals.ticker_symbol.values == "AMZN"].copy()
# Change in close value from the previous row; the first row has no predecessor
# and gets 0. Relies on marketvals already being sorted by Daynum.
amz_close = amzvals.close_value.values
amzvals["delta_close_value"] = np.diff(amz_close, prepend=amz_close[:1])
# Drop rows dated before day 0 (2015-01-01).
amzvals = amzvals.loc[amzvals.Daynum.values >= 0]
#amzvals.iloc[:20]

In [None]:
# Tweets tagged with Amazon's ticker symbol.
amztwt = twt[twt["ticker_symbol"].to_numpy() == "AMZN"]

In [None]:
# Number of negative Amazon tweets per day, plotted against the day number.
amztwtcnt1 = amztwt.groupby("Daynum")[["NegativeTweet"]].sum()
fig, ax = plt.subplots()
ax.scatter(amztwtcnt1.index, amztwtcnt1["NegativeTweet"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Negative Tweets", fontsize=12)
ax.set_title("Amazon", fontsize=14)
plt.show()

In [None]:
# Number of positive Amazon tweets per day, plotted against the day number.
amztwtcnt2 = amztwt.groupby("Daynum")[["PositiveTweet"]].sum()
fig, ax = plt.subplots()
ax.scatter(amztwtcnt2.index, amztwtcnt2["PositiveTweet"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Positive Tweets", fontsize=12)
ax.set_title("Amazon", fontsize=14)
plt.show()

In [None]:
# Amazon close value over time.
fig, ax = plt.subplots()
ax.scatter(amzvals["Daynum"].to_numpy(), amzvals["close_value"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Market Value", fontsize=12)
ax.set_title("Amazon", fontsize=14)
plt.show()

In [None]:
# Row-to-row change in Amazon's close value over time.
fig, ax = plt.subplots()
ax.scatter(amzvals["Daynum"].to_numpy(), amzvals["delta_close_value"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Market Value Change", fontsize=12)
ax.set_title("Amazon", fontsize=14)
plt.show()

In [None]:
# For each pair of consecutive market-data rows, average the daily negative-tweet
# counts over the days in [earlier row's day, later row's day).
meannegtwts = []
market_days = amzvals.Daynum.values
for day_from, day_to in zip(market_days[:-1], market_days[1:]):
    in_window = (amztwtcnt1.index >= day_from) & (amztwtcnt1.index < day_to)
    window_counts = amztwtcnt1.NegativeTweet.values[in_window]
    # Windows without any tweet rows contribute 0.
    meannegtwts.append(np.mean(window_counts) if window_counts.size > 0 else 0)

In [None]:
# Mean negative tweets per window against the matching change in close value
# (the first delta is skipped so both series have the same length).
fig, ax = plt.subplots()
ax.scatter(meannegtwts, amzvals["delta_close_value"].to_numpy()[1:])
ax.set_xlabel("Neg. Tweets Per Day Since Last Close")
ax.set_ylabel("Market Value Change of Next Day", fontsize=12)
ax.set_title("Amazon", fontsize=14)
plt.show()

In [None]:
# Spearman rank correlation between mean negative tweets per window and the
# matching close-value change; [1:] drops the first delta to align the lengths.
stat.spearmanr(meannegtwts, amzvals.delta_close_value.values[1:])

In [None]:
# Pearson (linear) correlation for the same two series.
stat.pearsonr(meannegtwts, amzvals.delta_close_value.values[1:])

### Interestingly enough, we find that (as was the case with Apple) the number of negative tweets has a statistically significant Pearson correlation with the change in the market value for Amazon. As expected, this correlation is negative. Additionally, this correlation is also driven by large values since the Spearman correlation is not statistically significant.

In [None]:
# For each pair of consecutive market-data rows, average the daily positive-tweet
# counts over the days in [earlier row's day, later row's day).
meanpostwts = []
market_days = amzvals.Daynum.values
for day_from, day_to in zip(market_days[:-1], market_days[1:]):
    in_window = (amztwtcnt2.index >= day_from) & (amztwtcnt2.index < day_to)
    window_counts = amztwtcnt2.PositiveTweet.values[in_window]
    # Windows without any tweet rows contribute 0.
    meanpostwts.append(np.mean(window_counts) if window_counts.size > 0 else 0)

In [None]:
# Mean positive tweets per window against the matching change in close value
# (the first delta is skipped so both series have the same length).
fig, ax = plt.subplots()
ax.scatter(meanpostwts, amzvals["delta_close_value"].to_numpy()[1:])
ax.set_xlabel("Pos. Tweets Per Day Since Last Close")
ax.set_ylabel("Market Value Change of Next Day", fontsize=12)
ax.set_title("Amazon", fontsize=14)
plt.show()

In [None]:
# Spearman rank correlation between mean positive tweets per window and the
# matching close-value change; [1:] drops the first delta to align the lengths.
stat.spearmanr(meanpostwts, amzvals.delta_close_value.values[1:])

In [None]:
# Pearson (linear) correlation for the same two series.
stat.pearsonr(meanpostwts, amzvals.delta_close_value.values[1:])

### Very interesting! We now see that for Amazon the number of positive tweets has a positive, statistically significant, Spearman correlation with the next day's change in market value. 

# Google

In [None]:
# Market rows for the GOOG ticker. .copy() yields an independent frame so adding
# a column below does not raise SettingWithCopyWarning.
googvals = marketvals.loc[marketvals.ticker_symbol.values == "GOOG"].copy()
# Change in close value from the previous row; the first row has no predecessor
# and gets 0. Relies on marketvals already being sorted by Daynum.
goog_close = googvals.close_value.values
googvals["delta_close_value"] = np.diff(goog_close, prepend=goog_close[:1])
# Drop rows dated before day 0 (2015-01-01).
googvals = googvals.loc[googvals.Daynum.values >= 0]
#googvals.iloc[:20]

In [None]:
# Tweets tagged with the GOOG ticker symbol.
googtwt = twt[twt["ticker_symbol"].to_numpy() == "GOOG"]

In [None]:
# Number of negative GOOG tweets per day, plotted against the day number.
googtwtcnt1 = googtwt.groupby("Daynum")[["NegativeTweet"]].sum()
fig, ax = plt.subplots()
ax.scatter(googtwtcnt1.index, googtwtcnt1["NegativeTweet"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Negative Tweets", fontsize=12)
ax.set_title("Google", fontsize=14)
plt.show()

In [None]:
# Number of positive GOOG tweets per day, plotted against the day number.
googtwtcnt2 = googtwt.groupby("Daynum")[["PositiveTweet"]].sum()
fig, ax = plt.subplots()
ax.scatter(googtwtcnt2.index, googtwtcnt2["PositiveTweet"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Positive Tweets", fontsize=12)
ax.set_title("Google", fontsize=14)
plt.show()

In [None]:
# GOOG close value over time.
fig, ax = plt.subplots()
ax.scatter(googvals["Daynum"].to_numpy(), googvals["close_value"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Market Value", fontsize=12)
ax.set_title("Google", fontsize=14)
plt.show()

In [None]:
# Row-to-row change in the GOOG close value over time.
fig, ax = plt.subplots()
ax.scatter(googvals["Daynum"].to_numpy(), googvals["delta_close_value"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Market Value Change", fontsize=12)
ax.set_title("Google", fontsize=14)
plt.show()

In [None]:
# For each pair of consecutive market-data rows, average the daily negative-tweet
# counts over the days in [earlier row's day, later row's day).
meannegtwts = []
market_days = googvals.Daynum.values
for day_from, day_to in zip(market_days[:-1], market_days[1:]):
    in_window = (googtwtcnt1.index >= day_from) & (googtwtcnt1.index < day_to)
    window_counts = googtwtcnt1.NegativeTweet.values[in_window]
    # Windows without any tweet rows contribute 0.
    meannegtwts.append(np.mean(window_counts) if window_counts.size > 0 else 0)

In [None]:
# Mean negative tweets per window against the matching change in close value
# (the first delta is skipped so both series have the same length).
fig, ax = plt.subplots()
ax.scatter(meannegtwts, googvals["delta_close_value"].to_numpy()[1:])
ax.set_xlabel("Neg. Tweets Per Day Since Last Close")
ax.set_ylabel("Market Value Change of Next Day", fontsize=12)
ax.set_title("Google", fontsize=14)
plt.show()

In [None]:
# Spearman rank correlation between mean negative tweets per window and the
# matching close-value change; [1:] drops the first delta to align the lengths.
stat.spearmanr(meannegtwts, googvals.delta_close_value.values[1:])

In [None]:
# Pearson (linear) correlation for the same two series.
stat.pearsonr(meannegtwts, googvals.delta_close_value.values[1:])

### No statistically significant correlation for Google.

In [None]:
# For each pair of consecutive market-data rows, average the daily positive-tweet
# counts over the days in [earlier row's day, later row's day).
meanpostwts = []
market_days = googvals.Daynum.values
for day_from, day_to in zip(market_days[:-1], market_days[1:]):
    in_window = (googtwtcnt2.index >= day_from) & (googtwtcnt2.index < day_to)
    window_counts = googtwtcnt2.PositiveTweet.values[in_window]
    # Windows without any tweet rows contribute 0.
    meanpostwts.append(np.mean(window_counts) if window_counts.size > 0 else 0)

In [None]:
# Mean positive tweets per window against the matching change in close value
# (the first delta is skipped so both series have the same length).
fig, ax = plt.subplots()
ax.scatter(meanpostwts, googvals["delta_close_value"].to_numpy()[1:])
ax.set_xlabel("Pos. Tweets Per Day Since Last Close")
ax.set_ylabel("Market Value Change of Next Day", fontsize=12)
ax.set_title("Google", fontsize=14)
plt.show()

In [None]:
# Spearman rank correlation between mean positive tweets per window and the
# matching close-value change; [1:] drops the first delta to align the lengths.
stat.spearmanr(meanpostwts, googvals.delta_close_value.values[1:])

In [None]:
# Pearson (linear) correlation for the same two series.
stat.pearsonr(meanpostwts, googvals.delta_close_value.values[1:])

### Nothing for positive tweets either.

# Google Inc.

In [None]:
# Market rows for the GOOGL ticker. .copy() yields an independent frame so
# adding a column below does not raise SettingWithCopyWarning.
googlvals = marketvals.loc[marketvals.ticker_symbol.values == "GOOGL"].copy()
# Change in close value from the previous row; the first row has no predecessor
# and gets 0. Relies on marketvals already being sorted by Daynum.
googl_close = googlvals.close_value.values
googlvals["delta_close_value"] = np.diff(googl_close, prepend=googl_close[:1])
# Drop rows dated before day 0 (2015-01-01).
googlvals = googlvals.loc[googlvals.Daynum.values >= 0]
#googlvals.iloc[:20]

In [None]:
# Tweets tagged with the GOOGL ticker symbol.
googltwt = twt[twt["ticker_symbol"].to_numpy() == "GOOGL"]

In [None]:
# Number of negative GOOGL tweets per day, plotted against the day number.
googltwtcnt1 = googltwt.groupby("Daynum")[["NegativeTweet"]].sum()
fig, ax = plt.subplots()
ax.scatter(googltwtcnt1.index, googltwtcnt1["NegativeTweet"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Negative Tweets", fontsize=12)
ax.set_title("Google Inc.", fontsize=14)
plt.show()

In [None]:
# Number of positive GOOGL tweets per day, plotted against the day number.
googltwtcnt2 = googltwt.groupby("Daynum")[["PositiveTweet"]].sum()
fig, ax = plt.subplots()
ax.scatter(googltwtcnt2.index, googltwtcnt2["PositiveTweet"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Positive Tweets", fontsize=12)
ax.set_title("Google Inc.", fontsize=14)
plt.show()

In [None]:
# GOOGL close value over time.
fig, ax = plt.subplots()
ax.scatter(googlvals["Daynum"].to_numpy(), googlvals["close_value"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Market Value", fontsize=12)
ax.set_title("Google Inc.", fontsize=14)
plt.show()

In [None]:
# Row-to-row change in the GOOGL close value over time.
fig, ax = plt.subplots()
ax.scatter(googlvals["Daynum"].to_numpy(), googlvals["delta_close_value"].to_numpy())
ax.set_xlabel("Days after Jan. 1, 2015", fontsize=12)
ax.set_ylabel("Market Value Change", fontsize=12)
ax.set_title("Google Inc.", fontsize=14)
plt.show()

In [None]:
# For each pair of consecutive market-data rows, average the daily negative-tweet
# counts over the days in [earlier row's day, later row's day).
meannegtwts = []
market_days = googlvals.Daynum.values
for day_from, day_to in zip(market_days[:-1], market_days[1:]):
    in_window = (googltwtcnt1.index >= day_from) & (googltwtcnt1.index < day_to)
    window_counts = googltwtcnt1.NegativeTweet.values[in_window]
    # Windows without any tweet rows contribute 0.
    meannegtwts.append(np.mean(window_counts) if window_counts.size > 0 else 0)

In [None]:
# Mean negative tweets per window against the matching change in close value
# (the first delta is skipped so both series have the same length).
fig, ax = plt.subplots()
ax.scatter(meannegtwts, googlvals["delta_close_value"].to_numpy()[1:])
ax.set_xlabel("Neg. Tweets Per Day Since Last Close")
ax.set_ylabel("Market Value Change of Next Day", fontsize=12)
ax.set_title("Google Inc.", fontsize=14)
plt.show()

In [None]:
# Spearman rank correlation between mean negative tweets per window and the
# matching close-value change; [1:] drops the first delta to align the lengths.
stat.spearmanr(meannegtwts, googlvals.delta_close_value.values[1:])

In [None]:
# Pearson (linear) correlation for the same two series.
stat.pearsonr(meannegtwts, googlvals.delta_close_value.values[1:])

### No statistically significant correlation for Google Inc.

In [None]:
# For each pair of consecutive market-data rows, average the daily positive-tweet
# counts over the days in [earlier row's day, later row's day).
meanpostwts = []
market_days = googlvals.Daynum.values
for day_from, day_to in zip(market_days[:-1], market_days[1:]):
    in_window = (googltwtcnt2.index >= day_from) & (googltwtcnt2.index < day_to)
    window_counts = googltwtcnt2.PositiveTweet.values[in_window]
    # Windows without any tweet rows contribute 0.
    meanpostwts.append(np.mean(window_counts) if window_counts.size > 0 else 0)

In [None]:
# Mean positive tweets per window against the matching change in close value
# (the first delta is skipped so both series have the same length).
fig, ax = plt.subplots()
ax.scatter(meanpostwts, googlvals["delta_close_value"].to_numpy()[1:])
ax.set_xlabel("Pos. Tweets Per Day Since Last Close")
ax.set_ylabel("Market Value Change of Next Day", fontsize=12)
ax.set_title("Google Inc.", fontsize=14)
plt.show()

In [None]:
# Spearman rank correlation between mean positive tweets per window and the
# matching close-value change; [1:] drops the first delta to align the lengths.
stat.spearmanr(meanpostwts, googlvals.delta_close_value.values[1:])

In [None]:
# Pearson (linear) correlation for the same two series.
stat.pearsonr(meanpostwts, googlvals.delta_close_value.values[1:])

### It appears that the number of positive tweets is positively correlated with the change in the market value of Google Inc.

# Conclusion

Overall, we find that the number of negative tweets has a negative, statistically significant, correlation with the change in the market value for Apple and Amazon. In both of these cases, this correlation is strongly driven by days where an unusually large number of negative tweets about the company occurred.

Furthermore, we find that the number of positive tweets has a positive, statistically significant, correlation with the change in the market value for Amazon and Google Inc. Interestingly enough, this correlation is strongly driven by days of unusually large numbers of positive tweets for Google Inc., but is found to instead be a general trend for Amazon.