In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import datetime
import matplotlib.pyplot as plt
# Load the two input tables: tweets with a sentiment label, and per-company
# market values.
tweets_csv = '/kaggle/input/bert-transfer-learning-from-s140/tweets_sentiment.csv'
marketvals_csv = "/kaggle/input/values-of-top-nasdaq-copanies-from-2010-to-2020/CompanyValues.csv"

tweets = pd.read_csv(tweets_csv)
marketvals = pd.read_csv(marketvals_csv)

Sentiment analysis for this kernel was done via transfer learning, after fine-tuning BERT on the s140 sentiment analysis dataset in [this kernel](https://www.kaggle.com/stevenknguyen/bert-transfer-learning-from-s140/).

In [None]:
# Keep only market rows dated 2015-01-01 or later and renumber the index.
from_2015 = marketvals['day_date'] >= '2015-01-01'
marketvals = marketvals.loc[from_2015].reset_index(drop=True)

# post_date is parsed as seconds since the epoch; store the calendar day as a
# 'YYYY/MM/DD' string so tweets can be grouped per day.
post_timestamps = pd.to_datetime(tweets['post_date'], unit='s')
tweets['date'] = post_timestamps.dt.strftime('%Y/%m/%d')

To do this analysis, I am only going to analyze Apple stock and do a little feature engineering to make it easier to find the lowest public opinion on Twitter.

In [None]:
# One row per (date, ticker): the sum of the sentiment labels and the number
# of labelled tweets that day.
stockdaycount = (
    tweets[["date", "ticker_symbol", "sentiment"]]
    .groupby(["date", "ticker_symbol"], as_index=False)
    .agg(
        sentimentsum=('sentiment', 'sum'),
        sentimentcount=('sentiment', 'count'),
    )
)

# Mean sentiment label per day and ticker, stored as float32.
daily_ratio = stockdaycount['sentimentsum'] / stockdaycount['sentimentcount']
stockdaycount['sentiment_rat'] = daily_ratio.astype(np.float32)

In [None]:
import matplotlib.dates as mdates
# Per-day AAPL rows from the tweet aggregate and from the market data.
aapltwt = stockdaycount.loc[stockdaycount.ticker_symbol == "AAPL"]
applclose = marketvals.loc[marketvals.ticker_symbol == "AAPL"]

# Both date columns hold strings, and in different formats ('YYYY/MM/DD' for
# tweets, ISO-style 'YYYY-MM-DD' for prices). Plotting strings makes matplotlib
# treat x as categorical, so the shared x axis would not line the panels up and
# the date locators/formatter below would act on category indices instead of
# dates. Plot from datetime copies; the original frames keep their string
# columns, which the string comparisons elsewhere in the notebook rely on.
aapltwt_plot = aapltwt.assign(
    date=pd.to_datetime(aapltwt['date'], format='%Y/%m/%d')
)
applclose_plot = applclose.assign(
    day_date=pd.to_datetime(applclose['day_date'])
).sort_values('day_date')

fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True)
ax1.plot('date', 'sentiment_rat', data=aapltwt_plot)
ax2.plot('date', 'sentimentcount', data=aapltwt_plot)
ax3.plot('day_date', 'close_value', data=applclose_plot)

# Major tick per year (labelled with the year), minor tick per month.
# Each axis gets its own locator/formatter instances.
for ax in (ax1, ax2, ax3):
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    ax.xaxis.set_minor_locator(mdates.MonthLocator())

# NOTE(review): sentiment_rat is sentimentsum / sentimentcount, i.e. the mean
# sentiment label per day, not literally positive/negative - confirm wording.
ax1.set_title('Ratio of positive to negative tweets for AAPL')
ax2.set(ylabel='no of tweets')
ax3.set(ylabel='closing price')

plt.show()

There is no obvious relationship between public sentiment and the stock price, except perhaps for a couple of dips: public opinion follows a roughly sinusoidal pattern, while Apple's price shows a clear negative trend over the first couple of years of the data. Next, I'm going to check the lowest-sentiment day in the dataset.

In [None]:
# Consider only day/ticker rows with more than 200 tweets, then keep the
# row(s) whose sentiment ratio equals the minimum among them.
busy_days = stockdaycount.loc[stockdaycount['sentimentcount'] > 200]
min_ratio = busy_days['sentiment_rat'].min()
lowest_day = busy_days.loc[busy_days['sentiment_rat'] == min_ratio]
lowest_day

Pull data from the week of the lowest day and compare it to the lowest stock price.

In [None]:
# AAPL sentiment and closing prices for 2016/08/27 - 2016/09/03.
# NOTE(review): the window is hard-coded; presumably it was chosen from the
# `lowest_day` output - confirm if the data changes.
lowest_sentiment = stockdaycount[
    (stockdaycount.ticker_symbol == 'AAPL')
    & (stockdaycount.date >= '2016/08/27')
    & (stockdaycount.date <= '2016/09/03')
]
lowest_price = marketvals[
    (marketvals.ticker_symbol == 'AAPL')
    & (marketvals.day_date >= '2016-08-27')
    & (marketvals.day_date <= '2016-09-03')
]
lowest_price = lowest_price.sort_values(by='day_date')

# Attach the closing price by calendar day, not by row position. A positional
# assignment raises when the two frames have different numbers of rows and
# silently misaligns values when they do not cover the same days. The price
# dates are converted to the tweets' 'YYYY/MM/DD' format for the join key.
# Days without a price row get NaN.
close_by_day = lowest_price.assign(
    date=pd.to_datetime(lowest_price['day_date']).dt.strftime('%Y/%m/%d')
)[['date', 'close_value']]
lowest_sentiment = lowest_sentiment.merge(
    close_by_day, on='date', how='left', validate='one_to_one'
)
lowest_sentiment

If only by a few cents, the lowest sentiment does correspond with the lowest closing price for their stock. Let's check the price for the preceding and following quarter.

In [None]:
# Summary statistics of AAPL closing prices from 2016-06-01 to 2016-09-01
# (both ends inclusive).
aapl_rows = marketvals['ticker_symbol'] == 'AAPL'
before_window = marketvals['day_date'].between('2016-06-01', '2016-09-01')
marketvals.loc[aapl_rows & before_window, 'close_value'].describe()

In [None]:
# Summary statistics of AAPL closing prices from 2016-09-01 to 2017-01-01
# (both ends inclusive).
aapl_rows = marketvals['ticker_symbol'] == 'AAPL'
after_window = marketvals['day_date'].between('2016-09-01', '2017-01-01')
marketvals.loc[aapl_rows & after_window, 'close_value'].describe()

Their stock price actually increases because of this event, but let's look at the actual tweets to see what the topic is.

In [None]:
# AAPL tweets posted on 2016/08/30 or 2016/08/31 ...
is_aapl = tweets['ticker_symbol'] == 'AAPL'
in_window = tweets['date'].between('2016/08/30', '2016/08/31')
lowestaapltweets = tweets.loc[is_aapl & in_window]

# ... restricted to those with sentiment label 0.
lowestaapltweets = lowestaapltweets.loc[lowestaapltweets['sentiment'] == 0]

# Text of the ten most-liked of those tweets.
most_liked = lowestaapltweets.sort_values(by='like_num', ascending=False).head(n=10)
most_liked['body'].values

![screenshot](https://i.imgur.com/3yZleyB.png)
There was an [article in the NY Times](https://www.nytimes.com/2016/08/31/technology/apple-tax-eu-ireland.html) around this time that sparked outrage on Twitter, and this corresponds to both the lowest sentiment and the lowest stock price (if only by a little) for that 2-week period. However, as the article suggests, the price went up from Apple Inc. not paying their taxes in the EU.