In [128]:
# https://www.cs.toronto.edu/~lczhang/321/lec/rnn_notes.html

import csv
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchtext
import numpy as np
import matplotlib.pyplot as plt
import torchtext
import pandas

from dateutil import parser
import json

In [126]:
DIMENSION_SIZE = 50

def split_tweet(tweet):
    # separate punctuations
    tweet = tweet.replace(".", " . ") \
                 .replace(",", " , ") \
                 .replace(";", " ; ") \
                 .replace("?", " ? ") \
                 .replace("@", " @ ") \
                 .replace("#", " # ")
    return tweet.lower().split()
glove = torchtext.vocab.GloVe(name="6B", dim=DIMENSION_SIZE)

In [17]:
class RnnSentimentClf(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, num_rnn_stacks=1, dropout=0):
        super(RnnSentimentClf, self).__init__()
        self.emb = nn.Embedding.from_pretrained(glove.vectors)
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True, num_layers=num_rnn_stacks, dropout=dropout)
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        # Look up the embedding
        x = self.emb(x)
        # Forward propagate the RNN
        out, _ = self.rnn(x)
        # Pass the output of the last time step to the classifier
        out = self.fc(out[:, -1, :])
        return out

In [18]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [19]:
# df = pandas.read_csv(filepath_or_buffer="../data/tweets/tweets_remaining_09042020_16072020.csv", sep=';')
# df.columns
# df.shape

# df['created_at'] = df['created_at'].apply(lambda x: parser.parse(x).date().__str__())
# df.to_csv("../data/tweets/dated_unnannotated_tweets.csv", sep=';', line_terminator="\n")

df = pandas.read_csv("../data/tweets/dated_unnannotated_tweets.csv", sep=";", lineterminator="\n")
df.head

<bound method NDFrame.head of         Unnamed: 0      id  created_at  \
0                0       1  2020-04-09   
1                1       2  2020-04-09   
2                2       3  2020-04-09   
3                3       4  2020-04-09   
4                4       5  2020-04-09   
...            ...     ...         ...   
923668      923668  938668  2020-07-16   
923669      923669  938669  2020-07-16   
923670      923670  938670  2020-07-16   
923671      923671  938671  2020-07-16   
923672      923672  938672  2020-07-16   

                                                full_text  
0       @KennyDegu very very little volume. With $10T ...  
1       #ES_F achieved Target 2780 closing above 50% #...  
2       RT @KimbleCharting: Silver/Gold indicator crea...  
3       @Issaquahfunds Hedged our $MSFT position into ...  
4       RT @zipillinois: 3 Surprisingly Controversial ...  
...                                                   ...  
923668  RT @MadMraket: We tend to spend lot o

In [20]:
df['created_at'].unique()

array(['2020-04-09', '2020-04-10', '2020-04-11', '2020-04-12',
       '2020-04-13', '2020-04-14', '2020-04-15', '2020-04-16',
       '2020-04-17', '2020-04-18', '2020-04-19', '2020-04-20',
       '2020-04-21', '2020-04-22', '2020-04-23', '2020-04-24',
       '2020-04-25', '2020-04-26', '2020-04-27', '2020-04-28',
       '2020-04-29', '2020-05-03', '2020-05-04', '2020-05-05',
       '2020-05-06', '2020-05-07', '2020-05-08', '2020-05-09',
       '2020-05-28', '2020-05-29', '2020-05-30', '2020-05-31',
       '2020-06-01', '2020-06-02', '2020-06-03', '2020-06-04',
       '2020-06-05', '2020-06-06', '2020-06-07', '2020-06-08',
       '2020-06-09', '2020-06-10', '2020-06-11', '2020-06-13',
       '2020-06-14', '2020-06-15', '2020-06-16', '2020-06-17',
       '2020-06-18', '2020-06-19', '2020-06-20', '2020-06-21',
       '2020-06-22', '2020-06-23', '2020-06-24', '2020-06-25',
       '2020-06-26', '2020-06-27', '2020-06-28', '2020-06-29',
       '2020-06-30', '2020-07-01', '2020-07-02', '2020-

In [23]:
model = RnnSentimentClf(input_size=DIMENSION_SIZE, hidden_size=DIMENSION_SIZE, num_classes=2).to(device)
model.load_state_dict(torch.load("../data/models/sentiment-clfs/rnn-dim50.pt", map_location=device))
model.eval()

RnnSentimentClf(
  (emb): Embedding(400000, 50)
  (rnn): RNN(50, 50, batch_first=True)
  (fc): Linear(in_features=50, out_features=2, bias=True)
)

In [36]:
texts = df['full_text'].to_list()

texts = [split_tweet(text) for text in texts]


In [51]:
preds = []
index = 0
for text in texts:
    if index % 100000 == 0:
        print("At idx: {}".format(index))

    idxs = [glove.stoi[w] for w in text if w in glove.stoi] # keep words that has an embedding
    if len(idxs) == 0:
        pred = 0 # 0 by default
        print("Index {} has no valid words :(".format(index))
    else:
        idxs = torch.tensor(idxs, dtype=torch.int64)
        idxs = torch.unsqueeze(idxs, dim=0)

        out = model(idxs)
        pred = out.max(1, keepdim=True)[1][0].item()

    preds.append(pred)
    index += 1

print(len(preds))
# print(pred)

At idx: 0
At idx: 100000
At idx: 200000
Index 260520 has no valid words :(
At idx: 300000
At idx: 400000
At idx: 500000
At idx: 600000
At idx: 700000
At idx: 800000
Index 814926 has no valid words :(
At idx: 900000
923673


In [53]:
df['sentiment'] = preds

In [55]:
df['sentiment']

0         0
1         1
2         1
3         0
4         1
         ..
923668    1
923669    1
923670    1
923671    0
923672    1
Name: sentiment, Length: 923673, dtype: int64

In [56]:
df.to_csv("../data/tweets/dated_annotated_tweets.csv", sep=';', line_terminator="\n")

df = pandas.read_csv("../data/tweets/dated_annotated_tweets.csv", sep=";", lineterminator="\n")


In [58]:
df.head

<bound method NDFrame.head of         Unnamed: 0.1  Unnamed: 0      id  created_at  \
0                  0           0       1  2020-04-09   
1                  1           1       2  2020-04-09   
2                  2           2       3  2020-04-09   
3                  3           3       4  2020-04-09   
4                  4           4       5  2020-04-09   
...              ...         ...     ...         ...   
923668        923668      923668  938668  2020-07-16   
923669        923669      923669  938669  2020-07-16   
923670        923670      923670  938670  2020-07-16   
923671        923671      923671  938671  2020-07-16   
923672        923672      923672  938672  2020-07-16   

                                                full_text  sentiment  
0       @KennyDegu very very little volume. With $10T ...          0  
1       #ES_F achieved Target 2780 closing above 50% #...          1  
2       RT @KimbleCharting: Silver/Gold indicator crea...          1  
3       @Issa

In [127]:
df = pandas.read_csv(filepath_or_buffer="../data/tweets/dated_annotated_tweets.csv", sep=';', lineterminator="\n")
df.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'id', 'created_at', 'full_text',
       'sentiment'],
      dtype='object')

In [104]:
def count_tweet_sentiment_by_keywords(keywords: list, df: pandas.DataFrame):
    sentiment_by_date = {}

    for i, row in df.iterrows():
        if i % 100000 == 0:
            print("At index {}".format(i))
        
        text = split_tweet(row['full_text'])
        text = [word.lower() for word in text]

        match = False
        for key in keywords:
            if key.lower() in text:
                match = True
                # print("Match: {}, {}".format(i, key))
                break
        
        if match:
            date = row['created_at']
            sentiment = row['sentiment']
            if date not in sentiment_by_date:
                sentiment_by_date[date] = {'p': 0, 'n': 0}
            sentiment_by_date[date]['p'] += sentiment
            sentiment_by_date[date]['n'] += (1 - sentiment)
    return sentiment_by_date


In [147]:
counts_apple = count_tweet_sentiment_by_keywords(keywords=['apple', 'appl', '$aapl', 'iphone', 'macbook'], df=df)
print(counts_apple)

with open("apple_sentiments.json", "w") as outf:
    json.dump(counts_apple, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 461, 'n': 521}, '2020-04-10': {'p': 266, 'n': 263}, '2020-04-11': {'p': 170, 'n': 350}, '2020-04-12': {'p': 135, 'n': 305}, '2020-04-13': {'p': 450, 'n': 613}, '2020-04-14': {'p': 853, 'n': 899}, '2020-04-15': {'p': 764, 'n': 746}, '2020-04-16': {'p': 594, 'n': 775}, '2020-04-17': {'p': 864, 'n': 1659}, '2020-04-18': {'p': 316, 'n': 462}, '2020-04-19': {'p': 294, 'n': 364}, '2020-04-20': {'p': 512, 'n': 666}, '2020-04-21': {'p': 835, 'n': 682}, '2020-04-22': {'p': 514, 'n': 618}, '2020-04-23': {'p': 636, 'n': 775}, '2020-04-24': {'p': 634, 'n': 742}, '2020-04-25': {'p': 291, 'n': 788}, '2020-04-26': {'p': 252, 'n': 707}, '2020-04-27': {'p': 822, 'n': 1022}, '2020-04-28': {'p': 591, 'n': 630}, '2020-04-29': {'p': 511, 'n': 634}, '2020-05-03': {'p': 261, 'n': 405}, '2020-05-04': {'p': 712, 'n': 709}, '2020-05-05': 

In [145]:
counts_google = count_tweet_sentiment_by_keywords(keywords=['google', 'googl', '$goog', 'alphabet', 'waymo', 'sergey', 'pichai'], df=df)
print(counts_google)

with open("google_sentiments.json", "w") as outf:
    json.dump(counts_apple, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 106, 'n': 140}, '2020-04-10': {'p': 136, 'n': 119}, '2020-04-11': {'p': 75, 'n': 50}, '2020-04-12': {'p': 50, 'n': 57}, '2020-04-13': {'p': 94, 'n': 153}, '2020-04-14': {'p': 111, 'n': 175}, '2020-04-15': {'p': 88, 'n': 161}, '2020-04-16': {'p': 143, 'n': 221}, '2020-04-17': {'p': 125, 'n': 173}, '2020-04-18': {'p': 66, 'n': 57}, '2020-04-19': {'p': 53, 'n': 70}, '2020-04-20': {'p': 126, 'n': 171}, '2020-04-21': {'p': 162, 'n': 151}, '2020-04-22': {'p': 105, 'n': 116}, '2020-04-23': {'p': 153, 'n': 199}, '2020-04-24': {'p': 128, 'n': 220}, '2020-04-25': {'p': 49, 'n': 85}, '2020-04-26': {'p': 71, 'n': 92}, '2020-04-27': {'p': 177, 'n': 193}, '2020-04-28': {'p': 590, 'n': 552}, '2020-04-29': {'p': 449, 'n': 448}, '2020-05-03': {'p': 65, 'n': 58}, '2020-05-04': {'p': 99, 'n': 139}, '2020-05-05': {'p': 121, 'n': 165

In [146]:
counts_amzn = count_tweet_sentiment_by_keywords(keywords=['$amzn', 'amazon', 'bezos', 'zoox', 'kuiper', 'alexa'], df=df)
print(counts_amzn)

with open("amzn_sentiments.json", "w") as outf:
    json.dump(counts_amzn, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 301, 'n': 390}, '2020-04-10': {'p': 104, 'n': 210}, '2020-04-11': {'p': 73, 'n': 311}, '2020-04-12': {'p': 138, 'n': 350}, '2020-04-13': {'p': 1126, 'n': 1011}, '2020-04-14': {'p': 1463, 'n': 1158}, '2020-04-15': {'p': 864, 'n': 976}, '2020-04-16': {'p': 1313, 'n': 1592}, '2020-04-17': {'p': 848, 'n': 1390}, '2020-04-18': {'p': 349, 'n': 490}, '2020-04-19': {'p': 282, 'n': 419}, '2020-04-20': {'p': 576, 'n': 756}, '2020-04-21': {'p': 540, 'n': 685}, '2020-04-22': {'p': 413, 'n': 568}, '2020-04-23': {'p': 661, 'n': 1139}, '2020-04-24': {'p': 460, 'n': 1557}, '2020-04-25': {'p': 236, 'n': 968}, '2020-04-26': {'p': 314, 'n': 788}, '2020-04-27': {'p': 519, 'n': 1067}, '2020-04-28': {'p': 634, 'n': 803}, '2020-04-29': {'p': 489, 'n': 895}, '2020-05-03': {'p': 224, 'n': 390}, '2020-05-04': {'p': 480, 'n': 822}, '2020-0

In [140]:
counts_msft = count_tweet_sentiment_by_keywords(keywords=['$msft', 'microsoft', 'nadella', 'windows'], df=df)
print(counts_msft)

with open("msft_sentiments.json", "w") as outf:
    json.dump(counts_msft, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 129, 'n': 355}, '2020-04-10': {'p': 52, 'n': 177}, '2020-04-11': {'p': 48, 'n': 118}, '2020-04-12': {'p': 82, 'n': 204}, '2020-04-13': {'p': 124, 'n': 337}, '2020-04-14': {'p': 199, 'n': 550}, '2020-04-15': {'p': 163, 'n': 584}, '2020-04-16': {'p': 206, 'n': 631}, '2020-04-17': {'p': 187, 'n': 1083}, '2020-04-18': {'p': 85, 'n': 401}, '2020-04-19': {'p': 96, 'n': 315}, '2020-04-20': {'p': 167, 'n': 551}, '2020-04-21': {'p': 235, 'n': 473}, '2020-04-22': {'p': 176, 'n': 469}, '2020-04-23': {'p': 165, 'n': 512}, '2020-04-24': {'p': 185, 'n': 561}, '2020-04-25': {'p': 118, 'n': 777}, '2020-04-26': {'p': 143, 'n': 646}, '2020-04-27': {'p': 233, 'n': 891}, '2020-04-28': {'p': 198, 'n': 604}, '2020-04-29': {'p': 674, 'n': 1749}, '2020-05-03': {'p': 47, 'n': 191}, '2020-05-04': {'p': 235, 'n': 531}, '2020-05-05': {'p': 

In [141]:
counts_bbrk = count_tweet_sentiment_by_keywords(keywords=['$bbr', '$bbrk', '$bbrkb', 'bbr', 'bbrk', 'bbrkb', 'berkshire', 'hathaway', 'warren', 'buffet', 'geico'], df=df)
print(counts_bbrk)

with open("bbrk_sentiments.json", "w") as outf:
    json.dump(counts_bbrk, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 5, 'n': 4}, '2020-04-10': {'p': 4, 'n': 3}, '2020-04-11': {'p': 7, 'n': 9}, '2020-04-12': {'p': 9, 'n': 8}, '2020-04-13': {'p': 13, 'n': 3}, '2020-04-14': {'p': 18, 'n': 4}, '2020-04-15': {'p': 10, 'n': 4}, '2020-04-16': {'p': 18, 'n': 6}, '2020-04-17': {'p': 93, 'n': 8}, '2020-04-18': {'p': 25, 'n': 2}, '2020-04-19': {'p': 5, 'n': 0}, '2020-04-20': {'p': 10, 'n': 3}, '2020-04-21': {'p': 14, 'n': 3}, '2020-04-22': {'p': 12, 'n': 6}, '2020-04-23': {'p': 18, 'n': 9}, '2020-04-24': {'p': 3, 'n': 4}, '2020-04-25': {'p': 11, 'n': 3}, '2020-04-26': {'p': 6, 'n': 0}, '2020-04-27': {'p': 6, 'n': 3}, '2020-04-28': {'p': 25, 'n': 5}, '2020-04-29': {'p': 14, 'n': 3}, '2020-05-03': {'p': 210, 'n': 82}, '2020-05-04': {'p': 118, 'n': 62}, '2020-05-05': {'p': 53, 'n': 18}, '2020-05-06': {'p': 28, 'n': 18}, '2020-05-07': {'p': 2

In [142]:
counts_fb = count_tweet_sentiment_by_keywords(keywords=['fb', 'facebook', 'meta', 'zuckerberg', 'oculus', 'instagram', 'whatsapp', '$fb'], df=df)
print(counts_fb)

with open("fb_sentiments.json", "w") as outf:
    json.dump(counts_fb, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 173, 'n': 267}, '2020-04-10': {'p': 91, 'n': 171}, '2020-04-11': {'p': 58, 'n': 124}, '2020-04-12': {'p': 62, 'n': 204}, '2020-04-13': {'p': 156, 'n': 357}, '2020-04-14': {'p': 240, 'n': 422}, '2020-04-15': {'p': 298, 'n': 337}, '2020-04-16': {'p': 267, 'n': 416}, '2020-04-17': {'p': 209, 'n': 860}, '2020-04-18': {'p': 212, 'n': 313}, '2020-04-19': {'p': 181, 'n': 265}, '2020-04-20': {'p': 364, 'n': 455}, '2020-04-21': {'p': 248, 'n': 446}, '2020-04-22': {'p': 638, 'n': 702}, '2020-04-23': {'p': 319, 'n': 570}, '2020-04-24': {'p': 1394, 'n': 812}, '2020-04-25': {'p': 544, 'n': 818}, '2020-04-26': {'p': 217, 'n': 686}, '2020-04-27': {'p': 568, 'n': 888}, '2020-04-28': {'p': 498, 'n': 664}, '2020-04-29': {'p': 1026, 'n': 1860}, '2020-05-03': {'p': 111, 'n': 267}, '2020-05-04': {'p': 199, 'n': 477}, '2020-05-05': {'

In [143]:
counts_jnj = count_tweet_sentiment_by_keywords(keywords=['$jnj', 'jnj', 'johnson', 'janssen', 'duato'], df=df)
print(counts_jnj)

with open("jnj_sentiments.json", "w") as outf:
    json.dump(counts_jnj, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 38, 'n': 79}, '2020-04-10': {'p': 25, 'n': 53}, '2020-04-11': {'p': 174, 'n': 67}, '2020-04-12': {'p': 128, 'n': 173}, '2020-04-13': {'p': 143, 'n': 230}, '2020-04-14': {'p': 445, 'n': 731}, '2020-04-15': {'p': 116, 'n': 291}, '2020-04-16': {'p': 82, 'n': 200}, '2020-04-17': {'p': 49, 'n': 186}, '2020-04-18': {'p': 21, 'n': 89}, '2020-04-19': {'p': 25, 'n': 126}, '2020-04-20': {'p': 38, 'n': 177}, '2020-04-21': {'p': 23, 'n': 139}, '2020-04-22': {'p': 40, 'n': 169}, '2020-04-23': {'p': 35, 'n': 209}, '2020-04-24': {'p': 47, 'n': 154}, '2020-04-25': {'p': 20, 'n': 91}, '2020-04-26': {'p': 7, 'n': 47}, '2020-04-27': {'p': 24, 'n': 84}, '2020-04-28': {'p': 34, 'n': 124}, '2020-04-29': {'p': 32, 'n': 131}, '2020-05-03': {'p': 11, 'n': 24}, '2020-05-04': {'p': 32, 'n': 73}, '2020-05-05': {'p': 33, 'n': 108}, '2020-05-

In [148]:
counts_jpm = count_tweet_sentiment_by_keywords(keywords=['jpm', '$jpm', 'jp', 'morgan', 'jpmorgan', 'chase', 'dimon'], df=df)
print(counts_jpm)

with open("jpm_sentiments.json", "w") as outf:
    json.dump(counts_jpm, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 208, 'n': 370}, '2020-04-10': {'p': 77, 'n': 149}, '2020-04-11': {'p': 207, 'n': 106}, '2020-04-12': {'p': 202, 'n': 245}, '2020-04-13': {'p': 339, 'n': 347}, '2020-04-14': {'p': 943, 'n': 1268}, '2020-04-15': {'p': 339, 'n': 435}, '2020-04-16': {'p': 193, 'n': 266}, '2020-04-17': {'p': 292, 'n': 232}, '2020-04-18': {'p': 139, 'n': 136}, '2020-04-19': {'p': 115, 'n': 147}, '2020-04-20': {'p': 147, 'n': 192}, '2020-04-21': {'p': 165, 'n': 176}, '2020-04-22': {'p': 207, 'n': 279}, '2020-04-23': {'p': 193, 'n': 256}, '2020-04-24': {'p': 131, 'n': 149}, '2020-04-25': {'p': 35, 'n': 89}, '2020-04-26': {'p': 29, 'n': 51}, '2020-04-27': {'p': 145, 'n': 124}, '2020-04-28': {'p': 115, 'n': 107}, '2020-04-29': {'p': 88, 'n': 125}, '2020-05-03': {'p': 37, 'n': 49}, '2020-05-04': {'p': 179, 'n': 127}, '2020-05-05': {'p': 113

In [149]:
counts_visa = count_tweet_sentiment_by_keywords(keywords=['visa', '$v' 'cybersource', 'hock'], df=df)
print(counts_visa)

with open("visa_sentiments.json", "w") as outf:
    json.dump(counts_visa, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 91, 'n': 29}, '2020-04-10': {'p': 8, 'n': 3}, '2020-04-11': {'p': 2, 'n': 6}, '2020-04-12': {'p': 2, 'n': 7}, '2020-04-13': {'p': 5, 'n': 9}, '2020-04-14': {'p': 3, 'n': 20}, '2020-04-15': {'p': 6, 'n': 16}, '2020-04-16': {'p': 7, 'n': 2}, '2020-04-17': {'p': 10, 'n': 6}, '2020-04-18': {'p': 3, 'n': 6}, '2020-04-19': {'p': 4, 'n': 6}, '2020-04-20': {'p': 6, 'n': 5}, '2020-04-21': {'p': 5, 'n': 3}, '2020-04-22': {'p': 6, 'n': 11}, '2020-04-23': {'p': 7, 'n': 12}, '2020-04-24': {'p': 6, 'n': 5}, '2020-04-25': {'p': 4, 'n': 3}, '2020-04-26': {'p': 1, 'n': 6}, '2020-04-27': {'p': 4, 'n': 30}, '2020-04-28': {'p': 4, 'n': 5}, '2020-04-29': {'p': 11, 'n': 6}, '2020-05-03': {'p': 10, 'n': 1}, '2020-05-04': {'p': 7, 'n': 20}, '2020-05-05': {'p': 14, 'n': 11}, '2020-05-06': {'p': 10, 'n': 4}, '2020-05-07': {'p': 12, 'n': 9

In [150]:
counts_pg = count_tweet_sentiment_by_keywords(keywords=['$pg', 'pg', 'procter', 'gamble', 'moeller', 'braun', 'gillette'], df=df)
print(counts_pg)

with open("pg_sentiments.json", "w") as outf:
    json.dump(counts_pg, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 22, 'n': 30}, '2020-04-10': {'p': 7, 'n': 9}, '2020-04-11': {'p': 4, 'n': 10}, '2020-04-12': {'p': 6, 'n': 13}, '2020-04-13': {'p': 20, 'n': 43}, '2020-04-14': {'p': 68, 'n': 71}, '2020-04-15': {'p': 83, 'n': 69}, '2020-04-16': {'p': 53, 'n': 49}, '2020-04-17': {'p': 141, 'n': 204}, '2020-04-18': {'p': 34, 'n': 35}, '2020-04-19': {'p': 23, 'n': 20}, '2020-04-20': {'p': 34, 'n': 59}, '2020-04-21': {'p': 31, 'n': 36}, '2020-04-22': {'p': 56, 'n': 45}, '2020-04-23': {'p': 60, 'n': 40}, '2020-04-24': {'p': 41, 'n': 33}, '2020-04-25': {'p': 19, 'n': 8}, '2020-04-26': {'p': 22, 'n': 20}, '2020-04-27': {'p': 12, 'n': 38}, '2020-04-28': {'p': 14, 'n': 30}, '2020-04-29': {'p': 8, 'n': 28}, '2020-05-03': {'p': 9, 'n': 10}, '2020-05-04': {'p': 94, 'n': 19}, '2020-05-05': {'p': 17, 'n': 22}, '2020-05-06': {'p': 16, 'n': 31},

In [151]:
counts_ma = count_tweet_sentiment_by_keywords(keywords=['ma', '$ma', 'mastercard', 'miebach', 'wells', 'fargo', 'transfast'], df=df)
print(counts_ma)

with open("ma_sentiments.json", "w") as outf:
    json.dump(counts_ma, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 164, 'n': 73}, '2020-04-10': {'p': 30, 'n': 24}, '2020-04-11': {'p': 37, 'n': 23}, '2020-04-12': {'p': 23, 'n': 19}, '2020-04-13': {'p': 31, 'n': 50}, '2020-04-14': {'p': 100, 'n': 81}, '2020-04-15': {'p': 85, 'n': 83}, '2020-04-16': {'p': 71, 'n': 82}, '2020-04-17': {'p': 71, 'n': 74}, '2020-04-18': {'p': 31, 'n': 27}, '2020-04-19': {'p': 30, 'n': 32}, '2020-04-20': {'p': 108, 'n': 58}, '2020-04-21': {'p': 45, 'n': 46}, '2020-04-22': {'p': 69, 'n': 46}, '2020-04-23': {'p': 42, 'n': 65}, '2020-04-24': {'p': 51, 'n': 41}, '2020-04-25': {'p': 71, 'n': 64}, '2020-04-26': {'p': 53, 'n': 54}, '2020-04-27': {'p': 73, 'n': 101}, '2020-04-28': {'p': 88, 'n': 134}, '2020-04-29': {'p': 234, 'n': 167}, '2020-05-03': {'p': 29, 'n': 33}, '2020-05-04': {'p': 60, 'n': 62}, '2020-05-05': {'p': 75, 'n': 72}, '2020-05-06': {'p': 7

In [152]:
counts_intc = count_tweet_sentiment_by_keywords(keywords=['intc', '$intc', 'intel', 'pentium', 'gelsinger', 'mobileye', 'silicon'], df=df)
print(counts_intc)

with open("intc_sentiments.json", "w") as outf:
    json.dump(counts_intc, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 45, 'n': 126}, '2020-04-10': {'p': 28, 'n': 69}, '2020-04-11': {'p': 11, 'n': 42}, '2020-04-12': {'p': 9, 'n': 50}, '2020-04-13': {'p': 28, 'n': 106}, '2020-04-14': {'p': 32, 'n': 105}, '2020-04-15': {'p': 29, 'n': 84}, '2020-04-16': {'p': 36, 'n': 97}, '2020-04-17': {'p': 40, 'n': 111}, '2020-04-18': {'p': 20, 'n': 99}, '2020-04-19': {'p': 25, 'n': 115}, '2020-04-20': {'p': 38, 'n': 164}, '2020-04-21': {'p': 32, 'n': 128}, '2020-04-22': {'p': 64, 'n': 139}, '2020-04-23': {'p': 305, 'n': 641}, '2020-04-24': {'p': 187, 'n': 391}, '2020-04-25': {'p': 28, 'n': 110}, '2020-04-26': {'p': 19, 'n': 79}, '2020-04-27': {'p': 51, 'n': 109}, '2020-04-28': {'p': 32, 'n': 100}, '2020-04-29': {'p': 41, 'n': 121}, '2020-05-03': {'p': 18, 'n': 46}, '2020-05-04': {'p': 60, 'n': 96}, '2020-05-05': {'p': 53, 'n': 90}, '2020-05-06':

In [153]:
counts_unh = count_tweet_sentiment_by_keywords(keywords=['unh', '$unh', 'unitedhealth', 'uhg', 'optum'], df=df)
print(counts_unh)

with open("unh_sentiments.json", "w") as outf:
    json.dump(counts_unh, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 35, 'n': 39}, '2020-04-10': {'p': 20, 'n': 28}, '2020-04-11': {'p': 162, 'n': 27}, '2020-04-12': {'p': 111, 'n': 119}, '2020-04-13': {'p': 86, 'n': 54}, '2020-04-14': {'p': 113, 'n': 57}, '2020-04-15': {'p': 230, 'n': 207}, '2020-04-16': {'p': 90, 'n': 122}, '2020-04-17': {'p': 90, 'n': 105}, '2020-04-18': {'p': 30, 'n': 51}, '2020-04-19': {'p': 32, 'n': 23}, '2020-04-20': {'p': 17, 'n': 36}, '2020-04-21': {'p': 42, 'n': 39}, '2020-04-22': {'p': 27, 'n': 30}, '2020-04-23': {'p': 16, 'n': 71}, '2020-04-24': {'p': 30, 'n': 61}, '2020-04-25': {'p': 16, 'n': 51}, '2020-04-26': {'p': 18, 'n': 15}, '2020-04-27': {'p': 23, 'n': 36}, '2020-04-28': {'p': 12, 'n': 21}, '2020-04-29': {'p': 12, 'n': 13}, '2020-05-03': {'p': 7, 'n': 7}, '2020-05-04': {'p': 13, 'n': 17}, '2020-05-05': {'p': 11, 'n': 22}, '2020-05-06': {'p': 18

In [154]:
counts_bofa = count_tweet_sentiment_by_keywords(keywords=['bofa', 'bac', '$bac', 'moynihan', 'merrill', 'bankofamerica'], df=df)
print(counts_bofa)

with open("bofa_sentiments.json", "w") as outf:
    json.dump(counts_bofa, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 83, 'n': 182}, '2020-04-10': {'p': 45, 'n': 96}, '2020-04-11': {'p': 169, 'n': 61}, '2020-04-12': {'p': 118, 'n': 175}, '2020-04-13': {'p': 156, 'n': 224}, '2020-04-14': {'p': 179, 'n': 263}, '2020-04-15': {'p': 236, 'n': 568}, '2020-04-16': {'p': 105, 'n': 207}, '2020-04-17': {'p': 80, 'n': 213}, '2020-04-18': {'p': 21, 'n': 68}, '2020-04-19': {'p': 22, 'n': 89}, '2020-04-20': {'p': 45, 'n': 181}, '2020-04-21': {'p': 62, 'n': 163}, '2020-04-22': {'p': 185, 'n': 172}, '2020-04-23': {'p': 65, 'n': 182}, '2020-04-24': {'p': 55, 'n': 136}, '2020-04-25': {'p': 9, 'n': 78}, '2020-04-26': {'p': 49, 'n': 86}, '2020-04-27': {'p': 74, 'n': 140}, '2020-04-28': {'p': 69, 'n': 139}, '2020-04-29': {'p': 45, 'n': 105}, '2020-05-03': {'p': 18, 'n': 49}, '2020-05-04': {'p': 28, 'n': 97}, '2020-05-05': {'p': 22, 'n': 93}, '2020-0

In [155]:
counts_atnt = count_tweet_sentiment_by_keywords(keywords=['$t', 'atnt', 'telecommunication', 'telecommunications', 'stankey'], df=df)
print(counts_atnt)

with open("atnt_sentiments.json", "w") as outf:
    json.dump(counts_atnt, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 30, 'n': 59}, '2020-04-10': {'p': 34, 'n': 69}, '2020-04-11': {'p': 10, 'n': 24}, '2020-04-12': {'p': 17, 'n': 26}, '2020-04-13': {'p': 46, 'n': 96}, '2020-04-14': {'p': 60, 'n': 75}, '2020-04-15': {'p': 77, 'n': 103}, '2020-04-16': {'p': 50, 'n': 77}, '2020-04-17': {'p': 51, 'n': 102}, '2020-04-18': {'p': 28, 'n': 88}, '2020-04-19': {'p': 44, 'n': 94}, '2020-04-20': {'p': 63, 'n': 129}, '2020-04-21': {'p': 77, 'n': 90}, '2020-04-22': {'p': 124, 'n': 285}, '2020-04-23': {'p': 79, 'n': 133}, '2020-04-24': {'p': 206, 'n': 177}, '2020-04-25': {'p': 42, 'n': 57}, '2020-04-26': {'p': 47, 'n': 60}, '2020-04-27': {'p': 69, 'n': 96}, '2020-04-28': {'p': 61, 'n': 84}, '2020-04-29': {'p': 60, 'n': 71}, '2020-05-03': {'p': 22, 'n': 29}, '2020-05-04': {'p': 42, 'n': 62}, '2020-05-05': {'p': 44, 'n': 54}, '2020-05-06': {'p': 

In [156]:
counts_hd = count_tweet_sentiment_by_keywords(keywords=['$hd', 'depot', 'homedepot', 'decker'], df=df)
print(counts_hd)

with open("hd_sentiments.json", "w") as outf:
    json.dump(counts_hd, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 9, 'n': 47}, '2020-04-10': {'p': 9, 'n': 35}, '2020-04-11': {'p': 7, 'n': 34}, '2020-04-12': {'p': 52, 'n': 26}, '2020-04-13': {'p': 18, 'n': 41}, '2020-04-14': {'p': 14, 'n': 67}, '2020-04-15': {'p': 24, 'n': 101}, '2020-04-16': {'p': 15, 'n': 72}, '2020-04-17': {'p': 23, 'n': 89}, '2020-04-18': {'p': 15, 'n': 51}, '2020-04-19': {'p': 16, 'n': 65}, '2020-04-20': {'p': 18, 'n': 81}, '2020-04-21': {'p': 10, 'n': 73}, '2020-04-22': {'p': 19, 'n': 51}, '2020-04-23': {'p': 11, 'n': 49}, '2020-04-24': {'p': 23, 'n': 61}, '2020-04-25': {'p': 14, 'n': 65}, '2020-04-26': {'p': 31, 'n': 52}, '2020-04-27': {'p': 45, 'n': 70}, '2020-04-28': {'p': 11, 'n': 37}, '2020-04-29': {'p': 13, 'n': 36}, '2020-05-03': {'p': 14, 'n': 49}, '2020-05-04': {'p': 23, 'n': 66}, '2020-05-05': {'p': 34, 'n': 73}, '2020-05-06': {'p': 20, 'n': 6

In [158]:
counts_xom = count_tweet_sentiment_by_keywords(keywords=['$xom', 'exxon', 'mobil', 'xom', 'oil'], df=df)
print(counts_xom)

with open("xom_sentiments.json", "w") as outf:
    json.dump(counts_xom, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 287, 'n': 193}, '2020-04-10': {'p': 130, 'n': 68}, '2020-04-11': {'p': 81, 'n': 36}, '2020-04-12': {'p': 137, 'n': 79}, '2020-04-13': {'p': 198, 'n': 151}, '2020-04-14': {'p': 192, 'n': 164}, '2020-04-15': {'p': 205, 'n': 169}, '2020-04-16': {'p': 125, 'n': 126}, '2020-04-17': {'p': 223, 'n': 170}, '2020-04-18': {'p': 68, 'n': 41}, '2020-04-19': {'p': 87, 'n': 68}, '2020-04-20': {'p': 625, 'n': 755}, '2020-04-21': {'p': 565, 'n': 589}, '2020-04-22': {'p': 446, 'n': 347}, '2020-04-23': {'p': 337, 'n': 256}, '2020-04-24': {'p': 191, 'n': 171}, '2020-04-25': {'p': 109, 'n': 120}, '2020-04-26': {'p': 116, 'n': 121}, '2020-04-27': {'p': 306, 'n': 243}, '2020-04-28': {'p': 184, 'n': 188}, '2020-04-29': {'p': 193, 'n': 219}, '2020-05-03': {'p': 90, 'n': 52}, '2020-05-04': {'p': 206, 'n': 134}, '2020-05-05': {'p': 200, '

In [159]:
counts_dis = count_tweet_sentiment_by_keywords(keywords=['$dis', 'disney', 'walt', 'hulu'], df=df)
print(counts_dis)

with open("dis_sentiments.json", "w") as outf:
    json.dump(counts_dis, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 355, 'n': 542}, '2020-04-10': {'p': 104, 'n': 133}, '2020-04-11': {'p': 77, 'n': 116}, '2020-04-12': {'p': 117, 'n': 130}, '2020-04-13': {'p': 217, 'n': 360}, '2020-04-14': {'p': 186, 'n': 282}, '2020-04-15': {'p': 147, 'n': 238}, '2020-04-16': {'p': 131, 'n': 256}, '2020-04-17': {'p': 152, 'n': 272}, '2020-04-18': {'p': 65, 'n': 98}, '2020-04-19': {'p': 123, 'n': 155}, '2020-04-20': {'p': 266, 'n': 337}, '2020-04-21': {'p': 206, 'n': 276}, '2020-04-22': {'p': 122, 'n': 243}, '2020-04-23': {'p': 161, 'n': 255}, '2020-04-24': {'p': 125, 'n': 181}, '2020-04-25': {'p': 45, 'n': 87}, '2020-04-26': {'p': 50, 'n': 125}, '2020-04-27': {'p': 135, 'n': 186}, '2020-04-28': {'p': 118, 'n': 199}, '2020-04-29': {'p': 137, 'n': 205}, '2020-05-03': {'p': 122, 'n': 262}, '2020-05-04': {'p': 261, 'n': 420}, '2020-05-05': {'p': 12

In [161]:
counts_vz = count_tweet_sentiment_by_keywords(keywords=['$vz', 'verizon'], df=df)
print(counts_vz)

with open("vz_sentiments.json", "w") as outf:
    json.dump(counts_vz, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 17, 'n': 28}, '2020-04-10': {'p': 14, 'n': 25}, '2020-04-11': {'p': 12, 'n': 7}, '2020-04-12': {'p': 8, 'n': 10}, '2020-04-13': {'p': 20, 'n': 26}, '2020-04-14': {'p': 32, 'n': 32}, '2020-04-15': {'p': 32, 'n': 28}, '2020-04-16': {'p': 145, 'n': 81}, '2020-04-17': {'p': 41, 'n': 72}, '2020-04-18': {'p': 16, 'n': 61}, '2020-04-19': {'p': 21, 'n': 118}, '2020-04-20': {'p': 30, 'n': 78}, '2020-04-21': {'p': 23, 'n': 46}, '2020-04-22': {'p': 28, 'n': 42}, '2020-04-23': {'p': 75, 'n': 78}, '2020-04-24': {'p': 157, 'n': 212}, '2020-04-25': {'p': 17, 'n': 26}, '2020-04-26': {'p': 10, 'n': 29}, '2020-04-27': {'p': 21, 'n': 61}, '2020-04-28': {'p': 34, 'n': 31}, '2020-04-29': {'p': 28, 'n': 30}, '2020-05-03': {'p': 5, 'n': 7}, '2020-05-04': {'p': 14, 'n': 23}, '2020-05-05': {'p': 21, 'n': 22}, '2020-05-06': {'p': 16, 'n':

In [162]:
counts_ko = count_tweet_sentiment_by_keywords(keywords=['$ko', 'coca', 'cola', 'cocacola', 'pepsi', 'pepsico', 'nooyi'], df=df)
print(counts_ko)

with open("ko_sentiments.json", "w") as outf:
    json.dump(counts_ko, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 25, 'n': 23}, '2020-04-10': {'p': 16, 'n': 29}, '2020-04-11': {'p': 13, 'n': 19}, '2020-04-12': {'p': 19, 'n': 21}, '2020-04-13': {'p': 25, 'n': 43}, '2020-04-14': {'p': 35, 'n': 52}, '2020-04-15': {'p': 22, 'n': 74}, '2020-04-16': {'p': 27, 'n': 59}, '2020-04-17': {'p': 18, 'n': 60}, '2020-04-18': {'p': 31, 'n': 98}, '2020-04-19': {'p': 31, 'n': 131}, '2020-04-20': {'p': 61, 'n': 133}, '2020-04-21': {'p': 231, 'n': 274}, '2020-04-22': {'p': 63, 'n': 91}, '2020-04-23': {'p': 32, 'n': 58}, '2020-04-24': {'p': 33, 'n': 59}, '2020-04-25': {'p': 19, 'n': 58}, '2020-04-26': {'p': 27, 'n': 53}, '2020-04-27': {'p': 77, 'n': 58}, '2020-04-28': {'p': 176, 'n': 96}, '2020-04-29': {'p': 42, 'n': 45}, '2020-05-03': {'p': 28, 'n': 33}, '2020-05-04': {'p': 19, 'n': 27}, '2020-05-05': {'p': 58, 'n': 27}, '2020-05-06': {'p': 57,

In [163]:
counts_mrk = count_tweet_sentiment_by_keywords(keywords=['$mrk', 'merck', 'pharma', 'pharmaceutical', 'acceleron'], df=df)
print(counts_mrk)

with open("mrk_sentiments.json", "w") as outf:
    json.dump(counts_mrk, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 47, 'n': 37}, '2020-04-10': {'p': 32, 'n': 23}, '2020-04-11': {'p': 24, 'n': 28}, '2020-04-12': {'p': 20, 'n': 10}, '2020-04-13': {'p': 46, 'n': 54}, '2020-04-14': {'p': 53, 'n': 95}, '2020-04-15': {'p': 73, 'n': 55}, '2020-04-16': {'p': 60, 'n': 62}, '2020-04-17': {'p': 49, 'n': 63}, '2020-04-18': {'p': 30, 'n': 19}, '2020-04-19': {'p': 31, 'n': 23}, '2020-04-20': {'p': 81, 'n': 80}, '2020-04-21': {'p': 58, 'n': 56}, '2020-04-22': {'p': 64, 'n': 49}, '2020-04-23': {'p': 50, 'n': 94}, '2020-04-24': {'p': 75, 'n': 61}, '2020-04-25': {'p': 23, 'n': 38}, '2020-04-26': {'p': 49, 'n': 53}, '2020-04-27': {'p': 91, 'n': 113}, '2020-04-28': {'p': 219, 'n': 186}, '2020-04-29': {'p': 73, 'n': 83}, '2020-05-03': {'p': 26, 'n': 26}, '2020-05-04': {'p': 57, 'n': 62}, '2020-05-05': {'p': 40, 'n': 77}, '2020-05-06': {'p': 51, '

In [164]:
counts_cmcsa = count_tweet_sentiment_by_keywords(keywords=['$cmcsa', 'comcast', 'nbc', 'xfinity'], df=df)
print(counts_cmcsa)

with open("cmcsa_sentiments.json", "w") as outf:
    json.dump(counts_cmcsa, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 13, 'n': 25}, '2020-04-10': {'p': 13, 'n': 24}, '2020-04-11': {'p': 5, 'n': 5}, '2020-04-12': {'p': 9, 'n': 7}, '2020-04-13': {'p': 6, 'n': 25}, '2020-04-14': {'p': 26, 'n': 29}, '2020-04-15': {'p': 22, 'n': 34}, '2020-04-16': {'p': 16, 'n': 30}, '2020-04-17': {'p': 7, 'n': 44}, '2020-04-18': {'p': 10, 'n': 11}, '2020-04-19': {'p': 7, 'n': 11}, '2020-04-20': {'p': 19, 'n': 43}, '2020-04-21': {'p': 33, 'n': 38}, '2020-04-22': {'p': 15, 'n': 26}, '2020-04-23': {'p': 17, 'n': 29}, '2020-04-24': {'p': 22, 'n': 30}, '2020-04-25': {'p': 13, 'n': 17}, '2020-04-26': {'p': 6, 'n': 22}, '2020-04-27': {'p': 16, 'n': 31}, '2020-04-28': {'p': 13, 'n': 37}, '2020-04-29': {'p': 31, 'n': 96}, '2020-05-03': {'p': 13, 'n': 13}, '2020-05-04': {'p': 23, 'n': 29}, '2020-05-05': {'p': 16, 'n': 26}, '2020-05-06': {'p': 40, 'n': 31}, '2

In [165]:
counts_cvx = count_tweet_sentiment_by_keywords(keywords=['$cvx', 'cvx', 'chevron', 'oil', 'texaco'], df=df)
print(counts_cvx)

with open("cvx_sentiments.json", "w") as outf:
    json.dump(counts_cvx, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 177, 'n': 126}, '2020-04-10': {'p': 90, 'n': 59}, '2020-04-11': {'p': 67, 'n': 27}, '2020-04-12': {'p': 116, 'n': 66}, '2020-04-13': {'p': 157, 'n': 103}, '2020-04-14': {'p': 127, 'n': 139}, '2020-04-15': {'p': 172, 'n': 127}, '2020-04-16': {'p': 96, 'n': 82}, '2020-04-17': {'p': 157, 'n': 107}, '2020-04-18': {'p': 45, 'n': 33}, '2020-04-19': {'p': 65, 'n': 46}, '2020-04-20': {'p': 541, 'n': 637}, '2020-04-21': {'p': 487, 'n': 527}, '2020-04-22': {'p': 384, 'n': 296}, '2020-04-23': {'p': 319, 'n': 217}, '2020-04-24': {'p': 161, 'n': 161}, '2020-04-25': {'p': 78, 'n': 83}, '2020-04-26': {'p': 106, 'n': 92}, '2020-04-27': {'p': 271, 'n': 207}, '2020-04-28': {'p': 166, 'n': 153}, '2020-04-29': {'p': 150, 'n': 156}, '2020-05-03': {'p': 86, 'n': 37}, '2020-05-04': {'p': 165, 'n': 131}, '2020-05-05': {'p': 176, 'n': 12

In [166]:
counts_pep = count_tweet_sentiment_by_keywords(keywords=['$pep', 'pepsi', 'pepsico', 'tropicana'], df=df)
print(counts_pep)

with open("pep_sentiments.json", "w") as outf:
    json.dump(counts_pep, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 20, 'n': 18}, '2020-04-10': {'p': 9, 'n': 24}, '2020-04-11': {'p': 4, 'n': 13}, '2020-04-12': {'p': 8, 'n': 20}, '2020-04-13': {'p': 11, 'n': 26}, '2020-04-14': {'p': 27, 'n': 54}, '2020-04-15': {'p': 19, 'n': 63}, '2020-04-16': {'p': 14, 'n': 47}, '2020-04-17': {'p': 10, 'n': 52}, '2020-04-18': {'p': 14, 'n': 37}, '2020-04-19': {'p': 16, 'n': 55}, '2020-04-20': {'p': 22, 'n': 50}, '2020-04-21': {'p': 23, 'n': 51}, '2020-04-22': {'p': 13, 'n': 42}, '2020-04-23': {'p': 12, 'n': 11}, '2020-04-24': {'p': 27, 'n': 9}, '2020-04-25': {'p': 16, 'n': 46}, '2020-04-26': {'p': 77, 'n': 65}, '2020-04-27': {'p': 80, 'n': 64}, '2020-04-28': {'p': 285, 'n': 159}, '2020-04-29': {'p': 49, 'n': 48}, '2020-05-03': {'p': 18, 'n': 24}, '2020-05-04': {'p': 13, 'n': 32}, '2020-05-05': {'p': 58, 'n': 47}, '2020-05-06': {'p': 62, 'n': 3

In [167]:
counts_pfe = count_tweet_sentiment_by_keywords(keywords=['$pfe', 'pfizer'], df=df)
print(counts_pfe)

with open("pfe_sentiments.json", "w") as outf:
    json.dump(counts_pfe, outf)

At index 0
At index 100000
At index 200000
At index 300000
At index 400000
At index 500000
At index 600000
At index 700000
At index 800000
At index 900000
{'2020-04-09': {'p': 139, 'n': 104}, '2020-04-10': {'p': 38, 'n': 63}, '2020-04-11': {'p': 17, 'n': 26}, '2020-04-12': {'p': 4, 'n': 27}, '2020-04-13': {'p': 17, 'n': 75}, '2020-04-14': {'p': 20, 'n': 61}, '2020-04-15': {'p': 27, 'n': 121}, '2020-04-16': {'p': 16, 'n': 79}, '2020-04-17': {'p': 12, 'n': 86}, '2020-04-18': {'p': 8, 'n': 48}, '2020-04-19': {'p': 21, 'n': 51}, '2020-04-20': {'p': 29, 'n': 58}, '2020-04-21': {'p': 28, 'n': 48}, '2020-04-22': {'p': 78, 'n': 109}, '2020-04-23': {'p': 45, 'n': 106}, '2020-04-24': {'p': 47, 'n': 99}, '2020-04-25': {'p': 20, 'n': 473}, '2020-04-26': {'p': 31, 'n': 361}, '2020-04-27': {'p': 66, 'n': 261}, '2020-04-28': {'p': 229, 'n': 281}, '2020-04-29': {'p': 86, 'n': 156}, '2020-05-03': {'p': 25, 'n': 39}, '2020-05-04': {'p': 22, 'n': 60}, '2020-05-05': {'p': 119, 'n': 196}, '2020-05-06': {'p