In [1]:
import pandas as pd
import numpy as np

In [647]:
# Variable for determining if we should postprocess tweets from scratch.
# Set to false because we already have the postprocessed file available for reading.
postProcessTweets = False

# Variable for determining if we need to compute embeddings by label.
computeEmbeddingsByLabel = False

# Variable for determining if we need to compute cosine similarities.
computeCosineSimilarities = False

# Variable for determining if we need to compute sentiment
computeLastHourSentiments = False

computeLastHalfDaySentiments = True

computeLastDaySentiments = True

In [3]:
cool_cats_tweets_df = pd.read_csv('../twitter/data/preprocessed/coolcats_07-10_2021.csv')  

In [4]:
cool_cats_tweets_df.head(10)

Unnamed: 0,id,author_id,created_at,text
0,1454598113499533314,1452399811219628034,2021-10-30T23:56:30.000Z,"Cool Cat just listed under floor, seller NGMI:..."
1,1454597918191603716,47390321,2021-10-30T23:55:43.000Z,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....
2,1454596608411283458,1452399811219628034,2021-10-30T23:50:31.000Z,"Cool Cat just listed under floor, seller NGMI:..."
3,1454594614346883074,1371874606374588417,2021-10-30T23:42:35.000Z,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....
4,1454593025720688649,1435045789756235778,2021-10-30T23:36:17.000Z,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....
5,1454590584811925507,1422010380499488770,2021-10-30T23:26:35.000Z,CoolCats &amp; Goatz Mashup Halloween: #NewPro...
6,1454590075992477700,610878019,2021-10-30T23:24:33.000Z,RT @EthernalsNFT: How anxious are you to get o...
7,1454589750992687115,1452399811219628034,2021-10-30T23:23:16.000Z,Collections Floor Prices (live update):BAYC: 3...
8,1454588938585210888,1343771344274509824,2021-10-30T23:20:02.000Z,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....
9,1454588617435791361,1416458460854722561,2021-10-30T23:18:46.000Z,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....


In [5]:
# Create clean_text column with cleaned up tweet text: 
# - Lowercasing.
# - Removing whitespace noise.
# - Removing URLs.
# - Removing stopwords.
# - Lemmatization.

import nltk
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

import re

# Clean up a specific tweet.
def getCleanTweet(tweet):
    
    # Lowercasing.
    tweet = tweet.lower()
    
    # Removing extra spaces.
    tweet = " ".join(tweet.split())
    
    # Remove URLs
    tweet = re.sub(r"http\S+", "", tweet)
    
    tokenized_tweet = word_tokenize(tweet)
    
    # Removing stopwords.
    tokenized_tweet = [w for w in tokenized_tweet if not w in set(stopwords.words('english'))]
 
    # Stemming.
    ps = PorterStemmer()
    
    tokenized_tweet = [ps.stem(w) for w in tokenized_tweet]
    
    return " ".join(tokenized_tweet)

# Example
print("Original tweet: " + cool_cats_tweets_df['text'][100])
print("Cleaned tweet: " + getCleanTweet(cool_cats_tweets_df['text'][100]))

Original tweet: Say hello to the new member of our family#nfts #nft #digitalart #art #ctyptoart   #erhereum #nftlink #cryptoartist #blockchain #nftcollector #supducks #modernart #artcollectoe  #boredapeyachtclub #coolcats  #cryptopunk  #rectanglecats #cats #NFTGiveaway #SquidGame https://t.co/JinAlIHL7c
Cleaned tweet: say hello new member famili # nft # nft # digitalart # art # ctyptoart # erhereum # nftlink # cryptoartist # blockchain # nftcollector # supduck # modernart # artcollecto # boredapeyachtclub # coolcat # cryptopunk # rectanglecat # cat # nftgiveaway # squidgam


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\oswaldoolivo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\oswaldoolivo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [6]:
# Clean all tweets.

if postProcessTweets:
    import time 

    startTime = time.time()

    cleanTweets = []

    for i in range(len(cool_cats_tweets_df['text'])):
        if i % 5000 == 0:
            print("Cleaned " +str(i) + " out of " + str(len(cool_cats_tweets_df['text'])) + " tweets.")
        
        tweet = cool_cats_tweets_df['text'][i]
        cleanTweet = getCleanTweet(tweet)
    
        cleanTweets.append(cleanTweet)
    
    endTime = time.time()

    print("Cleaned tweets in " + str(endTime - startTime) + " seconds")

    cleanTweets[:10]

In [7]:
if postProcessTweets:
    cool_cats_tweets_df['cleaned_text'] = cleanTweets

    cool_cats_tweets_df.head(20)

In [8]:
# Save cleaned tweets to file.
if postProcessTweets:
    cool_cats_tweets_df.to_csv('../twitter/data/postprocessed/coolcats_07-10_2021.csv', index = True)

In [9]:
cool_cats_tweets_df = pd.read_csv('../twitter/data/postprocessed/coolcats_07-10_2021.csv')  

In [10]:
# Set string types for 'text' and 'cleaned_text'
cool_cats_tweets_df['text'] = cool_cats_tweets_df['text'].astype('string')
cool_cats_tweets_df['cleaned_text'] = cool_cats_tweets_df['text'].astype('string')

cool_cats_tweets_df.dtypes

Unnamed: 0       int64
id               int64
author_id        int64
created_at      object
text            string
cleaned_text    string
dtype: object

In [11]:
cool_cats_tweets_df.head(10)

Unnamed: 0.1,Unnamed: 0,id,author_id,created_at,text,cleaned_text
0,0,1454598113499533314,1452399811219628034,2021-10-30T23:56:30.000Z,"Cool Cat just listed under floor, seller NGMI:...","Cool Cat just listed under floor, seller NGMI:..."
1,1,1454597918191603716,47390321,2021-10-30T23:55:43.000Z,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....
2,2,1454596608411283458,1452399811219628034,2021-10-30T23:50:31.000Z,"Cool Cat just listed under floor, seller NGMI:...","Cool Cat just listed under floor, seller NGMI:..."
3,3,1454594614346883074,1371874606374588417,2021-10-30T23:42:35.000Z,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....
4,4,1454593025720688649,1435045789756235778,2021-10-30T23:36:17.000Z,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....
5,5,1454590584811925507,1422010380499488770,2021-10-30T23:26:35.000Z,CoolCats &amp; Goatz Mashup Halloween: #NewPro...,CoolCats &amp; Goatz Mashup Halloween: #NewPro...
6,6,1454590075992477700,610878019,2021-10-30T23:24:33.000Z,RT @EthernalsNFT: How anxious are you to get o...,RT @EthernalsNFT: How anxious are you to get o...
7,7,1454589750992687115,1452399811219628034,2021-10-30T23:23:16.000Z,Collections Floor Prices (live update):BAYC: 3...,Collections Floor Prices (live update):BAYC: 3...
8,8,1454588938585210888,1343771344274509824,2021-10-30T23:20:02.000Z,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....
9,9,1454588617435791361,1416458460854722561,2021-10-30T23:18:46.000Z,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....,RT @rex2_16: CoolCats went from: 1.5 ETH to 0....


In [681]:
len(cool_cats_tweets_df)

54453

In [12]:
cool_cats_sale_events_df = pd.read_csv('../opensea_client/sale_events.txt')

In [13]:
cool_cats_sale_events_df.head(10)

Unnamed: 0,token_id,collection,event_type,auction_type,bid_amount,ending_price,created_date,total_price,quantity
0,9932,cool-cats-nft,successful,,,,2021-10-21T19:16:22.725846,6550000000000000000,1
1,9932,cool-cats-nft,successful,,,,2021-10-11T17:16:09.440387,10490000000000000000,1
2,9932,cool-cats-nft,successful,,,,2021-09-23T08:30:18.304058,6000000000000000000,1
3,9932,cool-cats-nft,successful,,,,2021-08-29T01:44:44.449125,5250000000000000000,1
4,9925,cool-cats-nft,successful,,,,2021-08-17T03:46:32.538134,1510000000000000000,1
5,9925,cool-cats-nft,successful,,,,2021-08-13T17:59:33.661011,1660000000000000000,1
6,9925,cool-cats-nft,successful,,,,2021-08-04T23:52:14.987776,1150000000000000000,1
7,9924,cool-cats-nft,successful,,,,2021-08-05T12:07:20.368464,1740000000000000000,1
8,9923,cool-cats-nft,successful,,,,2021-07-17T05:50:57.697687,690000000000000000,1
9,9921,cool-cats-nft,successful,,,,2021-08-22T14:51:26.933741,1649000000000000000,1


In [14]:
from transformers import BertTokenizer, TFBertModel, BertConfig,TFDistilBertModel,DistilBertTokenizer,DistilBertConfig

In [15]:
distilbertTokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
distilbertModel = TFDistilBertModel.from_pretrained('distilbert-base-uncased')

Some layers from the model checkpoint at distilbert-base-uncased were not used when initializing TFDistilBertModel: ['vocab_layer_norm', 'vocab_transform', 'activation_13', 'vocab_projector']
- This IS expected if you are initializing TFDistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFDistilBertModel were initialized from the model checkpoint at distilbert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


In [16]:
sentences=cool_cats_tweets_df['text']

In [17]:
distilbertTokenizer.tokenize(sentences[0])

['cool',
 'cat',
 'just',
 'listed',
 'under',
 'floor',
 ',',
 'seller',
 'ng',
 '##mi',
 ':',
 ':',
 '6',
 '.',
 '48',
 '##38',
 ':',
 'https',
 ':',
 '/',
 '/',
 't',
 '.',
 'co',
 '/',
 'x',
 '##ng',
 '##9',
 '##m',
 '##w',
 '##vu',
 '##as',
 '##bu',
 '##ying',
 'the',
 'dip',
 'an',
 '##on',
 '?',
 '#',
 'cool',
 '##cats',
 '#',
 'cool',
 '##cats',
 '##n',
 '##ft',
 '#',
 'opens',
 '##ean',
 '##ft',
 '#',
 'n',
 '##ft',
 '#',
 'opens',
 '##ea']

In [18]:
distilbertInput=distilbertTokenizer.encode_plus(sentences[0],add_special_tokens = True,pad_to_max_length = True,truncation=True)
distilbertInput=distilbertTokenizer.encode_plus(sentences[0],add_special_tokens = True)
distilbertInput



{'input_ids': [101, 4658, 4937, 2074, 3205, 2104, 2723, 1010, 14939, 12835, 4328, 1024, 1024, 1020, 1012, 4466, 22025, 1024, 16770, 1024, 1013, 1013, 1056, 1012, 2522, 1013, 1060, 3070, 2683, 2213, 2860, 19722, 3022, 8569, 14147, 1996, 16510, 2019, 2239, 1029, 1001, 4658, 19588, 1001, 4658, 19588, 2078, 6199, 1001, 7480, 11219, 6199, 1001, 1050, 6199, 1001, 7480, 5243, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [19]:
idInput=np.asarray(distilbertInput['input_ids'])
maskInput=np.asarray(distilbertInput['attention_mask'])

distilbertOutput=distilbertModel([idInput.reshape(1,-1),maskInput.reshape(1,-1)])

type(distilbertOutput),distilbertOutput

(transformers.modeling_tf_outputs.TFBaseModelOutput,
 TFBaseModelOutput(last_hidden_state=<tf.Tensor: shape=(1, 59, 768), dtype=float32, numpy=
 array([[[-0.11779414, -0.15132035,  0.02275763, ..., -0.04923558,
           0.32646263,  0.5366365 ],
         [-0.1085775 , -0.24667631,  0.66228324, ...,  0.08109096,
           0.43866563,  0.44020706],
         [-0.29714283, -0.31835958,  0.43593767, ...,  0.05396344,
           0.04600083,  0.77221096],
         ...,
         [-0.16104284,  0.2466539 ,  0.68685853, ..., -0.1420151 ,
           0.15295202,  0.2981444 ],
         [ 0.1825399 ,  0.21809575,  0.35958427, ..., -0.06513596,
          -0.21891102, -0.5222688 ],
         [ 0.8622943 ,  0.27708182, -0.10701422, ...,  0.1955083 ,
          -0.46374887, -0.16059066]]], dtype=float32)>, hidden_states=None, attentions=None))

In [20]:
distilbertOutput[0][:,0,:]

<tf.Tensor: shape=(1, 768), dtype=float32, numpy=
array([[-1.17794141e-01, -1.51320353e-01,  2.27576271e-02,
        -6.12904504e-02, -6.19337708e-03, -2.25467801e-01,
         1.87008709e-01,  3.91300321e-01, -2.64210403e-01,
        -2.36914694e-01, -2.90164769e-01, -1.79498136e-01,
        -4.65925299e-02,  2.84286916e-01,  6.11369126e-03,
         1.11567259e-01, -1.96444124e-01,  4.30956721e-01,
        -1.63892172e-02,  1.03265606e-02,  1.04786173e-01,
        -5.82086325e-01,  5.03869057e-02, -2.77416348e-01,
        -1.11984462e-02, -1.70876756e-01,  1.18852153e-01,
        -1.76714525e-01, -2.01792225e-01,  1.44718289e-01,
        -6.97725341e-02,  2.06656590e-01,  9.76492912e-02,
        -4.28009421e-01,  1.06794544e-01, -1.98422834e-01,
         7.99453855e-02, -1.26169384e-01,  9.21885148e-02,
         2.55157590e-01, -8.72819349e-02,  1.10985130e-01,
         2.69452929e-01, -2.82108877e-03,  1.56285092e-01,
        -1.01892076e-01, -2.75846863e+00, -4.64077741e-02,
      

In [21]:
cool_cats_sale_events_df.head(10)

Unnamed: 0,token_id,collection,event_type,auction_type,bid_amount,ending_price,created_date,total_price,quantity
0,9932,cool-cats-nft,successful,,,,2021-10-21T19:16:22.725846,6550000000000000000,1
1,9932,cool-cats-nft,successful,,,,2021-10-11T17:16:09.440387,10490000000000000000,1
2,9932,cool-cats-nft,successful,,,,2021-09-23T08:30:18.304058,6000000000000000000,1
3,9932,cool-cats-nft,successful,,,,2021-08-29T01:44:44.449125,5250000000000000000,1
4,9925,cool-cats-nft,successful,,,,2021-08-17T03:46:32.538134,1510000000000000000,1
5,9925,cool-cats-nft,successful,,,,2021-08-13T17:59:33.661011,1660000000000000000,1
6,9925,cool-cats-nft,successful,,,,2021-08-04T23:52:14.987776,1150000000000000000,1
7,9924,cool-cats-nft,successful,,,,2021-08-05T12:07:20.368464,1740000000000000000,1
8,9923,cool-cats-nft,successful,,,,2021-07-17T05:50:57.697687,690000000000000000,1
9,9921,cool-cats-nft,successful,,,,2021-08-22T14:51:26.933741,1649000000000000000,1


In [22]:
# Get all pairs of sequential sale events.
def getAllSaleEventPairs():
    from csv import reader
    
    saleEventPairs = []
    lastSalesByIds = dict()
    
    with open('../opensea_client/sale_events.txt', 'r') as sale_events_file:
        csvReader = reader(sale_events_file)
        currentRowNumber = 0
        
        for row in csvReader:
            
            if currentRowNumber == 0:
                currentRowNumber += 1
                continue
            
            # Construct an entry with the current row as the latest sale, and the entry in the 
            # dictionary as the previous sale
            # Id, Start Sale Date, Start Sale Price, End Sale Date, End Sale Price.
            if row[0] in lastSalesByIds:
                lastSale = lastSalesByIds[row[0]]
                saleEventPairs.append([row[0], row[6], float(row[7]), lastSale[6], float(lastSale[7])])
                
            lastSalesByIds[row[0]] = row
                         
            currentRowNumber += 1
                
        return saleEventPairs
            
saleEventPairs = getAllSaleEventPairs()
len(saleEventPairs)

1571

In [23]:
saleEventPairs[:5]

[['9932',
  '2021-10-11T17:16:09.440387',
  1.049e+19,
  '2021-10-21T19:16:22.725846',
  6.55e+18],
 ['9932',
  '2021-09-23T08:30:18.304058',
  6e+18,
  '2021-10-11T17:16:09.440387',
  1.049e+19],
 ['9932',
  '2021-08-29T01:44:44.449125',
  5.25e+18,
  '2021-09-23T08:30:18.304058',
  6e+18],
 ['9925',
  '2021-08-13T17:59:33.661011',
  1.66e+18,
  '2021-08-17T03:46:32.538134',
  1.51e+18],
 ['9925',
  '2021-08-04T23:52:14.987776',
  1.15e+18,
  '2021-08-13T17:59:33.661011',
  1.66e+18]]

In [24]:
# Get all the tweets between a start and end time.
def getTweetsInTimeInterval(tweetsDF, startTime, endTime):
    return tweetsDF[(startTime <= tweetsDF.created_at) & (tweetsDF.created_at <= endTime)]

print(saleEventPairs[0])
getTweetsInTimeInterval(cool_cats_tweets_df, saleEventPairs[0][1], saleEventPairs[0][3])    

['9932', '2021-10-11T17:16:09.440387', 1.049e+19, '2021-10-21T19:16:22.725846', 6.55e+18]


Unnamed: 0.1,Unnamed: 0,id,author_id,created_at,text,cleaned_text
973,973,1449024283166920706,2894816197,2021-10-15T14:48:05.000Z,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...
974,974,1449024233858797575,1446221459647565824,2021-10-15T14:47:53.000Z,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...
975,975,1449024171367747587,1435441708641181697,2021-10-15T14:47:38.000Z,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...
976,976,1449024082771644459,1433619860970430464,2021-10-15T14:47:17.000Z,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...
977,977,1449024075746193411,1566750793,2021-10-15T14:47:15.000Z,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...
...,...,...,...,...,...,...
49575,49575,1449026142493609987,1448329451511681031,2021-10-15T14:55:28.000Z,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...
49576,49576,1449025616158855171,1058626370765574144,2021-10-15T14:53:23.000Z,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...
49577,49577,1449025541185499138,1316406360762793984,2021-10-15T14:53:05.000Z,RT @NFTNywIRA: Tonight it's been a month since...,RT @NFTNywIRA: Tonight it's been a month since...
49578,49578,1449024948052365367,1294698814683709446,2021-10-15T14:50:43.000Z,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...


In [25]:
# Measure time of computing all tweet intervals.
import time

startTime = time.time()

tweetsBetweenSaleEvents = \
[getTweetsInTimeInterval(cool_cats_tweets_df, saleEvent[1], saleEvent[3])['cleaned_text'] for saleEvent in saleEventPairs]
    
endTime = time.time()

print(endTime - startTime)

20.661802530288696


In [26]:
# Create data frame from sale pair events.
sale_pairs_df = pd.DataFrame(saleEventPairs)

sale_pairs_df.columns = ['id', 'start_sale_date', 'start_sale_price', 'end_sale_date', 'end_sale_price']
sale_pairs_df = sale_pairs_df.reset_index(drop=True)

sale_pairs_df

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price
0,9932,2021-10-11T17:16:09.440387,1.049000e+19,2021-10-21T19:16:22.725846,6.550000e+18
1,9932,2021-09-23T08:30:18.304058,6.000000e+18,2021-10-11T17:16:09.440387,1.049000e+19
2,9932,2021-08-29T01:44:44.449125,5.250000e+18,2021-09-23T08:30:18.304058,6.000000e+18
3,9925,2021-08-13T17:59:33.661011,1.660000e+18,2021-08-17T03:46:32.538134,1.510000e+18
4,9925,2021-08-04T23:52:14.987776,1.150000e+18,2021-08-13T17:59:33.661011,1.660000e+18
...,...,...,...,...,...
1566,8967,2021-07-17T16:20:05.458264,1.380000e+18,2021-09-17T21:16:43.614358,8.500000e+18
1567,8967,2021-07-09T01:40:21.637272,1.250000e+18,2021-07-17T16:20:05.458264,1.380000e+18
1568,8967,2021-07-08T13:00:04.209249,7.500000e+17,2021-07-09T01:40:21.637272,1.250000e+18
1569,8967,2021-07-08T07:34:39.258665,1.250000e+18,2021-07-08T13:00:04.209249,7.500000e+17


In [27]:
# Add percent change column.
sale_pairs_df['percent_change'] = \
    (sale_pairs_df['end_sale_price'] - sale_pairs_df['start_sale_price']) / sale_pairs_df['start_sale_price']

sale_pairs_df

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change
0,9932,2021-10-11T17:16:09.440387,1.049000e+19,2021-10-21T19:16:22.725846,6.550000e+18,-0.375596
1,9932,2021-09-23T08:30:18.304058,6.000000e+18,2021-10-11T17:16:09.440387,1.049000e+19,0.748333
2,9932,2021-08-29T01:44:44.449125,5.250000e+18,2021-09-23T08:30:18.304058,6.000000e+18,0.142857
3,9925,2021-08-13T17:59:33.661011,1.660000e+18,2021-08-17T03:46:32.538134,1.510000e+18,-0.090361
4,9925,2021-08-04T23:52:14.987776,1.150000e+18,2021-08-13T17:59:33.661011,1.660000e+18,0.443478
...,...,...,...,...,...,...
1566,8967,2021-07-17T16:20:05.458264,1.380000e+18,2021-09-17T21:16:43.614358,8.500000e+18,5.159420
1567,8967,2021-07-09T01:40:21.637272,1.250000e+18,2021-07-17T16:20:05.458264,1.380000e+18,0.104000
1568,8967,2021-07-08T13:00:04.209249,7.500000e+17,2021-07-09T01:40:21.637272,1.250000e+18,0.666667
1569,8967,2021-07-08T07:34:39.258665,1.250000e+18,2021-07-08T13:00:04.209249,7.500000e+17,-0.400000


In [28]:
# Adding momentum of 5 percent change.
# Change greater than 5% percent => UP.
# Change greater than -5% percent => DOWN.
# Change between -5% and 5% => FLAT.

sale_pairs_df.loc[sale_pairs_df["percent_change"] > 0.05, "five_percent_momentum"] = "UP"
sale_pairs_df.loc[sale_pairs_df["percent_change"] < -0.05, "five_percent_momentum"] = "DOWN"
sale_pairs_df.loc[(sale_pairs_df["percent_change"] >= -0.05) & (sale_pairs_df["percent_change"] <= 0.05),\
                  "five_percent_momentum"] = "FLAT"

sale_pairs_df

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum
0,9932,2021-10-11T17:16:09.440387,1.049000e+19,2021-10-21T19:16:22.725846,6.550000e+18,-0.375596,DOWN
1,9932,2021-09-23T08:30:18.304058,6.000000e+18,2021-10-11T17:16:09.440387,1.049000e+19,0.748333,UP
2,9932,2021-08-29T01:44:44.449125,5.250000e+18,2021-09-23T08:30:18.304058,6.000000e+18,0.142857,UP
3,9925,2021-08-13T17:59:33.661011,1.660000e+18,2021-08-17T03:46:32.538134,1.510000e+18,-0.090361,DOWN
4,9925,2021-08-04T23:52:14.987776,1.150000e+18,2021-08-13T17:59:33.661011,1.660000e+18,0.443478,UP
...,...,...,...,...,...,...,...
1566,8967,2021-07-17T16:20:05.458264,1.380000e+18,2021-09-17T21:16:43.614358,8.500000e+18,5.159420,UP
1567,8967,2021-07-09T01:40:21.637272,1.250000e+18,2021-07-17T16:20:05.458264,1.380000e+18,0.104000,UP
1568,8967,2021-07-08T13:00:04.209249,7.500000e+17,2021-07-09T01:40:21.637272,1.250000e+18,0.666667,UP
1569,8967,2021-07-08T07:34:39.258665,1.250000e+18,2021-07-08T13:00:04.209249,7.500000e+17,-0.400000,DOWN


In [29]:
# Concatenate all the tweets between sale events.

concatenatedTweetsBetweenSaleEvents = []

for i in range(len(tweetsBetweenSaleEvents)):
    tweetsBetweenSale = tweetsBetweenSaleEvents[i]
    
    concatenatedTweets = " ".join(tweetsBetweenSale)
    concatenatedTweetsBetweenSaleEvents.append(concatenatedTweets)
    
len(concatenatedTweetsBetweenSaleEvents)

1571

In [30]:
print(concatenatedTweetsBetweenSaleEvents[0][:100])
print(concatenatedTweetsBetweenSaleEvents[1][:100])

RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCatsNFT  Current price 9 #ETH To Enter: Follow me Like &am
Do you know why square cats are such good pets? You can hunts round dogsAccess to Future drops, give


In [31]:
# Add concatenated tweets to dataframe.
sale_pairs_df['all_tweets'] = concatenatedTweetsBetweenSaleEvents

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat..."
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...


In [32]:
# Get the last hour of tweets between sale events.
from dateutil import parser
from datetime import datetime, timedelta

startTime = time.time()

lastHourTweets = []

for saleEvent in saleEventPairs:
    endDate = saleEvent[3]
    
    parsedEndDate = parser.parse(endDate)
    
    oneHourAgo = str(parsedEndDate - timedelta(hours=0, minutes=60))
    
    tweetsSinceLastHour = getTweetsInTimeInterval(cool_cats_tweets_df, oneHourAgo, endDate)['cleaned_text']
    
    concatenatedTweetsSinceLastHour = " ".join(tweetsSinceLastHour)
    
    lastHourTweets.append(concatenatedTweetsSinceLastHour)

endTime = time.time()

print(endTime - startTime)

18.09748411178589


In [642]:
# Get the last half day of tweets between sale events.
startTime = time.time()

lastHalfDayTweets = []

for saleEvent in saleEventPairs:
    endDate = saleEvent[3]
    
    parsedEndDate = parser.parse(endDate)
    
    halfDay = str(parsedEndDate - timedelta(hours=12, minutes=0))
    
    tweetsSinceLastHalfDay = getTweetsInTimeInterval(cool_cats_tweets_df, halfDay, endDate)['cleaned_text']
    
    concatenatedTweetsSinceLastHalfDay = " ".join(tweetsSinceLastHalfDay)
    
    lastHalfDayTweets.append(concatenatedTweetsSinceLastHalfDay)

endTime = time.time()

print(endTime - startTime)

12.137023210525513


In [505]:
# Get the last day of tweets between sale events.
startTime = time.time()

lastDayTweets = []

for saleEvent in saleEventPairs:
    endDate = saleEvent[3]
    
    parsedEndDate = parser.parse(endDate)
    
    yesterday = str(parsedEndDate - timedelta(hours=24, minutes=0))
    
    tweetsSinceLastDay = getTweetsInTimeInterval(cool_cats_tweets_df, yesterday, endDate)['cleaned_text']
    
    concatenatedTweetsSinceLastDay = " ".join(tweetsSinceLastDay)
    
    lastDayTweets.append(concatenatedTweetsSinceLastDay)

endTime = time.time()

print(endTime - startTime)

11.105686902999878


In [33]:
# Add last hour tweets to the dataframe.
sale_pairs_df['last_hour_tweets'] = lastHourTweets

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat..."
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...


In [643]:
# Add last half day tweets to the dataframe.
sale_pairs_df['last_halfday_tweets'] = lastHalfDayTweets

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,...,last_day_topic_1,last_day_topic_2,last_day_topic_3,last_day_topic_4,last_day_topic_5,last_day_topic_6,last_day_topic_7,last_day_topic_8,last_day_topic_9,last_halfday_tweets
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,...,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,RT @gimmocrypto: This gif of @mutantcats purch...
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,...,0.00456,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,Do you know why square cats are such good pets...
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,...,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,"#24px - anon dev. no roadmap. just pixels, cat..."
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,...,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,...,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,RT @BullieverIsland: Celebrating Citizens of B...
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,...,0.010895,0.010895,0.010895,0.010895,0.010895,0.010895,0.010895,0.010896,0.010895,RT @DamianSpriggs: I have always wanted to be ...
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,...,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,...,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,...,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,Omg looooool 2.2M followers 50E saleI can't g...
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,...,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,RT @SpaceLabCrypto: If you are worried that yo...


In [644]:
# Add last day tweets to the dataframe.
sale_pairs_df['last_day_tweets'] = lastDayTweets

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,...,last_day_topic_1,last_day_topic_2,last_day_topic_3,last_day_topic_4,last_day_topic_5,last_day_topic_6,last_day_topic_7,last_day_topic_8,last_day_topic_9,last_halfday_tweets
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,...,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,RT @gimmocrypto: This gif of @mutantcats purch...
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,...,0.00456,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,Do you know why square cats are such good pets...
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,...,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,"#24px - anon dev. no roadmap. just pixels, cat..."
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,...,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,...,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,RT @BullieverIsland: Celebrating Citizens of B...
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,...,0.010895,0.010895,0.010895,0.010895,0.010895,0.010895,0.010895,0.010896,0.010895,RT @DamianSpriggs: I have always wanted to be ...
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,...,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,...,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,...,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,Omg looooool 2.2M followers 50E saleI can't g...
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,...,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,RT @SpaceLabCrypto: If you are worried that yo...


In [34]:
# Compute the number of tweets during the last hour for each sale event.
numLastHourTweets = []

for saleEvent in saleEventPairs:
    endDate = saleEvent[3]
    
    parsedEndDate = parser.parse(endDate)
    
    oneHourAgo = str(parsedEndDate - timedelta(hours=0, minutes=60))
    
    tweetsSinceLastHour = getTweetsInTimeInterval(cool_cats_tweets_df, oneHourAgo, endDate)['cleaned_text']
        
    numLastHourTweets.append(len(tweetsSinceLastHour))

In [645]:
# Compute the number of tweets during the last halfday for each sale event.
numLastHalfDayTweets = []

for saleEvent in saleEventPairs:
    endDate = saleEvent[3]
    
    parsedEndDate = parser.parse(endDate)
    
    halfDay = str(parsedEndDate - timedelta(hours=12, minutes=0))
    
    tweetsSinceLastHalfDay = getTweetsInTimeInterval(cool_cats_tweets_df, halfDay, endDate)['cleaned_text']
        
    numLastHalfDayTweets.append(len(tweetsSinceLastHalfDay))

In [507]:
# Compute the number of tweets during the last day for each sale event.
numLastDayTweets = []

for saleEvent in saleEventPairs:
    endDate = saleEvent[3]
    
    parsedEndDate = parser.parse(endDate)
    
    yesterday = str(parsedEndDate - timedelta(hours=24, minutes=0))
    
    tweetsSinceLastDay = getTweetsInTimeInterval(cool_cats_tweets_df, yesterday, endDate)['cleaned_text']
        
    numLastDayTweets.append(len(tweetsSinceLastDay))

In [509]:
# Add num_last_hour_tweets to DF.
sale_pairs_df['num_last_hour_tweets'] = numLastHourTweets

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,...,last_hour_topic_4,last_hour_topic_5,last_hour_topic_6,last_hour_topic_7,last_hour_topic_8,last_hour_topic_9,last_two_hour_tweets,num_last_two_hour_tweets,last_day_tweets,num_last_day_tweets
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,...,0.00747,0.007462,0.007467,0.007462,0.403463,0.007462,RT @gimmocrypto: This gif of @mutantcats purch...,243,RT @gimmocrypto: This gif of @mutantcats purch...,408
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,...,0.005589,0.005589,0.00559,0.005589,0.005589,0.283805,Do you know why square cats are such good pets...,137,Do you know why square cats are such good pets...,354
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,...,0.006734,0.00674,0.006735,0.006734,0.006734,0.483122,"#24px - anon dev. no roadmap. just pixels, cat...",71,"#24px - anon dev. no roadmap. just pixels, cat...",228
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,...,0.486775,0.009024,0.009024,0.009024,0.009024,0.009024,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,134,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,134
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,...,0.008887,0.008887,0.008887,0.008887,0.008887,0.008887,RT @BullieverIsland: Celebrating Citizens of B...,266,RT @BullieverIsland: Celebrating Citizens of B...,455
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,...,0.007751,0.007751,0.007753,0.007751,0.007751,0.007751,RT @DamianSpriggs: I have always wanted to be ...,172,RT @DamianSpriggs: I have always wanted to be ...,566
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,...,0.008117,0.008117,0.008119,0.008117,0.008117,0.008117,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,528
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,...,0.454525,0.008515,0.008513,0.008513,0.008513,0.008513,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,138,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,138
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,...,0.007746,0.007746,0.007747,0.007746,0.007746,0.007746,Omg looooool 2.2M followers 50E saleI can't g...,586,Omg looooool 2.2M followers 50E saleI can't g...,925
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,...,0.010773,0.010773,0.010773,0.010773,0.010773,0.010773,RT @SpaceLabCrypto: If you are worried that yo...,450,RT @SpaceLabCrypto: If you are worried that yo...,450


In [646]:
# Add num_last_halfday_tweets to DF.
sale_pairs_df['num_last_halfday_tweets'] = numLastHalfDayTweets

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,...,last_day_topic_2,last_day_topic_3,last_day_topic_4,last_day_topic_5,last_day_topic_6,last_day_topic_7,last_day_topic_8,last_day_topic_9,last_halfday_tweets,num_last_halfday_tweets
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,...,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,RT @gimmocrypto: This gif of @mutantcats purch...,243
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,...,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,Do you know why square cats are such good pets...,137
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,...,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,"#24px - anon dev. no roadmap. just pixels, cat...",228
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,...,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,134
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,...,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,RT @BullieverIsland: Celebrating Citizens of B...,266
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,...,0.010895,0.010895,0.010895,0.010895,0.010895,0.010895,0.010896,0.010895,RT @DamianSpriggs: I have always wanted to be ...,172
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,...,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,528
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,...,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,138
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,...,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,Omg looooool 2.2M followers 50E saleI can't g...,586
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,...,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,RT @SpaceLabCrypto: If you are worried that yo...,450


In [510]:
# Add num_last_day_tweets to DF.
sale_pairs_df['num_last_day_tweets'] = numLastDayTweets

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,...,last_hour_topic_4,last_hour_topic_5,last_hour_topic_6,last_hour_topic_7,last_hour_topic_8,last_hour_topic_9,last_two_hour_tweets,num_last_two_hour_tweets,last_day_tweets,num_last_day_tweets
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,...,0.00747,0.007462,0.007467,0.007462,0.403463,0.007462,RT @gimmocrypto: This gif of @mutantcats purch...,243,RT @gimmocrypto: This gif of @mutantcats purch...,408
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,...,0.005589,0.005589,0.00559,0.005589,0.005589,0.283805,Do you know why square cats are such good pets...,137,Do you know why square cats are such good pets...,354
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,...,0.006734,0.00674,0.006735,0.006734,0.006734,0.483122,"#24px - anon dev. no roadmap. just pixels, cat...",71,"#24px - anon dev. no roadmap. just pixels, cat...",228
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,...,0.486775,0.009024,0.009024,0.009024,0.009024,0.009024,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,134,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,134
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,...,0.008887,0.008887,0.008887,0.008887,0.008887,0.008887,RT @BullieverIsland: Celebrating Citizens of B...,266,RT @BullieverIsland: Celebrating Citizens of B...,455
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,...,0.007751,0.007751,0.007753,0.007751,0.007751,0.007751,RT @DamianSpriggs: I have always wanted to be ...,172,RT @DamianSpriggs: I have always wanted to be ...,566
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,...,0.008117,0.008117,0.008119,0.008117,0.008117,0.008117,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,528
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,...,0.454525,0.008515,0.008513,0.008513,0.008513,0.008513,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,138,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,138
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,...,0.007746,0.007746,0.007747,0.007746,0.007746,0.007746,Omg looooool 2.2M followers 50E saleI can't g...,586,Omg looooool 2.2M followers 50E saleI can't g...,925
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,...,0.010773,0.010773,0.010773,0.010773,0.010773,0.010773,RT @SpaceLabCrypto: If you are worried that yo...,450,RT @SpaceLabCrypto: If you are worried that yo...,450


In [39]:
# Computing average sentiment of tweets during the last hour for each sale event,
# and average number of positive tweets.

# Initially tried to use TextBlob, but was too slow.
# Instead using Spacy.

if computeLastHourSentiments:

#from textblob import TextBlob
#from textblob.sentiments import NaiveBayesAnalyzer
#import nltk
#nltk.download('movie_reviews')

    lastHourAvgTweetSentiment = []

    tweetToSentimentAnalysis = dict()

    saleEventCounter = 0

    import eng_spacysentiment
    spacySentimentAnalysisModel = eng_spacysentiment.load()

    for saleEvent in saleEventPairs:
        print("Processing " + str(saleEventCounter) + " out of " + str(len(saleEventPairs)))
        saleEventCounter += 1
    
        endDate = saleEvent[3]
    
        parsedEndDate = parser.parse(endDate)
    
        oneHourAgo = str(parsedEndDate - timedelta(hours=0, minutes=60))
    
        tweetsSinceLastHour = getTweetsInTimeInterval(cool_cats_tweets_df, oneHourAgo, endDate)['cleaned_text']
    
        sentimentProbs = []
    
        numPositiveTweets = 0
    
        tweetCounter = 0

        for tweet in tweetsSinceLastHour:
            #print("Processing tweet " + str(tweetCounter) + " out of " + str(len(tweetsSinceLastHour)))
            tweetCounter += 1
        
            if tweet not in tweetToSentimentAnalysis:
                sentimentAnalysis = spacySentimentAnalysisModel(tweet)
                tweetToSentimentAnalysis[tweet] = sentimentAnalysis
            else:
                sentimentAnalysis = tweetToSentimentAnalysis[tweet]
        
            sentimentProbs.append(sentimentAnalysis.cats['positive'])
        
            if sentimentAnalysis.cats['positive'] > sentimentAnalysis.cats['negative']:
                numPositiveTweets += 1
        
        #if tweet not in tweetToSentimentAnalysis:
            #textBlobResult = TextBlob(tweet, analyzer=NaiveBayesAnalyzer())
            #sentimentAnalysis = textBlobResult.sentiment
            
            #tweetToSentimentAnalysis[tweet] = textBlobResult.sentiment
        #else:
            #sentimentAnalysis = tweetToSentimentAnalysis[tweet]
    
        #sentimentProbs.append(sentimentAnalysis.p_pos)
    
        lastHourAvgTweetSentiment.append([np.mean(sentimentProbs), numPositiveTweets / len(sentimentProbs)])
    
    lastHourAvgTweetSentiment    

Processing 0 out of 1571
Processing 1 out of 1571
Processing 2 out of 1571
Processing 3 out of 1571
Processing 4 out of 1571
Processing 5 out of 1571
Processing 6 out of 1571
Processing 7 out of 1571
Processing 8 out of 1571
Processing 9 out of 1571
Processing 10 out of 1571
Processing 11 out of 1571
Processing 12 out of 1571
Processing 13 out of 1571
Processing 14 out of 1571
Processing 15 out of 1571
Processing 16 out of 1571
Processing 17 out of 1571
Processing 18 out of 1571
Processing 19 out of 1571
Processing 20 out of 1571
Processing 21 out of 1571
Processing 22 out of 1571
Processing 23 out of 1571
Processing 24 out of 1571
Processing 25 out of 1571
Processing 26 out of 1571
Processing 27 out of 1571
Processing 28 out of 1571
Processing 29 out of 1571
Processing 30 out of 1571
Processing 31 out of 1571
Processing 32 out of 1571
Processing 33 out of 1571
Processing 34 out of 1571
Processing 35 out of 1571
Processing 36 out of 1571
Processing 37 out of 1571
Processing 38 out of 1

Processing 309 out of 1571
Processing 310 out of 1571
Processing 311 out of 1571
Processing 312 out of 1571
Processing 313 out of 1571
Processing 314 out of 1571
Processing 315 out of 1571
Processing 316 out of 1571
Processing 317 out of 1571
Processing 318 out of 1571
Processing 319 out of 1571
Processing 320 out of 1571
Processing 321 out of 1571
Processing 322 out of 1571
Processing 323 out of 1571
Processing 324 out of 1571
Processing 325 out of 1571
Processing 326 out of 1571
Processing 327 out of 1571
Processing 328 out of 1571
Processing 329 out of 1571
Processing 330 out of 1571
Processing 331 out of 1571
Processing 332 out of 1571
Processing 333 out of 1571
Processing 334 out of 1571
Processing 335 out of 1571
Processing 336 out of 1571
Processing 337 out of 1571
Processing 338 out of 1571
Processing 339 out of 1571
Processing 340 out of 1571
Processing 341 out of 1571
Processing 342 out of 1571
Processing 343 out of 1571
Processing 344 out of 1571
Processing 345 out of 1571
P

Processing 619 out of 1571
Processing 620 out of 1571
Processing 621 out of 1571
Processing 622 out of 1571
Processing 623 out of 1571
Processing 624 out of 1571
Processing 625 out of 1571
Processing 626 out of 1571
Processing 627 out of 1571
Processing 628 out of 1571
Processing 629 out of 1571
Processing 630 out of 1571
Processing 631 out of 1571
Processing 632 out of 1571
Processing 633 out of 1571
Processing 634 out of 1571
Processing 635 out of 1571
Processing 636 out of 1571
Processing 637 out of 1571
Processing 638 out of 1571
Processing 639 out of 1571
Processing 640 out of 1571
Processing 641 out of 1571
Processing 642 out of 1571
Processing 643 out of 1571
Processing 644 out of 1571
Processing 645 out of 1571
Processing 646 out of 1571
Processing 647 out of 1571
Processing 648 out of 1571
Processing 649 out of 1571
Processing 650 out of 1571
Processing 651 out of 1571
Processing 652 out of 1571
Processing 653 out of 1571
Processing 654 out of 1571
Processing 655 out of 1571
P

Processing 948 out of 1571
Processing 949 out of 1571
Processing 950 out of 1571
Processing 951 out of 1571
Processing 952 out of 1571
Processing 953 out of 1571
Processing 954 out of 1571
Processing 955 out of 1571
Processing 956 out of 1571
Processing 957 out of 1571
Processing 958 out of 1571
Processing 959 out of 1571
Processing 960 out of 1571
Processing 961 out of 1571
Processing 962 out of 1571
Processing 963 out of 1571
Processing 964 out of 1571
Processing 965 out of 1571
Processing 966 out of 1571
Processing 967 out of 1571
Processing 968 out of 1571
Processing 969 out of 1571
Processing 970 out of 1571
Processing 971 out of 1571
Processing 972 out of 1571
Processing 973 out of 1571
Processing 974 out of 1571
Processing 975 out of 1571
Processing 976 out of 1571
Processing 977 out of 1571
Processing 978 out of 1571
Processing 979 out of 1571
Processing 980 out of 1571
Processing 981 out of 1571
Processing 982 out of 1571
Processing 983 out of 1571
Processing 984 out of 1571
P

Processing 1267 out of 1571
Processing 1268 out of 1571
Processing 1269 out of 1571
Processing 1270 out of 1571
Processing 1271 out of 1571
Processing 1272 out of 1571
Processing 1273 out of 1571
Processing 1274 out of 1571
Processing 1275 out of 1571
Processing 1276 out of 1571
Processing 1277 out of 1571
Processing 1278 out of 1571
Processing 1279 out of 1571
Processing 1280 out of 1571
Processing 1281 out of 1571
Processing 1282 out of 1571
Processing 1283 out of 1571
Processing 1284 out of 1571
Processing 1285 out of 1571
Processing 1286 out of 1571
Processing 1287 out of 1571
Processing 1288 out of 1571
Processing 1289 out of 1571
Processing 1290 out of 1571
Processing 1291 out of 1571
Processing 1292 out of 1571
Processing 1293 out of 1571
Processing 1294 out of 1571
Processing 1295 out of 1571
Processing 1296 out of 1571
Processing 1297 out of 1571
Processing 1298 out of 1571
Processing 1299 out of 1571
Processing 1300 out of 1571
Processing 1301 out of 1571
Processing 1302 out 

[[0.4616507705504441, 0.4609053497942387],
 [0.5708190511152341, 0.6131386861313869],
 [0.3978795320385386, 0.38028169014084506],
 [0.6948880234928765, 0.75],
 [0.5784747496822232, 0.5864661654135338],
 [0.6347758366530108, 0.6046511627906976],
 [0.579229147666601, 0.5223880597014925],
 [0.6801675673701387, 0.7083333333333334],
 [0.7393301224156048, 0.7474402730375427],
 [0.734631182076156, 0.7387387387387387],
 [0.6887162667225746, 0.6885245901639344],
 [0.5893895128093519, 0.5746268656716418],
 [0.7601444336400832, 0.7562189054726368],
 [0.5925746345207832, 0.6037735849056604],
 [0.6009111392788279, 0.6061320754716981],
 [0.6086213509004279, 0.6060606060606061],
 [0.5113031991294462, 0.5180722891566265],
 [0.5557840160923523, 0.5666666666666667],
 [0.8032909981924342, 0.8120104438642297],
 [0.8097240591665994, 0.8173913043478261],
 [0.6947280573065171, 0.6908517350157729],
 [0.4499796388500031, 0.460093896713615],
 [0.6760780028432883, 0.6731182795698925],
 [0.6639951044777054, 0.682

In [648]:
# Computing average sentiment of tweets during the last halfday for each sale event,
# and average number of positive tweets.

if computeLastHalfDaySentiments:

    lastHalfDayAvgTweetSentiment = []

    tweetToSentimentAnalysis = dict()

    saleEventCounter = 0

    for saleEvent in saleEventPairs:
        print("Processing " + str(saleEventCounter) + " out of " + str(len(saleEventPairs)))
        saleEventCounter += 1
    
        endDate = saleEvent[3]
    
        parsedEndDate = parser.parse(endDate)
    
        halfDay = str(parsedEndDate - timedelta(hours=12, minutes=0))
    
        tweetsSinceLastHalfDay = getTweetsInTimeInterval(cool_cats_tweets_df, halfDay, endDate)['cleaned_text']
    
        sentimentProbs = []
    
        numPositiveTweets = 0
    
        tweetCounter = 0

        for tweet in tweetsSinceLastHalfDay:
            tweetCounter += 1
        
            if tweet not in tweetToSentimentAnalysis:
                sentimentAnalysis = spacySentimentAnalysisModel(tweet)
                tweetToSentimentAnalysis[tweet] = sentimentAnalysis
            else:
                sentimentAnalysis = tweetToSentimentAnalysis[tweet]
        
            sentimentProbs.append(sentimentAnalysis.cats['positive'])
        
            if sentimentAnalysis.cats['positive'] > sentimentAnalysis.cats['negative']:
                numPositiveTweets += 1
    
        lastHalfDayAvgTweetSentiment.append([np.mean(sentimentProbs), numPositiveTweets / len(sentimentProbs)])
    
    lastHalfDayAvgTweetSentiment  

Processing 0 out of 1571
Processing 1 out of 1571
Processing 2 out of 1571
Processing 3 out of 1571
Processing 4 out of 1571
Processing 5 out of 1571
Processing 6 out of 1571
Processing 7 out of 1571
Processing 8 out of 1571
Processing 9 out of 1571
Processing 10 out of 1571
Processing 11 out of 1571
Processing 12 out of 1571
Processing 13 out of 1571
Processing 14 out of 1571
Processing 15 out of 1571
Processing 16 out of 1571
Processing 17 out of 1571
Processing 18 out of 1571
Processing 19 out of 1571
Processing 20 out of 1571
Processing 21 out of 1571
Processing 22 out of 1571
Processing 23 out of 1571
Processing 24 out of 1571
Processing 25 out of 1571
Processing 26 out of 1571
Processing 27 out of 1571
Processing 28 out of 1571
Processing 29 out of 1571
Processing 30 out of 1571
Processing 31 out of 1571
Processing 32 out of 1571
Processing 33 out of 1571
Processing 34 out of 1571
Processing 35 out of 1571
Processing 36 out of 1571
Processing 37 out of 1571
Processing 38 out of 1

Processing 309 out of 1571
Processing 310 out of 1571
Processing 311 out of 1571
Processing 312 out of 1571
Processing 313 out of 1571
Processing 314 out of 1571
Processing 315 out of 1571
Processing 316 out of 1571
Processing 317 out of 1571
Processing 318 out of 1571
Processing 319 out of 1571
Processing 320 out of 1571
Processing 321 out of 1571
Processing 322 out of 1571
Processing 323 out of 1571
Processing 324 out of 1571
Processing 325 out of 1571
Processing 326 out of 1571
Processing 327 out of 1571
Processing 328 out of 1571
Processing 329 out of 1571
Processing 330 out of 1571
Processing 331 out of 1571
Processing 332 out of 1571
Processing 333 out of 1571
Processing 334 out of 1571
Processing 335 out of 1571
Processing 336 out of 1571
Processing 337 out of 1571
Processing 338 out of 1571
Processing 339 out of 1571
Processing 340 out of 1571
Processing 341 out of 1571
Processing 342 out of 1571
Processing 343 out of 1571
Processing 344 out of 1571
Processing 345 out of 1571
P

Processing 624 out of 1571
Processing 625 out of 1571
Processing 626 out of 1571
Processing 627 out of 1571
Processing 628 out of 1571
Processing 629 out of 1571
Processing 630 out of 1571
Processing 631 out of 1571
Processing 632 out of 1571
Processing 633 out of 1571
Processing 634 out of 1571
Processing 635 out of 1571
Processing 636 out of 1571
Processing 637 out of 1571
Processing 638 out of 1571
Processing 639 out of 1571
Processing 640 out of 1571
Processing 641 out of 1571
Processing 642 out of 1571
Processing 643 out of 1571
Processing 644 out of 1571
Processing 645 out of 1571
Processing 646 out of 1571
Processing 647 out of 1571
Processing 648 out of 1571
Processing 649 out of 1571
Processing 650 out of 1571
Processing 651 out of 1571
Processing 652 out of 1571
Processing 653 out of 1571
Processing 654 out of 1571
Processing 655 out of 1571
Processing 656 out of 1571
Processing 657 out of 1571
Processing 658 out of 1571
Processing 659 out of 1571
Processing 660 out of 1571
P

Processing 934 out of 1571
Processing 935 out of 1571
Processing 936 out of 1571
Processing 937 out of 1571
Processing 938 out of 1571
Processing 939 out of 1571
Processing 940 out of 1571
Processing 941 out of 1571
Processing 942 out of 1571
Processing 943 out of 1571
Processing 944 out of 1571
Processing 945 out of 1571
Processing 946 out of 1571
Processing 947 out of 1571
Processing 948 out of 1571
Processing 949 out of 1571
Processing 950 out of 1571
Processing 951 out of 1571
Processing 952 out of 1571
Processing 953 out of 1571
Processing 954 out of 1571
Processing 955 out of 1571
Processing 956 out of 1571
Processing 957 out of 1571
Processing 958 out of 1571
Processing 959 out of 1571
Processing 960 out of 1571
Processing 961 out of 1571
Processing 962 out of 1571
Processing 963 out of 1571
Processing 964 out of 1571
Processing 965 out of 1571
Processing 966 out of 1571
Processing 967 out of 1571
Processing 968 out of 1571
Processing 969 out of 1571
Processing 970 out of 1571
P

Processing 1237 out of 1571
Processing 1238 out of 1571
Processing 1239 out of 1571
Processing 1240 out of 1571
Processing 1241 out of 1571
Processing 1242 out of 1571
Processing 1243 out of 1571
Processing 1244 out of 1571
Processing 1245 out of 1571
Processing 1246 out of 1571
Processing 1247 out of 1571
Processing 1248 out of 1571
Processing 1249 out of 1571
Processing 1250 out of 1571
Processing 1251 out of 1571
Processing 1252 out of 1571
Processing 1253 out of 1571
Processing 1254 out of 1571
Processing 1255 out of 1571
Processing 1256 out of 1571
Processing 1257 out of 1571
Processing 1258 out of 1571
Processing 1259 out of 1571
Processing 1260 out of 1571
Processing 1261 out of 1571
Processing 1262 out of 1571
Processing 1263 out of 1571
Processing 1264 out of 1571
Processing 1265 out of 1571
Processing 1266 out of 1571
Processing 1267 out of 1571
Processing 1268 out of 1571
Processing 1269 out of 1571
Processing 1270 out of 1571
Processing 1271 out of 1571
Processing 1272 out 

Processing 1538 out of 1571
Processing 1539 out of 1571
Processing 1540 out of 1571
Processing 1541 out of 1571
Processing 1542 out of 1571
Processing 1543 out of 1571
Processing 1544 out of 1571
Processing 1545 out of 1571
Processing 1546 out of 1571
Processing 1547 out of 1571
Processing 1548 out of 1571
Processing 1549 out of 1571
Processing 1550 out of 1571
Processing 1551 out of 1571
Processing 1552 out of 1571
Processing 1553 out of 1571
Processing 1554 out of 1571
Processing 1555 out of 1571
Processing 1556 out of 1571
Processing 1557 out of 1571
Processing 1558 out of 1571
Processing 1559 out of 1571
Processing 1560 out of 1571
Processing 1561 out of 1571
Processing 1562 out of 1571
Processing 1563 out of 1571
Processing 1564 out of 1571
Processing 1565 out of 1571
Processing 1566 out of 1571
Processing 1567 out of 1571
Processing 1568 out of 1571
Processing 1569 out of 1571
Processing 1570 out of 1571


In [513]:
# Computing average sentiment of tweets during the last day for each sale event,
# and average number of positive tweets.

if computeLastDaySentiments:

    lastDayAvgTweetSentiment = []

    tweetToSentimentAnalysis = dict()

    saleEventCounter = 0

    for saleEvent in saleEventPairs:
        print("Processing " + str(saleEventCounter) + " out of " + str(len(saleEventPairs)))
        saleEventCounter += 1
    
        endDate = saleEvent[3]
    
        parsedEndDate = parser.parse(endDate)
    
        yesterday = str(parsedEndDate - timedelta(hours=24, minutes=0))
    
        tweetsSinceLastDay = getTweetsInTimeInterval(cool_cats_tweets_df, yesterday, endDate)['cleaned_text']
    
        sentimentProbs = []
    
        numPositiveTweets = 0
    
        tweetCounter = 0

        for tweet in tweetsSinceLastDay:
            tweetCounter += 1
        
            if tweet not in tweetToSentimentAnalysis:
                sentimentAnalysis = spacySentimentAnalysisModel(tweet)
                tweetToSentimentAnalysis[tweet] = sentimentAnalysis
            else:
                sentimentAnalysis = tweetToSentimentAnalysis[tweet]
        
            sentimentProbs.append(sentimentAnalysis.cats['positive'])
        
            if sentimentAnalysis.cats['positive'] > sentimentAnalysis.cats['negative']:
                numPositiveTweets += 1
    
        lastDayAvgTweetSentiment.append([np.mean(sentimentProbs), numPositiveTweets / len(sentimentProbs)])
    
    lastDayAvgTweetSentiment  

Processing 0 out of 1571
Processing 1 out of 1571
Processing 2 out of 1571
Processing 3 out of 1571
Processing 4 out of 1571
Processing 5 out of 1571
Processing 6 out of 1571
Processing 7 out of 1571
Processing 8 out of 1571
Processing 9 out of 1571
Processing 10 out of 1571
Processing 11 out of 1571
Processing 12 out of 1571
Processing 13 out of 1571
Processing 14 out of 1571
Processing 15 out of 1571
Processing 16 out of 1571
Processing 17 out of 1571
Processing 18 out of 1571
Processing 19 out of 1571
Processing 20 out of 1571
Processing 21 out of 1571
Processing 22 out of 1571
Processing 23 out of 1571
Processing 24 out of 1571
Processing 25 out of 1571
Processing 26 out of 1571
Processing 27 out of 1571
Processing 28 out of 1571
Processing 29 out of 1571
Processing 30 out of 1571
Processing 31 out of 1571
Processing 32 out of 1571
Processing 33 out of 1571
Processing 34 out of 1571
Processing 35 out of 1571
Processing 36 out of 1571
Processing 37 out of 1571
Processing 38 out of 1

Processing 309 out of 1571
Processing 310 out of 1571
Processing 311 out of 1571
Processing 312 out of 1571
Processing 313 out of 1571
Processing 314 out of 1571
Processing 315 out of 1571
Processing 316 out of 1571
Processing 317 out of 1571
Processing 318 out of 1571
Processing 319 out of 1571
Processing 320 out of 1571
Processing 321 out of 1571
Processing 322 out of 1571
Processing 323 out of 1571
Processing 324 out of 1571
Processing 325 out of 1571
Processing 326 out of 1571
Processing 327 out of 1571
Processing 328 out of 1571
Processing 329 out of 1571
Processing 330 out of 1571
Processing 331 out of 1571
Processing 332 out of 1571
Processing 333 out of 1571
Processing 334 out of 1571
Processing 335 out of 1571
Processing 336 out of 1571
Processing 337 out of 1571
Processing 338 out of 1571
Processing 339 out of 1571
Processing 340 out of 1571
Processing 341 out of 1571
Processing 342 out of 1571
Processing 343 out of 1571
Processing 344 out of 1571
Processing 345 out of 1571
P

Processing 619 out of 1571
Processing 620 out of 1571
Processing 621 out of 1571
Processing 622 out of 1571
Processing 623 out of 1571
Processing 624 out of 1571
Processing 625 out of 1571
Processing 626 out of 1571
Processing 627 out of 1571
Processing 628 out of 1571
Processing 629 out of 1571
Processing 630 out of 1571
Processing 631 out of 1571
Processing 632 out of 1571
Processing 633 out of 1571
Processing 634 out of 1571
Processing 635 out of 1571
Processing 636 out of 1571
Processing 637 out of 1571
Processing 638 out of 1571
Processing 639 out of 1571
Processing 640 out of 1571
Processing 641 out of 1571
Processing 642 out of 1571
Processing 643 out of 1571
Processing 644 out of 1571
Processing 645 out of 1571
Processing 646 out of 1571
Processing 647 out of 1571
Processing 648 out of 1571
Processing 649 out of 1571
Processing 650 out of 1571
Processing 651 out of 1571
Processing 652 out of 1571
Processing 653 out of 1571
Processing 654 out of 1571
Processing 655 out of 1571
P

Processing 937 out of 1571
Processing 938 out of 1571
Processing 939 out of 1571
Processing 940 out of 1571
Processing 941 out of 1571
Processing 942 out of 1571
Processing 943 out of 1571
Processing 944 out of 1571
Processing 945 out of 1571
Processing 946 out of 1571
Processing 947 out of 1571
Processing 948 out of 1571
Processing 949 out of 1571
Processing 950 out of 1571
Processing 951 out of 1571
Processing 952 out of 1571
Processing 953 out of 1571
Processing 954 out of 1571
Processing 955 out of 1571
Processing 956 out of 1571
Processing 957 out of 1571
Processing 958 out of 1571
Processing 959 out of 1571
Processing 960 out of 1571
Processing 961 out of 1571
Processing 962 out of 1571
Processing 963 out of 1571
Processing 964 out of 1571
Processing 965 out of 1571
Processing 966 out of 1571
Processing 967 out of 1571
Processing 968 out of 1571
Processing 969 out of 1571
Processing 970 out of 1571
Processing 971 out of 1571
Processing 972 out of 1571
Processing 973 out of 1571
P

Processing 1251 out of 1571
Processing 1252 out of 1571
Processing 1253 out of 1571
Processing 1254 out of 1571
Processing 1255 out of 1571
Processing 1256 out of 1571
Processing 1257 out of 1571
Processing 1258 out of 1571
Processing 1259 out of 1571
Processing 1260 out of 1571
Processing 1261 out of 1571
Processing 1262 out of 1571
Processing 1263 out of 1571
Processing 1264 out of 1571
Processing 1265 out of 1571
Processing 1266 out of 1571
Processing 1267 out of 1571
Processing 1268 out of 1571
Processing 1269 out of 1571
Processing 1270 out of 1571
Processing 1271 out of 1571
Processing 1272 out of 1571
Processing 1273 out of 1571
Processing 1274 out of 1571
Processing 1275 out of 1571
Processing 1276 out of 1571
Processing 1277 out of 1571
Processing 1278 out of 1571
Processing 1279 out of 1571
Processing 1280 out of 1571
Processing 1281 out of 1571
Processing 1282 out of 1571
Processing 1283 out of 1571
Processing 1284 out of 1571
Processing 1285 out of 1571
Processing 1286 out 

Processing 1553 out of 1571
Processing 1554 out of 1571
Processing 1555 out of 1571
Processing 1556 out of 1571
Processing 1557 out of 1571
Processing 1558 out of 1571
Processing 1559 out of 1571
Processing 1560 out of 1571
Processing 1561 out of 1571
Processing 1562 out of 1571
Processing 1563 out of 1571
Processing 1564 out of 1571
Processing 1565 out of 1571
Processing 1566 out of 1571
Processing 1567 out of 1571
Processing 1568 out of 1571
Processing 1569 out of 1571
Processing 1570 out of 1571


In [40]:
# Create Sentiment DF for the last hour.
last_hour_sentiment_df = pd.DataFrame(data = lastHourAvgTweetSentiment, columns = ["last_hour_avg_sentiment", "last_hour_pos_tweets"])

last_hour_sentiment_df.head(10)

Unnamed: 0,last_hour_avg_sentiment,last_hour_pos_tweets
0,0.461651,0.460905
1,0.570819,0.613139
2,0.39788,0.380282
3,0.694888,0.75
4,0.578475,0.586466
5,0.634776,0.604651
6,0.579229,0.522388
7,0.680168,0.708333
8,0.73933,0.74744
9,0.734631,0.738739


In [649]:
# Create Sentiment DF for the last halfday.
last_halfday_sentiment_df = pd.DataFrame(data = lastHalfDayAvgTweetSentiment, columns = ["last_halfday_avg_sentiment", "last_halfday_pos_tweets"])

last_halfday_sentiment_df.head(10)

Unnamed: 0,last_halfday_avg_sentiment,last_halfday_pos_tweets
0,0.461651,0.460905
1,0.570819,0.613139
2,0.533727,0.539474
3,0.634798,0.641791
4,0.578475,0.586466
5,0.634776,0.604651
6,0.646059,0.660985
7,0.63398,0.637681
8,0.73933,0.74744
9,0.67812,0.682222


In [514]:
# Create Sentiment DF for the last day.
last_day_sentiment_df = pd.DataFrame(data = lastDayAvgTweetSentiment, columns = ["last_day_avg_sentiment", "last_day_pos_tweets"])

last_day_sentiment_df.head(10)

Unnamed: 0,last_day_avg_sentiment,last_day_pos_tweets
0,0.530899,0.529412
1,0.525302,0.548023
2,0.533727,0.539474
3,0.634798,0.641791
4,0.530032,0.540659
5,0.658452,0.676678
6,0.646059,0.660985
7,0.63398,0.637681
8,0.710116,0.716757
9,0.67812,0.682222


In [41]:
if computeLastHourSentiments:
    last_hour_sentiment_df.to_csv('../twitter/data/postprocessed/last_hour_sentiments_coolcats_07-10_2021.csv', index = True)

In [650]:
if computeLastHalfDaySentiments:
    last_halfday_sentiment_df.to_csv('../twitter/data/postprocessed/last_halfday_sentiments_coolcats_07-10_2021.csv', index = True)

In [515]:
if computeLastDaySentiments:
    last_day_sentiment_df.to_csv('../twitter/data/postprocessed/last_day_sentiments_coolcats_07-10_2021.csv', index = True)

In [42]:
last_hour_sentiment_df = pd.read_csv('../twitter/data/postprocessed/last_hour_sentiments_coolcats_07-10_2021.csv')

In [651]:
last_halfday_sentiment_df = pd.read_csv('../twitter/data/postprocessed/last_halfday_sentiments_coolcats_07-10_2021.csv')

In [516]:
last_day_sentiment_df = pd.read_csv('../twitter/data/postprocessed/last_day_sentiments_coolcats_07-10_2021.csv')

In [53]:
# Add last hour sentiment columns to sale pairs df.
sale_pairs_df["last_hour_avg_sentiment"] = last_hour_sentiment_df["last_hour_avg_sentiment"]
sale_pairs_df["last_hour_pos_tweets"] = last_hour_sentiment_df["last_hour_pos_tweets"]

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,last_hour_avg_sentiment,last_hour_pos_tweets
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,0.461651,0.460905
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,0.570819,0.613139
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,0.39788,0.380282
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,0.694888,0.75
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,0.578475,0.586466
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,0.634776,0.604651
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,0.579229,0.522388
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,0.680168,0.708333
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,0.73933,0.74744
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,0.734631,0.738739


In [652]:
# Add last halfday sentiment columns to sale pairs df.
sale_pairs_df["last_halfday_avg_sentiment"] = last_halfday_sentiment_df["last_halfday_avg_sentiment"]
sale_pairs_df["last_halfday_pos_tweets"] = last_halfday_sentiment_df["last_halfday_pos_tweets"]

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,...,last_day_topic_4,last_day_topic_5,last_day_topic_6,last_day_topic_7,last_day_topic_8,last_day_topic_9,last_halfday_tweets,num_last_halfday_tweets,last_halfday_avg_sentiment,last_halfday_pos_tweets
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,...,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,RT @gimmocrypto: This gif of @mutantcats purch...,243,0.461651,0.460905
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,...,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,Do you know why square cats are such good pets...,137,0.570819,0.613139
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,...,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,"#24px - anon dev. no roadmap. just pixels, cat...",228,0.533727,0.539474
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,...,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,134,0.634798,0.641791
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,...,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,RT @BullieverIsland: Celebrating Citizens of B...,266,0.578475,0.586466
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,...,0.010895,0.010895,0.010895,0.010895,0.010896,0.010895,RT @DamianSpriggs: I have always wanted to be ...,172,0.634776,0.604651
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,...,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,528,0.646059,0.660985
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,...,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,138,0.63398,0.637681
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,...,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,Omg looooool 2.2M followers 50E saleI can't g...,586,0.73933,0.74744
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,...,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,RT @SpaceLabCrypto: If you are worried that yo...,450,0.67812,0.682222


In [517]:
# Add last day sentiment columns to sale pairs df.
sale_pairs_df["last_day_avg_sentiment"] = last_day_sentiment_df["last_day_avg_sentiment"]
sale_pairs_df["last_day_pos_tweets"] = last_day_sentiment_df["last_day_pos_tweets"]

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,...,last_hour_topic_6,last_hour_topic_7,last_hour_topic_8,last_hour_topic_9,last_two_hour_tweets,num_last_two_hour_tweets,last_day_tweets,num_last_day_tweets,last_day_avg_sentiment,last_day_pos_tweets
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,...,0.007467,0.007462,0.403463,0.007462,RT @gimmocrypto: This gif of @mutantcats purch...,243,RT @gimmocrypto: This gif of @mutantcats purch...,408,0.530899,0.529412
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,...,0.00559,0.005589,0.005589,0.283805,Do you know why square cats are such good pets...,137,Do you know why square cats are such good pets...,354,0.525302,0.548023
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,...,0.006735,0.006734,0.006734,0.483122,"#24px - anon dev. no roadmap. just pixels, cat...",71,"#24px - anon dev. no roadmap. just pixels, cat...",228,0.533727,0.539474
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,...,0.009024,0.009024,0.009024,0.009024,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,134,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,134,0.634798,0.641791
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,...,0.008887,0.008887,0.008887,0.008887,RT @BullieverIsland: Celebrating Citizens of B...,266,RT @BullieverIsland: Celebrating Citizens of B...,455,0.530032,0.540659
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,...,0.007753,0.007751,0.007751,0.007751,RT @DamianSpriggs: I have always wanted to be ...,172,RT @DamianSpriggs: I have always wanted to be ...,566,0.658452,0.676678
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,...,0.008119,0.008117,0.008117,0.008117,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,528,0.646059,0.660985
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,...,0.008513,0.008513,0.008513,0.008513,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,138,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,138,0.63398,0.637681
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,...,0.007747,0.007746,0.007746,0.007746,Omg looooool 2.2M followers 50E saleI can't g...,586,Omg looooool 2.2M followers 50E saleI can't g...,925,0.710116,0.716757
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,...,0.010773,0.010773,0.010773,0.010773,RT @SpaceLabCrypto: If you are worried that yo...,450,RT @SpaceLabCrypto: If you are worried that yo...,450,0.67812,0.682222


In [44]:
# Get the embeddings for the different labels.

from numpy import dot
from numpy.linalg import norm

# Cosine similarity for comparing embeddings.
def cosineSimilarity(a, b):
    return np.dot(a, b)/(np.linalg.norm(a)*np.linalg.norm(b))

# Returns a Distilbert embedding for a given text as a vector.
def getDistilbertEmbeddingForText(text):
    distilbertInput=distilbertTokenizer.encode_plus(\
                                                    text,\
                                                    max_length = 512,\
                                                    add_special_tokens = True,\
                                                    pad_to_max_length = True,\
                                                    truncation=True)
    idInput=np.asarray(distilbertInput['input_ids'])
    maskInput=np.asarray(distilbertInput['attention_mask'])
    distilbertOutput=distilbertModel([idInput.reshape(1,-1),maskInput.reshape(1,-1)])
    
    #print(type(distilbertOutput),distilbertOutput)
    
    return np.array(distilbertOutput[0][0]).reshape(-1)

# Get the embedding of the concatenation of all tweets for a given Momentum label.
def getDistilbertEmbeddingsForMomentumLabel(df, targetLabel, tweetsColumn):
    tweetsByLabel = df.loc[df.five_percent_momentum == targetLabel, tweetsColumn]
    
    concatenatedTweets = " ".join(tweetsByLabel)
        
    return getDistilbertEmbeddingForText(concatenatedTweets)

# Get the embeddings for all labels.
def getDistilbertEmbeddingsForMomentumLabels(df, tweetsColumn):
    distilbertEmbeddingsByLabel = dict()
    labels = ["UP", "DOWN", "FLAT"]
    
    for label in labels:
        distilbertEmbeddingsByLabel[label] = getDistilbertEmbeddingsForMomentumLabel(df, label, tweetsColumn)
        
    return distilbertEmbeddingsByLabel

In [45]:
if computeEmbeddingsByLabel:
    
    startTime = time.time()

    # Getting memory error when concatenating all histories.
    #distilbertEmbeddingsByLabel = getDistilbertEmbeddingsForMomentumLabels(sale_pairs_df, 'all_tweets')

    # Get Distilbert embeddings for tweets during the last hour of a sale.
    lastHourDistilbertEmbeddingsByLabel = getDistilbertEmbeddingsForMomentumLabels(sale_pairs_df, 'last_hour_tweets')

    #distilbertOutputs = []

    #distilbertInput=distilbertTokenizer.encode_plus(concatenatedTweetsBetweenSaleEvents,add_special_tokens = True,pad_to_max_length = True,truncation=True)
    #idInput=np.asarray(distilbertInput['input_ids'])
    #maskInput=np.asarray(distilbertInput['attention_mask'])
    #distilbertOutput=distilbertModel([idInput.reshape(1,-1),maskInput.reshape(1,-1)])

    #print(type(distilbertOutput),distilbertOutput)

    #for i in range(len(concatenatedTweetsBetweenSaleEvents)):
    #    if i % 100 == 0:
    #        print("Computing Embedding for tweets " + str(i) + " out of " + str(len(concatenatedTweetsBetweenSaleEvents)))
    
    #    concatenatedTweets = concatenatedTweetsBetweenSaleEvents[i]
    #    distilbertInput=distilbertTokenizer.encode_plus(concatenatedTweets,add_special_tokens = True,pad_to_max_length = True,truncation=True)
    #    idInput=np.asarray(distilbertInput['input_ids'])
    #    maskInput=np.asarray(distilbertInput['attention_mask'])

    #    distilbertOutput=distilbertModel([idInput.reshape(1,-1),maskInput.reshape(1,-1)])
    
    #    distilbertOutputs.append(distilbertOutput)

    #type(distilbertOutput),distilbertOutput
    
    #print(getDistilbertEmbeddingForText(concatenatedTweetsBetweenSaleEvents[0]))
    
    endTime = time.time()
    print("Finished computed embeddings for labels in " + str(endTime - startTime) + " seconds.")

In [46]:
if computeEmbeddingsByLabel:
    last_hour_embeddings_per_label_df.to_csv('../twitter/data/postprocessed/last_hour_embeddings_per_label_coolcats_07-10_2021.csv', index = True)

In [47]:
last_hour_embeddings_per_label_df = pd.read_csv('../twitter/data/postprocessed/last_hour_embeddings_per_label_coolcats_07-10_2021.csv')

KeyboardInterrupt: 

In [48]:
# Compute cosine similarity against all labels.

def getCosineSimilaritiesPerLabel(df, tweetsColumn, embeddingsPerLabel, labels):
    cosineSimilarities = []
    
    for i in range(len(df[tweetsColumn])):
        tweets = df[tweetsColumn][i]
        
        if i % 100 == 0:
            print("Computing cosine similarities for tweets " + str(i) + " out of " + str(len(df[tweetsColumn])))
        
        tweetsEmbedding = getDistilbertEmbeddingForText(tweets)
        
        cosineSimilarityPerLabel = []
        for label in labels:
            cosineSimilarityPerLabel.append(cosineSimilarity(tweetsEmbedding, embeddingsPerLabel[label]))
            
        cosineSimilarities.append(cosineSimilarityPerLabel)
        
    return cosineSimilarities
        

# Compute cosine similarities only if we haven't computed them before.
if computeCosineSimilarities:
    startTime = time.time()

    cosineSimilarities = getCosineSimilaritiesPerLabel(sale_pairs_df, \
                                                   'last_hour_tweets',\
                                                   lastHourDistilbertEmbeddingsByLabel,\
                                                  ["UP", "DOWN", "FLAT"])

    endTime = time.time()

    print("Finished computing cosine similarities after " + str(endTime - startTime) + " seconds")

In [49]:
# Create last-hour cosine similarities dataframe
if computeCosineSimilarities:
    last_hour_cosine_similarities_df = pd.DataFrame(data=cosineSimilarities, columns=["UP", "DOWN", "FLAT"])

    last_hour_cosine_similarities_df.head(10)

In [50]:
#Store last-hour cosine similarities.
if computeCosineSimilarities:
    last_hour_cosine_similarities_df.to_csv('../twitter/data/postprocessed/last_hour_cosine_similarities_coolcats_07-10_2021.csv', index = True)

In [51]:
last_hour_cosine_similarities_df = pd.read_csv('../twitter/data/postprocessed/last_hour_cosine_similarities_coolcats_07-10_2021.csv')

In [52]:
sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111


In [54]:
# Add the cosine similarities for the last hour of tweets to the sale_pairs_df.
sale_pairs_df["up_last_hour"] = last_hour_cosine_similarities_df["UP"]
sale_pairs_df["down_last_hour"] = last_hour_cosine_similarities_df["DOWN"]
sale_pairs_df["flat_last_hour"] = last_hour_cosine_similarities_df["FLAT"]
sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,last_hour_avg_sentiment,last_hour_pos_tweets,up_last_hour,down_last_hour,flat_last_hour
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,0.461651,0.460905,0.387524,1.0,0.384733
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,0.570819,0.613139,1.0,0.387524,0.396572
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,0.39788,0.380282,0.387349,0.38688,0.383842
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,0.694888,0.75,0.397461,0.399703,0.399968
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,0.578475,0.586466,0.380741,0.390007,0.377293
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,0.634776,0.604651,0.396572,0.384733,1.0
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,0.579229,0.522388,0.402898,0.3824,0.390657
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,0.680168,0.708333,0.385264,0.394358,0.387613
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,0.73933,0.74744,0.373806,0.36559,0.381575
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,0.734631,0.738739,0.385294,0.403803,0.39045


In [55]:
# Add the label with maximum cosine similarity as a feature.
sale_pairs_df.loc[(sale_pairs_df['up_last_hour'] >= sale_pairs_df['down_last_hour']) \
             & (sale_pairs_df['up_last_hour'] >= sale_pairs_df['flat_last_hour']),\
                  'last_hour_closest_cosine_similarity_label'] = "UP"
sale_pairs_df.loc[(sale_pairs_df['down_last_hour'] >= sale_pairs_df['up_last_hour']) \
             & (sale_pairs_df['down_last_hour'] >= sale_pairs_df['flat_last_hour']),\
                  'last_hour_closest_cosine_similarity_label'] = "DOWN"
sale_pairs_df.loc[(sale_pairs_df['flat_last_hour'] >= sale_pairs_df['up_last_hour']) \
             & (sale_pairs_df['flat_last_hour'] >= sale_pairs_df['down_last_hour']),\
                  'last_hour_closest_cosine_similarity_label'] = "FLAT"

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,last_hour_avg_sentiment,last_hour_pos_tweets,up_last_hour,down_last_hour,flat_last_hour,last_hour_closest_cosine_similarity_label
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,0.461651,0.460905,0.387524,1.0,0.384733,DOWN
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,0.570819,0.613139,1.0,0.387524,0.396572,UP
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,0.39788,0.380282,0.387349,0.38688,0.383842,UP
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,0.694888,0.75,0.397461,0.399703,0.399968,FLAT
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,0.578475,0.586466,0.380741,0.390007,0.377293,DOWN
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,0.634776,0.604651,0.396572,0.384733,1.0,FLAT
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,0.579229,0.522388,0.402898,0.3824,0.390657,UP
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,0.680168,0.708333,0.385264,0.394358,0.387613,DOWN
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,0.73933,0.74744,0.373806,0.36559,0.381575,FLAT
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,0.734631,0.738739,0.385294,0.403803,0.39045,DOWN


In [56]:
# Convert 'last_hour_closest_cosine_similarity_label' to -1,0,1.
sale_pairs_df.loc[sale_pairs_df['last_hour_closest_cosine_similarity_label'] == "UP", \
'numeric_last_hour_closest_cosine_similarity_label'] = 1
sale_pairs_df.loc[sale_pairs_df['last_hour_closest_cosine_similarity_label'] == "DOWN", \
'numeric_last_hour_closest_cosine_similarity_label'] = -1
sale_pairs_df.loc[sale_pairs_df['last_hour_closest_cosine_similarity_label'] == "FLAT", \
'numeric_last_hour_closest_cosine_similarity_label'] = 0

In [57]:
# Convert 'five_percent_momentum' to -1,0,1.
sale_pairs_df.loc[sale_pairs_df['five_percent_momentum'] == "UP", \
'numeric_five_percent_momentum'] = 1
sale_pairs_df.loc[sale_pairs_df['five_percent_momentum'] == "DOWN", \
'numeric_five_percent_momentum'] = -1
sale_pairs_df.loc[sale_pairs_df['five_percent_momentum'] == "FLAT", \
'numeric_five_percent_momentum'] = 0

In [58]:
sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,last_hour_avg_sentiment,last_hour_pos_tweets,up_last_hour,down_last_hour,flat_last_hour,last_hour_closest_cosine_similarity_label,numeric_last_hour_closest_cosine_similarity_label,numeric_five_percent_momentum
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,0.461651,0.460905,0.387524,1.0,0.384733,DOWN,-1.0,-1.0
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,0.570819,0.613139,1.0,0.387524,0.396572,UP,1.0,1.0
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,0.39788,0.380282,0.387349,0.38688,0.383842,UP,1.0,1.0
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,0.694888,0.75,0.397461,0.399703,0.399968,FLAT,0.0,-1.0
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,0.578475,0.586466,0.380741,0.390007,0.377293,DOWN,-1.0,1.0
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,0.634776,0.604651,0.396572,0.384733,1.0,FLAT,0.0,0.0
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,0.579229,0.522388,0.402898,0.3824,0.390657,UP,1.0,1.0
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,0.680168,0.708333,0.385264,0.394358,0.387613,DOWN,-1.0,1.0
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,0.73933,0.74744,0.373806,0.36559,0.381575,FLAT,0.0,1.0
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,0.734631,0.738739,0.385294,0.403803,0.39045,DOWN,-1.0,1.0


In [59]:
# Add LDA topic modeling for the last hour of tweets.
from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

startTime = time.time()

tfidVectorizer = TfidfVectorizer(stop_words=stopwords.words('english'))
vectorizedText = tfidVectorizer.fit_transform(sale_pairs_df['last_hour_tweets'])

ldaModel = LatentDirichletAllocation(n_components=10,learning_method='online',random_state=42)

ldaTopics = ldaModel.fit_transform(vectorizedText)

endTime = time.time()

print("Finished computing LDA topic modeling after " + str(endTime - startTime) + " seconds.")

Finished computing LDA topic modeling after 32.50653290748596 seconds.


In [60]:
ldaTopics[0]

array([0.00746157, 0.53683045, 0.00746157, 0.00746157, 0.00746974,
       0.00746171, 0.00746692, 0.00746157, 0.40346326, 0.00746163])

In [653]:
# Add LDA topic modeling for the last halfday of tweets.

startTime = time.time()

tfidVectorizer = TfidfVectorizer(stop_words=stopwords.words('english'))
lastHalfDayVectorizedText = tfidVectorizer.fit_transform(sale_pairs_df['last_halfday_tweets'])

ldaModel = LatentDirichletAllocation(n_components=10,learning_method='online',random_state=42)

ldaTopicsLastHalfDay = ldaModel.fit_transform(lastHalfDayVectorizedText)

endTime = time.time()

print("Finished computing last halfday LDA topic modeling after " + str(endTime - startTime) + " seconds.")

Finished computing last halfday LDA topic modeling after 23.644343376159668 seconds.


In [None]:
# Add LDA topic modeling for the last day of tweets.

startTime = time.time()

tfidVectorizer = TfidfVectorizer(stop_words=stopwords.words('english'))
lastDayVectorizedText = tfidVectorizer.fit_transform(sale_pairs_df['last_day_tweets'])

ldaModel = LatentDirichletAllocation(n_components=10,learning_method='online',random_state=42)

ldaTopicsLastDay = ldaModel.fit_transform(lastDayVectorizedText)

endTime = time.time()

print("Finished computing last day LDA topic modeling after " + str(endTime - startTime) + " seconds.")

In [61]:
# Create LDA topics DF for the last hour.
last_hour_lda_topics_df = pd.DataFrame(data = ldaTopics, columns = ["last_hour_topic_" + str(i) for i in range(10)])

last_hour_lda_topics_df.head(10)

Unnamed: 0,last_hour_topic_0,last_hour_topic_1,last_hour_topic_2,last_hour_topic_3,last_hour_topic_4,last_hour_topic_5,last_hour_topic_6,last_hour_topic_7,last_hour_topic_8,last_hour_topic_9
0,0.007462,0.53683,0.007462,0.007462,0.00747,0.007462,0.007467,0.007462,0.403463,0.007462
1,0.005589,0.671481,0.005589,0.005589,0.005589,0.005589,0.00559,0.005589,0.005589,0.283805
2,0.006734,0.462998,0.006734,0.006734,0.006734,0.00674,0.006735,0.006734,0.006734,0.483122
3,0.009024,0.441034,0.009024,0.009024,0.486775,0.009024,0.009024,0.009024,0.009024,0.009024
4,0.008887,0.92002,0.008887,0.008887,0.008887,0.008887,0.008887,0.008887,0.008887,0.008887
5,0.007751,0.930238,0.007751,0.007751,0.007751,0.007751,0.007753,0.007751,0.007751,0.007751
6,0.008117,0.926947,0.008117,0.008117,0.008117,0.008117,0.008119,0.008117,0.008117,0.008117
7,0.008513,0.477369,0.008513,0.008513,0.454525,0.008515,0.008513,0.008513,0.008513,0.008513
8,0.007746,0.930287,0.007746,0.007746,0.007746,0.007746,0.007747,0.007746,0.007746,0.007746
9,0.010773,0.903041,0.010773,0.010773,0.010773,0.010773,0.010773,0.010773,0.010773,0.010773


In [654]:
# Create LDA topics DF for the last halfday.
last_halfday_lda_topics_df = pd.DataFrame(data = ldaTopicsLastHalfDay, columns = ["last_halfday_topic_" + str(i) for i in range(10)])

last_halfday_lda_topics_df.head(10)

Unnamed: 0,last_halfday_topic_0,last_halfday_topic_1,last_halfday_topic_2,last_halfday_topic_3,last_halfday_topic_4,last_halfday_topic_5,last_halfday_topic_6,last_halfday_topic_7,last_halfday_topic_8,last_halfday_topic_9
0,0.007631,0.007632,0.007631,0.007631,0.007631,0.007631,0.007631,0.93132,0.007631,0.007631
1,0.00571,0.00571,0.00571,0.00571,0.00571,0.00571,0.00571,0.948608,0.00571,0.00571
2,0.005244,0.005244,0.22912,0.005244,0.005245,0.005244,0.005244,0.728924,0.005244,0.005244
3,0.005056,0.005056,0.005056,0.005056,0.005056,0.005056,0.005056,0.954496,0.005056,0.005056
4,0.009204,0.009204,0.009204,0.009204,0.009204,0.009204,0.009204,0.917162,0.009204,0.009204
5,0.00774,0.00774,0.00774,0.00774,0.00774,0.00774,0.00774,0.930344,0.00774,0.00774
6,0.011116,0.011116,0.011116,0.011116,0.011116,0.011116,0.011116,0.89996,0.011116,0.011116
7,0.005066,0.005066,0.005066,0.005066,0.005066,0.005066,0.005066,0.954406,0.005066,0.005066
8,0.008081,0.008081,0.008081,0.008081,0.008081,0.008081,0.008081,0.927269,0.008081,0.008081
9,0.008419,0.008419,0.008419,0.008419,0.008419,0.008419,0.008419,0.92423,0.008419,0.008419


In [519]:
# Create LDA topics DF for the last day.
last_day_lda_topics_df = pd.DataFrame(data = ldaTopicsLastDay, columns = ["last_day_topic_" + str(i) for i in range(10)])

last_day_lda_topics_df.head(10)

Unnamed: 0,last_day_topic_0,last_day_topic_1,last_day_topic_2,last_day_topic_3,last_day_topic_4,last_day_topic_5,last_day_topic_6,last_day_topic_7,last_day_topic_8,last_day_topic_9
0,0.947817,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798
1,0.958965,0.00456,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559
2,0.951416,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398
3,0.953254,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194
4,0.925136,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318
5,0.901941,0.010895,0.010895,0.010895,0.010895,0.010895,0.010895,0.010895,0.010896,0.010895
6,0.899919,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112
7,0.953116,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209
8,0.927354,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072
9,0.921743,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695


In [62]:
# Append LDA topic DF to main DF.
for i in range(10):
    topicLabel = "last_hour_topic_" + str(i)
    sale_pairs_df[topicLabel] = last_hour_lda_topics_df[topicLabel]

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,...,last_hour_topic_0,last_hour_topic_1,last_hour_topic_2,last_hour_topic_3,last_hour_topic_4,last_hour_topic_5,last_hour_topic_6,last_hour_topic_7,last_hour_topic_8,last_hour_topic_9
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,...,0.007462,0.53683,0.007462,0.007462,0.00747,0.007462,0.007467,0.007462,0.403463,0.007462
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,...,0.005589,0.671481,0.005589,0.005589,0.005589,0.005589,0.00559,0.005589,0.005589,0.283805
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,...,0.006734,0.462998,0.006734,0.006734,0.006734,0.00674,0.006735,0.006734,0.006734,0.483122
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,...,0.009024,0.441034,0.009024,0.009024,0.486775,0.009024,0.009024,0.009024,0.009024,0.009024
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,...,0.008887,0.92002,0.008887,0.008887,0.008887,0.008887,0.008887,0.008887,0.008887,0.008887
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,...,0.007751,0.930238,0.007751,0.007751,0.007751,0.007751,0.007753,0.007751,0.007751,0.007751
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,...,0.008117,0.926947,0.008117,0.008117,0.008117,0.008117,0.008119,0.008117,0.008117,0.008117
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,...,0.008513,0.477369,0.008513,0.008513,0.454525,0.008515,0.008513,0.008513,0.008513,0.008513
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,...,0.007746,0.930287,0.007746,0.007746,0.007746,0.007746,0.007747,0.007746,0.007746,0.007746
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,...,0.010773,0.903041,0.010773,0.010773,0.010773,0.010773,0.010773,0.010773,0.010773,0.010773


In [655]:
# Append last halfday LDA topic DF to main DF.
for i in range(10):
    topicLabel = "last_halfday_topic_" + str(i)
    sale_pairs_df[topicLabel] = last_halfday_lda_topics_df[topicLabel]

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,...,last_halfday_topic_0,last_halfday_topic_1,last_halfday_topic_2,last_halfday_topic_3,last_halfday_topic_4,last_halfday_topic_5,last_halfday_topic_6,last_halfday_topic_7,last_halfday_topic_8,last_halfday_topic_9
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,...,0.007631,0.007632,0.007631,0.007631,0.007631,0.007631,0.007631,0.93132,0.007631,0.007631
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,...,0.00571,0.00571,0.00571,0.00571,0.00571,0.00571,0.00571,0.948608,0.00571,0.00571
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,...,0.005244,0.005244,0.22912,0.005244,0.005245,0.005244,0.005244,0.728924,0.005244,0.005244
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,...,0.005056,0.005056,0.005056,0.005056,0.005056,0.005056,0.005056,0.954496,0.005056,0.005056
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,...,0.009204,0.009204,0.009204,0.009204,0.009204,0.009204,0.009204,0.917162,0.009204,0.009204
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,...,0.00774,0.00774,0.00774,0.00774,0.00774,0.00774,0.00774,0.930344,0.00774,0.00774
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,...,0.011116,0.011116,0.011116,0.011116,0.011116,0.011116,0.011116,0.89996,0.011116,0.011116
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,...,0.005066,0.005066,0.005066,0.005066,0.005066,0.005066,0.005066,0.954406,0.005066,0.005066
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,...,0.008081,0.008081,0.008081,0.008081,0.008081,0.008081,0.008081,0.927269,0.008081,0.008081
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,...,0.008419,0.008419,0.008419,0.008419,0.008419,0.008419,0.008419,0.92423,0.008419,0.008419


In [520]:
# Append last day LDA topic DF to main DF.
for i in range(10):
    topicLabel = "last_day_topic_" + str(i)
    sale_pairs_df[topicLabel] = last_day_lda_topics_df[topicLabel]

sale_pairs_df.head(10)

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,...,last_day_topic_0,last_day_topic_1,last_day_topic_2,last_day_topic_3,last_day_topic_4,last_day_topic_5,last_day_topic_6,last_day_topic_7,last_day_topic_8,last_day_topic_9
0,9932,2021-10-11T17:16:09.440387,1.049e+19,2021-10-21T19:16:22.725846,6.55e+18,-0.375596,DOWN,RT @Cryptomesssiah: GIVEAWAY OF RARE #CoolCats...,RT @gimmocrypto: This gif of @mutantcats purch...,243,...,0.947817,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798,0.005798
1,9932,2021-09-23T08:30:18.304058,6e+18,2021-10-11T17:16:09.440387,1.049e+19,0.748333,UP,Do you know why square cats are such good pets...,Do you know why square cats are such good pets...,137,...,0.958965,0.00456,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559,0.004559
2,9932,2021-08-29T01:44:44.449125,5.25e+18,2021-09-23T08:30:18.304058,6e+18,0.142857,UP,"#24px - anon dev. no roadmap. just pixels, cat...","#24px - anon dev. no roadmap. just pixels, cat...",71,...,0.951416,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398,0.005398
3,9925,2021-08-13T17:59:33.661011,1.66e+18,2021-08-17T03:46:32.538134,1.51e+18,-0.090361,DOWN,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,RT @XCOPYPUNKS: XCOPYCATS GiveawayWe're giving...,20,...,0.953254,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194,0.005194
4,9925,2021-08-04T23:52:14.987776,1.15e+18,2021-08-13T17:59:33.661011,1.66e+18,0.443478,UP,RT @BullieverIsland: Celebrating Citizens of B...,RT @BullieverIsland: Celebrating Citizens of B...,266,...,0.925136,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318,0.008318
5,9921,2021-08-22T11:05:32.621573,1.64e+18,2021-08-22T14:51:26.933741,1.649e+18,0.005488,FLAT,RT @DamianSpriggs: I have always wanted to be ...,RT @DamianSpriggs: I have always wanted to be ...,172,...,0.901941,0.010895,0.010895,0.010895,0.010895,0.010895,0.010895,0.010895,0.010896,0.010895
6,9921,2021-08-17T04:39:51.161269,1.32e+18,2021-08-22T11:05:32.621573,1.64e+18,0.242424,UP,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,RT @DrugstoreNfts: DROP NEWSTODAY 10 PM GMT /...,134,...,0.899919,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112,0.01112
7,9921,2021-07-30T16:46:54.052325,6.5e+17,2021-08-17T04:39:51.161269,1.32e+18,1.030769,UP,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,@zachward69 #Coolcats BEEP BOOP Cool Cat #5...,24,...,0.953116,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209,0.005209
8,9920,2021-08-24T03:29:15.074609,3.3568e+18,2021-08-24T22:21:08.871690,4.9e+18,0.459724,UP,Omg looooool 2.2M followers 50E saleI can't g...,Omg looooool 2.2M followers 50E saleI can't g...,586,...,0.927354,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072,0.008072
9,9920,2021-08-23T17:24:06.385908,2.11e+18,2021-08-24T03:29:15.074609,3.3568e+18,0.5909,UP,RT @SpaceLabCrypto: If you are worried that yo...,RT @SpaceLabCrypto: If you are worried that yo...,111,...,0.921743,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695,0.008695


In [656]:
# Feature selection.

cosine_similarity_features = ['up_last_hour', 'down_last_hour', 'flat_last_hour', 'numeric_last_hour_closest_cosine_similarity_label']
tweets_features = ['num_last_hour_tweets', 'num_last_halfday_tweets', 'num_last_day_tweets']
lda_topic_modeling_features = ["last_hour_topic_" + str(i) for i in range(10)] + ["last_day_topic_" + str(i) for i in range(10)] + ["last_halfday_topic_" + str(i) for i in range(10)]

sentiment_analysis_features = ["last_hour_avg_sentiment", "last_hour_pos_tweets", "last_halfday_avg_sentiment", "last_halfday_pos_tweets", "last_day_avg_sentiment", "last_day_pos_tweets"]
                   
features = cosine_similarity_features + tweets_features + lda_topic_modeling_features + sentiment_analysis_features

label = 'numeric_five_percent_momentum'
#label = 'five_percent_momentum'

In [657]:
# Split into 90% training and 10% test.
from sklearn.model_selection import train_test_split

X = sale_pairs_df[features]
y = sale_pairs_df[[label]]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [658]:
# Predicting according to the majority class.
print((1 == y_train["numeric_five_percent_momentum"]).sum() / y_train.count())

# Using cosine similarity for prediction.
print((X_train["numeric_last_hour_closest_cosine_similarity_label"] == y_train["numeric_five_percent_momentum"]).sum() / y_train.count())

print((X_train["numeric_last_hour_closest_cosine_similarity_label"] == 1).sum())

numeric_five_percent_momentum    0.893843
dtype: float64
numeric_five_percent_momentum    0.307148
dtype: float64
445


In [659]:
def trainAndTestModel(model, X_train, y_train, X_test, y_test):
    model.fit(X_train,y_train)
    y_predict = model.predict(X_test)
    print("The test accuracy of the model is: " + str(accuracy_score(y_test,y_predict))) 
    print("The ROC AUC score of the model is: " + str(roc_auc_score(y_test, model.predict_proba(X_test),multi_class='ovr')))

In [660]:
# Train a decision tree classifier.
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.tree import DecisionTreeClassifier
trainAndTestModel(DecisionTreeClassifier(random_state = 42), X_train, y_train, X_test, y_test)

The test accuracy of the model is: 0.8291139240506329
The ROC AUC score of the model is: 0.5453576742343963


In [661]:
# Train a Gaussian Naie Bayes.
from sklearn.naive_bayes import GaussianNB
trainAndTestModel(GaussianNB(), X_train, y_train, X_test, y_test)

The test accuracy of the model is: 0.879746835443038
The ROC AUC score of the model is: 0.6590675255347632


  return f(*args, **kwargs)


In [662]:
# Training and testing a Random Forest.
from sklearn.ensemble import RandomForestClassifier
trainAndTestModel(RandomForestClassifier(random_state = 42), X_train, y_train, X_test, y_test)

  model.fit(X_train,y_train)


The test accuracy of the model is: 0.8987341772151899
The ROC AUC score of the model is: 0.545464753710515


In [663]:
# Extract balanced samples.
cloned_sale_pairs_df = sale_pairs_df.copy()

# Extract 50 samples per class.
up_sample_df = cloned_sale_pairs_df.loc[sale_pairs_df['five_percent_momentum'] == "UP",].sample(n=50, random_state=42)
down_sample_df = cloned_sale_pairs_df.loc[sale_pairs_df['five_percent_momentum'] == "DOWN",].sample(n=50, random_state=42)
flat_sample_df = cloned_sale_pairs_df.loc[sale_pairs_df['five_percent_momentum'] == "FLAT",].sample(n=50, random_state=42)

samples_df = pd.concat([up_sample_df, down_sample_df, flat_sample_df])

# Drop samples from source DF.
cloned_sale_pairs_df = cloned_sale_pairs_df.drop(samples_df.index)

samples_df

Unnamed: 0,id,start_sale_date,start_sale_price,end_sale_date,end_sale_price,percent_change,five_percent_momentum,all_tweets,last_hour_tweets,num_last_hour_tweets,...,last_halfday_topic_0,last_halfday_topic_1,last_halfday_topic_2,last_halfday_topic_3,last_halfday_topic_4,last_halfday_topic_5,last_halfday_topic_6,last_halfday_topic_7,last_halfday_topic_8,last_halfday_topic_9
1198,9122,2021-07-11T07:36:03.317093,1.979000e+18,2021-08-02T15:46:46.489656,2.220000e+18,0.121779,UP,RT @markshaw: Remember: They'll come for the a...,RT @markshaw: Remember: They'll come for the a...,175,...,0.006185,0.006185,0.006185,0.006185,0.006185,0.006185,0.006185,0.944338,0.006185,0.006185
1132,9170,2021-07-07T15:40:36.198903,4.000000e+17,2021-07-07T18:06:27.996699,4.880000e+17,0.220000,UP,RT @sonirious: CoolCats are going to flip apes...,RT @sonirious: CoolCats are going to flip apes...,1105,...,0.011560,0.011561,0.011560,0.011560,0.011560,0.011560,0.011560,0.895956,0.011560,0.011560
731,9440,2021-07-05T08:27:31.398556,8.414770e+16,2021-08-22T05:31:56.492745,1.600000e+18,18.014187,UP,RT @markshaw: What can I do for you that will ...,RT @xtremetom: We see you @dfinzer (co-founder...,103,...,0.011381,0.011381,0.011381,0.011381,0.011381,0.011381,0.011381,0.897571,0.011381,0.011381
506,9575,2021-07-07T04:46:43.475180,2.500000e+17,2021-07-08T02:58:47.414361,4.850000e+17,0.940000,UP,Whats up @coolcatsnft fam?!? What a day!! Foll...,Whats up @coolcatsnft fam?!? What a day!! Foll...,137,...,0.010800,0.010800,0.010800,0.010800,0.010800,0.010800,0.010800,0.902799,0.010800,0.010800
1445,8994,2021-07-04T10:33:57.321596,6.000000e+16,2021-07-04T18:10:32.626065,1.400000e+17,1.333333,UP,RT @Zvizvizvi: @PsyChrypto @alexgausman Only 1...,RT @Zvizvizvi: @PsyChrypto @alexgausman Only 1...,163,...,0.010274,0.010274,0.010274,0.010274,0.010274,0.010274,0.010274,0.907530,0.010274,0.010274
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,9918,2021-08-04T19:04:01.906638,1.550000e+18,2021-08-19T19:29:02.968073,1.490000e+18,-0.038710,FLAT,RT @BluuCatt: MIKE TYSON SAYING COOLCATS LIVE ...,RT @BluuCatt: MIKE TYSON SAYING COOLCATS LIVE ...,122,...,0.005963,0.005963,0.005963,0.005963,0.005963,0.005963,0.005963,0.946336,0.005963,0.005963
486,9602,2021-07-02T00:52:51.979627,1.980000e+16,2021-07-02T01:44:31.310903,1.950000e+16,-0.015152,FLAT,RT @Stanley_Studios: i love them both lol@Gut...,RT @Stanley_Studios: i love them both lol@Gut...,15,...,0.009991,0.009991,0.242369,0.009991,0.009991,0.009991,0.009991,0.677707,0.009991,0.009991
29,9907,2021-07-09T21:47:36.613699,9.999000e+17,2021-08-04T05:55:33.969680,9.900000e+17,-0.009901,FLAT,RT @markshaw: What can I do for you that will ...,RT @SunkenSquidNFT: SUNKEN SQUIDS x COOL CAT N...,45,...,0.007494,0.007494,0.007494,0.007494,0.007494,0.007494,0.007494,0.932553,0.007494,0.007494
1484,8976,2021-09-04T11:39:54.807283,4.480000e+18,2021-09-11T12:24:58.355250,4.590000e+18,0.024554,FLAT,RT @MonasNFT: SELLOUT INCOMING 245 ETH 1.80...,RT @MonasNFT: SELLOUT INCOMING 245 ETH 1.80...,776,...,0.019344,0.415590,0.019344,0.019344,0.019344,0.019344,0.019344,0.429661,0.019344,0.019344


In [664]:
# Create training and test for balanced dataset.
X_train = cloned_sale_pairs_df[features]
y_train = cloned_sale_pairs_df[[label]]
X_test = samples_df[features]
y_test = samples_df[[label]]

In [665]:
# Train Decision Tree Classifier.
decisionTreeModel = DecisionTreeClassifier(random_state = 42)   
decisionTreeModel.fit(X_train,y_train)

DecisionTreeClassifier(random_state=42)

In [666]:
# Call predictiion
y_predict = decisionTreeModel.predict(X_test)

In [667]:
# Accuracy of the model.
accuracy_score(y_test,y_predict)

0.37333333333333335

In [668]:
# Accuracy of the baseline.
(y_test == 1).sum() / y_test.count()

numeric_five_percent_momentum    0.333333
dtype: float64

In [669]:
print((sale_pairs_df["five_percent_momentum"] =="UP").sum())
print((sale_pairs_df["five_percent_momentum"] =="DOWN").sum())
print((sale_pairs_df["five_percent_momentum"] =="FLAT").sum())

1406
101
64


In [670]:
(sale_pairs_df["five_percent_momentum"] == "UP").sum() / sale_pairs_df.count()

id                                                   0.894971
start_sale_date                                      0.894971
start_sale_price                                     0.894971
end_sale_date                                        0.894971
end_sale_price                                       0.894971
percent_change                                       0.894971
five_percent_momentum                                0.894971
all_tweets                                           0.894971
last_hour_tweets                                     0.894971
num_last_hour_tweets                                 0.894971
last_hour_avg_sentiment                              0.894971
last_hour_pos_tweets                                 0.894971
up_last_hour                                         0.894971
down_last_hour                                       0.894971
flat_last_hour                                       0.894971
last_hour_closest_cosine_similarity_label            0.894971
numeric_

In [671]:
len(concatenatedTweetsBetweenSaleEvents[3])

89372

In [672]:
# Training and testing a Random Forest.
from sklearn.ensemble import RandomForestClassifier
trainAndTestModel(RandomForestClassifier(random_state = 42), X_train, y_train, X_test, y_test)

  model.fit(X_train,y_train)


The test accuracy of the model is: 0.3466666666666667
The ROC AUC score of the model is: 0.5679666666666666


In [673]:
# Training and testing a Decision Tree Classifier.
from sklearn.tree import DecisionTreeClassifier
trainAndTestModel(DecisionTreeClassifier(random_state = 42), X_train, y_train, X_test, y_test)

The test accuracy of the model is: 0.37333333333333335
The ROC AUC score of the model is: 0.5297999999999999


In [674]:
# Training and testing a Gaussian Naive Bayes Classifier.
from sklearn.naive_bayes import GaussianNB
trainAndTestModel(GaussianNB(), X_train, y_train, X_test, y_test)

The test accuracy of the model is: 0.42
The ROC AUC score of the model is: 0.5958


  return f(*args, **kwargs)


In [675]:
from sklearn.neighbors import KNeighborsClassifier
trainAndTestModel(KNeighborsClassifier(n_neighbors=3), X_train, y_train, X_test, y_test)

The test accuracy of the model is: 0.36666666666666664
The ROC AUC score of the model is: 0.5426666666666667


  return self._fit(X, y)


In [676]:
from sklearn.svm import SVC
trainAndTestModel(SVC(random_state=42, probability=True), X_train, y_train, X_test, y_test)

  return f(*args, **kwargs)


The test accuracy of the model is: 0.3333333333333333
The ROC AUC score of the model is: 0.5385333333333334


In [677]:
from sklearn.ensemble import AdaBoostClassifier
trainAndTestModel(AdaBoostClassifier(random_state=42), X_train, y_train, X_test, y_test)

  return f(*args, **kwargs)


The test accuracy of the model is: 0.36666666666666664
The ROC AUC score of the model is: 0.5732666666666667


In [678]:
from sklearn.neural_network import MLPClassifier
trainAndTestModel(MLPClassifier(random_state=42), X_train, y_train, X_test, y_test)

  return f(*args, **kwargs)


The test accuracy of the model is: 0.3333333333333333
The ROC AUC score of the model is: 0.5756333333333333


In [679]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
trainAndTestModel(QuadraticDiscriminantAnalysis(), X_train, y_train, X_test, y_test)

The test accuracy of the model is: 0.29333333333333333
The ROC AUC score of the model is: 0.4501


  return f(*args, **kwargs)


In [487]:
# Running LSTM.
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Input
from keras.models import Model

# Convert y to one-hot-encoding
y_train_array = []

#print(y_train)
for x in y_train['numeric_five_percent_momentum']:
    if x == -1:
        y_train_array.append([0,0,1])
    elif x == 0:
        y_train_array.append([0,1,0])
    else:
        y_train_array.append([1, 0, 0])

# Convert the x_train and y_train to numpy arrays 
x_train_array, y_train_array = np.array(X_train), np.array(y_train_array)

#print(y_train_array)
# Reshape the time series data.
x_train_array = x_train_array.reshape(-1, len(features))

y_train_array = y_train_array.reshape(-1, 3)

#print(y_train_array.shape)
    
x_train_array = np.reshape(x_train_array, (x_train_array.shape[0], x_train_array.shape[1], 1))
    
# Build the LSTM model
print("Training Model...")
model = Sequential()
    
#model.add(LSTM(8, return_sequences=False, input_shape= (x_train_array.shape[1], 1)))
    
#model.add(Dense(25))
#model.add(Dense(1))

model.add(LSTM(16, return_sequences=False, input_shape= (x_train_array.shape[1], 1)))

#model.add(LSTM(8, dropout=0.5, recurrent_dropout=0.5, return_sequences=False, input_shape= (x_train_array.shape[1], 1)))
#model.add(Dropout(0.5))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(8, activation = 'relu'))
model.add(Dense(84, activation = 'relu'))
#model.add(Dropout(0.5))
#model.add(Dense(32, activation = 'relu'))
#model.add(Dense(32, activation = 'relu'))
#model.add(Dense(1))

# Output layer
model.add(Dense(3, activation='softmax'))

# Compile the model
#model.compile(optimizer='adam', loss='mean_squared_error')

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'AUC'])

#print(x_train_array.shape)
# Train the model
model.fit(x_train_array, y_train_array, batch_size=1, epochs=10)

Training Model...
(1421, 17, 1)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x262e6793b80>

In [474]:
# Compute predictions.
    
# Convert the x_test and y_test to numpy arrays 
x_test_array, y_test_array = np.array(X_test), np.array(y_test)
    
# Reshape the data
x_test_array = np.reshape(x_test_array, (x_test_array.shape[0], x_test_array.shape[1], 1 ))
    
# Get the models predicted price values
predictions = model.predict(x_test_array)
predictions = [x[0] for x in predictions]

# Results of Time Series.
#print("Mean squared error: %.2f" % mean_squared_error(y_test_time_series, predictions))
#print("R2 score: %.2f" % r2_score(y_test_time_series, predictions))
print(predictions)


#print([x for x in predictions if x < 0.5])
#print("The test accuracy of the model is: " + str(accuracy_score(y_test,predictions))) 
#print("The ROC AUC score of the model is: " + str(roc_auc_score(y_test, model.predict_proba(X_test),multi_class='ovr')))

[0.96120656, 0.96572125, 0.9542049, 0.96518064, 0.950572, 0.96731967, 0.96509546, 0.95208967, 0.9580245, 0.9647503, 0.9487001, 0.96615565, 0.9535276, 0.94947994, 0.95588905, 0.9668296, 0.9650333, 0.96604353, 0.9565818, 0.9505772, 0.9548279, 0.95810646, 0.96831906, 0.956795, 0.959561, 0.9424213, 0.9619488, 0.96152174, 0.966359, 0.96324867, 0.95837957, 0.96155614, 0.9525469, 0.96051204, 0.96035075, 0.9643996, 0.96242106, 0.96581393, 0.95751184, 0.9491463, 0.9535425, 0.9519526, 0.95129794, 0.9563197, 0.9607807, 0.9637493, 0.94608253, 0.96451527, 0.9623212, 0.9565061, 0.96049654, 0.96047455, 0.9647034, 0.9538781, 0.9418408, 0.95811236, 0.9636537, 0.96136856, 0.9595508, 0.9560213, 0.9610932, 0.9609427, 0.958822, 0.95941865, 0.96541715, 0.9474173, 0.9575514, 0.9578454, 0.9582626, 0.962513, 0.9656632, 0.9597129, 0.95344484, 0.96862495, 0.9652203, 0.95184165, 0.95481235, 0.9628366, 0.961178, 0.96035933, 0.9596901, 0.94757044, 0.94665945, 0.94983625, 0.96519256, 0.94008136, 0.96585697, 0.953606

In [488]:
# Running RNN.
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Input, SimpleRNN
from keras.models import Model

# Convert y to one-hot-encoding
y_train_array = []

#print(y_train)
for x in y_train['numeric_five_percent_momentum']:
    if x == -1:
        y_train_array.append([0,0,1])
    elif x == 0:
        y_train_array.append([0,1,0])
    else:
        y_train_array.append([1, 0, 0])

# Convert the x_train and y_train to numpy arrays 
x_train_array, y_train_array = np.array(X_train), np.array(y_train_array)

#print(y_train_array)
# Reshape the time series data.
x_train_array = x_train_array.reshape(-1, len(features))

y_train_array = y_train_array.reshape(-1, 3)

#print(y_train_array.shape)
    
x_train_array = np.reshape(x_train_array, (x_train_array.shape[0], x_train_array.shape[1], 1))
    
# Build the LSTM model
print("Training Model...")
model = Sequential()
    
#model.add(LSTM(8, return_sequences=False, input_shape= (x_train_array.shape[1], 1)))
    
#model.add(Dense(25))
#model.add(Dense(1))

model.add(SimpleRNN(16, return_sequences=False, input_shape= (x_train_array.shape[1], 1)))

#model.add(LSTM(8, dropout=0.5, recurrent_dropout=0.5, return_sequences=False, input_shape= (x_train_array.shape[1], 1)))
#model.add(Dropout(0.5))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(8, activation = 'relu'))
model.add(Dense(84, activation = 'relu'))
#model.add(Dropout(0.5))
#model.add(Dense(32, activation = 'relu'))
#model.add(Dense(32, activation = 'relu'))
#model.add(Dense(1))

# Output layer
model.add(Dense(3, activation='softmax'))

# Compile the model
#model.compile(optimizer='adam', loss='mean_squared_error')

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'AUC'])

#print(x_train_array.shape)
# Train the model
model.fit(x_train_array, y_train_array, batch_size=1, epochs=10)


Training Model...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x262cd27fee0>

In [489]:
# Compute predictions.
    
# Convert the x_test and y_test to numpy arrays 
x_test_array, y_test_array = np.array(X_test), np.array(y_test)
    
# Reshape the data
x_test_array = np.reshape(x_test_array, (x_test_array.shape[0], x_test_array.shape[1], 1 ))
    
# Get the models predicted price values
predictions = model.predict(x_test_array)
predictions = [x[0] for x in predictions]

# Results of Time Series.
#print("Mean squared error: %.2f" % mean_squared_error(y_test_time_series, predictions))
#print("R2 score: %.2f" % r2_score(y_test_time_series, predictions))
print(predictions)


#print([x for x in predictions if x < 0.5])
#print("The test accuracy of the model is: " + str(accuracy_score(y_test,predictions))) 
#print("The ROC AUC score of the model is: " + str(roc_auc_score(y_test, model.predict_proba(X_test),multi_class='ovr')))

[0.9378041, 0.9498523, 0.9092159, 0.94638443, 0.9399338, 0.95313525, 0.9479189, 0.9284715, 0.91607934, 0.94761777, 0.89710873, 0.9504062, 0.90570694, 0.9094533, 0.91980594, 0.9507792, 0.9399324, 0.950141, 0.9177998, 0.88740337, 0.9199323, 0.92914116, 0.9550062, 0.9261236, 0.9543824, 0.8600189, 0.93950456, 0.9257151, 0.9508583, 0.94307494, 0.91646254, 0.93964916, 0.9090423, 0.9368571, 0.92303705, 0.94630843, 0.93412566, 0.9499361, 0.91994697, 0.9190366, 0.9160005, 0.8935634, 0.9078365, 0.9247736, 0.9339077, 0.94413114, 0.88372207, 0.94707286, 0.94103014, 0.8854796, 0.93684316, 0.93609774, 0.96316916, 0.90761745, 0.85529715, 0.9294038, 0.95915854, 0.9372136, 0.93384266, 0.8937441, 0.9306186, 0.95874494, 0.9361268, 0.9227076, 0.94817376, 0.89063454, 0.9242442, 0.8982568, 0.9203962, 0.9375035, 0.9497349, 0.91010183, 0.9158298, 0.9438985, 0.948228, 0.9059465, 0.91160566, 0.9423904, 0.9587431, 0.93420106, 0.93377584, 0.8915121, 0.8381823, 0.9010755, 0.94816595, 0.8451957, 0.93350965, 0.90256

In [360]:
y_predict = model.predict(X_test)

In [361]:
print("The test accuracy of the model is: " + str(accuracy_score(y_test,y_predict))) 
#print("The ROC AUC score of the model is: " + str(roc_auc_score(y_test, model.predict_proba(X_test),multi_class='ovr')))

The test accuracy of the model is: 0.3333333333333333


In [152]:
print("The ROC AUC score of the baseline model is: " + str(roc_auc_score(y_test, [[1/3,1/3,1/3] for x in range(len(X_test))],multi_class='ovr')))

The ROC AUC score of the baseline model is: 0.5
