In this notebook we use three pre-trained sentiment analysis models (VADER, FinBERT and flair) to assign sentiment scores to our tweets. The tweets were previously scraped from Twitter using keywords to find tweets relevant to the stock that we are predicting.

In [1]:
import re
import string
import time

import flair
import nltk
import pandas as pd
import torch
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import TweetTokenizer
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [2]:
# Download the VADER lexicon; it has to be available before the analyzer
# imported on the next line can be instantiated further down.
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download the NLTK stopword lists used by remove_stopwords() during preprocessing.
nltk.download('stopwords')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\lazar\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\lazar\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

Read the data

In [3]:
# Load the scraped tweets; the path is relative to the notebook's working directory.
# NOTE(review): the output shown below already contains an 'Unnamed: 0' column and
# the score columns computed later in this notebook, so this file looks like a
# previously saved result rather than the raw scrape -- confirm the input file.
tweets = pd.read_csv('data/all_tweets.csv')
# Quick sanity check of the (rows, columns) that were read.
print(tweets.shape)
tweets.head()

(50721, 24)


Unnamed: 0,UserId,UserName,Verified,Location,Followers,Tweet,Date,Retweeted,Language,Likes Count,...,scores,compound,pos,neg,neu,type,Preprocessed Tweet,fb pos,fb neg,fb neu
0,1664015967280590849,TimeActionQuant,False,"Nashville, TN",1158,Intraday Cycles - 05/31/2023 - Results!\n\n#ES...,2023-05-31 21:08:17+00:00,,en,0,...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0,NEUTRAL,intraday cycl result es_f spx spi sp http,0.04988,0.018428,0.931692
1,1664013296616259584,solrevived4real,False,,91,couldn't have asked for a more perfect monthly...,2023-05-31 20:57:40+00:00,,en,0,...,"{'neg': 0.0, 'neu': 0.654, 'pos': 0.346, 'comp...",0.5719,0.346,0.0,0.654,POS,ask perfect monthli close spx spi sp http,0.047117,0.027944,0.924939
2,1664013202567446528,AnalyseBourses,False,New York Paris Hong Kong Tokyo,123,.\n\n#Parabolic SAR &amp; STA for the 06/01 wa...,2023-05-31 20:57:18+00:00,,en,0,...,"{'neg': 0.192, 'neu': 0.728, 'pos': 0.08, 'com...",-0.5994,0.08,0.192,0.728,NEG,parabol sar sta put onlin sp eurusd hangseng c...,0.093006,0.014138,0.892855
3,1664011816089927685,DisfoldAI,False,Internet,12829,"The #Nasdaq is trending up, the #Dow is trendi...",2023-05-31 20:51:47+00:00,,en,0,...,"{'neg': 0.0, 'neu': 0.59, 'pos': 0.41, 'compou...",0.9153,0.41,0.0,0.59,POS,nasdaq trend dow trend sp trade sideway one ri...,0.058029,0.023765,0.918206
4,1664010574517133312,sp500bullish,False,,12,@ardizor 0xB7e6691Bb0D09C8F988fDea6D3f2a89cFbe...,2023-05-31 20:46:51+00:00,,en,0,...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0,NEUTRAL,xbebbdcffdeadfacfbecd,0.062647,0.02008,0.917273


Define needed functions for preprocessing

In [4]:

def remove_hyperlinks(tweet):
    """Strip Twitter-specific noise from a raw tweet before tokenisation.

    Steps, in order:
      1. drop a leading old-style retweet marker ("RT" followed by whitespace);
      2. replace every URL with the placeholder token ``http``;
      3. drop the ``#`` sign of hashtags while keeping the hashtag text;
      4. delete every digit character.

    Args:
        tweet (str): raw tweet text.

    Returns:
        str: the cleaned tweet text.
    """
    # Remove the old-style retweet text "RT" at the very start of the tweet.
    tweet2 = re.sub(r'^RT[\s]+', '', tweet)

    # Replace each hyperlink with the token 'http'. Only the URL itself (\S+) is
    # matched: the previous pattern `.*[\r\n]*` also swallowed every word after
    # the link on the same line and the following line breaks, which glued the
    # placeholder to the next line (e.g. 'httpStock market').
    tweet2 = re.sub(r'https?://\S+', 'http', tweet2)

    # Hashtag text carries a lot of information, so only the '#' sign is
    # removed and the word itself is kept.
    tweet2 = re.sub(r'#', '', tweet2)

    # Remove every digit character (this also strips digits inside words,
    # e.g. 'sp500' becomes 'sp').
    tweet2 = re.sub(r'[0-9]', '', tweet2)

    return tweet2


In [5]:
def tokenize(tweet):
    """Split a tweet into tokens with NLTK's TweetTokenizer.

    The tokenizer is built with preserve_case=False, strip_handles=True and
    reduce_len=True, and its token list is returned unchanged.

    Args:
        tweet (str): text to tokenize.

    Returns:
        list: the tokens produced by TweetTokenizer.tokenize.
    """
    return TweetTokenizer(
        preserve_case=False,
        strip_handles=True,
        reduce_len=True,
    ).tokenize(tweet)

In [6]:
# Stopword sets keyed by language. Filled lazily so the NLTK word list is
# fetched once instead of once per tweet.
_STOPWORD_CACHE = {}


def _stopword_set(language):
    """Return the NLTK stopwords for `language` as a frozenset, loading them once."""
    if language not in _STOPWORD_CACHE:
        _STOPWORD_CACHE[language] = frozenset(stopwords.words(language))
    return _STOPWORD_CACHE[language]


def remove_stopwords(tweet_tokens):
    """Drop English stopwords and punctuation tokens from a token list.

    Args:
        tweet_tokens (list[str]): tokens of one tweet.

    Returns:
        list[str]: the remaining tokens, in their original order.
    """
    # A frozenset makes each stopword check a hash lookup instead of a scan
    # over the whole word list.
    stopwords_english = _stopword_set('english')

    # `word not in string.punctuation` is a substring test against the
    # punctuation string (kept exactly as before): single punctuation marks
    # are dropped, multi-character tokens such as ':)' are kept.
    return [
        word for word in tweet_tokens
        if word not in stopwords_english and word not in string.punctuation
    ]

In [7]:
def stemming(tweet_tokens):
    """Stem every token with NLTK's PorterStemmer.

    Args:
        tweet_tokens (list[str]): tokens of one tweet.

    Returns:
        list[str]: one stem per input token, in the same order.
    """
    porter = PorterStemmer()
    return [porter.stem(token) for token in tweet_tokens]

In [8]:
def preprocess(tweet):
    """Run the full cleaning pipeline on one tweet.

    The tweet goes through remove_hyperlinks, tokenize, remove_stopwords and
    stemming, in that order; the resulting tokens are joined with single spaces.

    Args:
        tweet (str): raw tweet text.

    Returns:
        str: the cleaned tokens joined by ' '.
    """
    tokens = tokenize(remove_hyperlinks(tweet))
    for step in (remove_stopwords, stemming):
        tokens = step(tokens)
    return " ".join(tokens)

VADER sentiment analysis

In [9]:
# Create the VADER SentimentIntensityAnalyzer. Its lexicon is extended and its
# polarity_scores() method is applied to the tweets in the cells below.
sia= SentimentIntensityAnalyzer()

In [10]:
# Augment the VADER vocabulary with finance-specific terms: every word listed
# as positive gets valence +4 and every word listed as negative gets -4.
# NOTE(review): the tweets are stemmed by preprocess() before they are scored,
# so inflected entries such as 'undervalued', 'rising' or 'falling' may never
# match the scored text -- confirm whether stems should be listed here too.

positive_words='buy bull long support undervalued underpriced cheap upward rising trend moon rocket hold breakout call beat support buying holding high profit moon'
negative_words='sell bear bubble bearish short overvalued overbought overpriced expensive downward falling sold sell low put miss resistance squeeze cover seller '

# split() without an argument splits on any run of whitespace and ignores
# leading/trailing blanks. split(" ") turned the trailing space of
# negative_words into an empty-string key '' with valence -4.
dictOfpos = { i : 4 for i in positive_words.split() }
dictOfneg = { i : -4 for i in negative_words.split() }
Financial_Lexicon = {**dictOfpos, **dictOfneg}

sia.lexicon.update(Financial_Lexicon)

In [11]:
# Score every tweet with VADER: clean the raw text with preprocess(), then
# store the dict returned by polarity_scores() in the new 'scores' column.
tweets['scores'] = tweets['Tweet'].apply(preprocess).apply(sia.polarity_scores)
tweets.head()

Unnamed: 0,UserId,UserName,Verified,Location,Followers,Tweet,Date,Retweeted,Language,Likes Count,Retweeted Count,Replied Count,Quoted Count,Hashtag,scores
0,1663621305269121028,CryptoTreeMap,False,,443,https://t.co/t6kVcSK6IH\n\nStock market:\n$QCO...,2023-05-30 19:00:02+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
1,1663591110063554562,CryptoTreeMap,False,,443,https://t.co/t6kVcSK6IH\n\nStock market:\n$QCO...,2023-05-30 17:00:03+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
2,1663560911146348547,CryptoTreeMap,False,,443,https://t.co/t6kVcSKEyf\n\nStock market:\n$AVG...,2023-05-30 15:00:03+00:00,,en,1,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
3,1663133044235018240,hard2predict,False,,438,De siste 30 år (1990-2020) ble all vekst i det...,2023-05-29 10:39:51+00:00,,no,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
4,1663056745596608514,TanArrowz,False,,1933,GS: China’s online gaming market is dominated ...,2023-05-29 05:36:40+00:00,,en,3,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 0.725, 'pos': 0.275, 'comp..."


In [12]:
# Unpack the VADER result dict into one column per score, created in the
# order compound, pos, neg, neu.
for score_name in ('compound', 'pos', 'neg', 'neu'):
    tweets[score_name] = tweets['scores'].apply(
        lambda score_dict, key=score_name: score_dict[key]
    )

tweets.head(20)

Unnamed: 0,UserId,UserName,Verified,Location,Followers,Tweet,Date,Retweeted,Language,Likes Count,Retweeted Count,Replied Count,Quoted Count,Hashtag,scores,compound,pos,neg,neu
0,1663621305269121028,CryptoTreeMap,False,,443,https://t.co/t6kVcSK6IH\n\nStock market:\n$QCO...,2023-05-30 19:00:02+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0
1,1663591110063554562,CryptoTreeMap,False,,443,https://t.co/t6kVcSK6IH\n\nStock market:\n$QCO...,2023-05-30 17:00:03+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0
2,1663560911146348547,CryptoTreeMap,False,,443,https://t.co/t6kVcSKEyf\n\nStock market:\n$AVG...,2023-05-30 15:00:03+00:00,,en,1,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0
3,1663133044235018240,hard2predict,False,,438,De siste 30 år (1990-2020) ble all vekst i det...,2023-05-29 10:39:51+00:00,,no,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0
4,1663056745596608514,TanArrowz,False,,1933,GS: China’s online gaming market is dominated ...,2023-05-29 05:36:40+00:00,,en,3,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 0.725, 'pos': 0.275, 'comp...",0.6486,0.275,0.0,0.725
5,1661613875442098179,VivaPost,False,World wide,357,It seems that Chinese tech stocks are less des...,2023-05-25 06:03:13+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 0.922, 'pos': 0.078, 'comp...",0.296,0.078,0.0,0.922
6,1660710766251737090,eastwest_conn,False,,318,Tencent Holdings: spent HK$350 million and rep...,2023-05-22 18:14:35+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 0.581, 'pos': 0.419, 'comp...",0.802,0.419,0.0,0.581
7,1660692003431108608,CryptoTreeMap,False,,443,https://t.co/t6kVcSK6IH\n\nStock market:\n$PFE...,2023-05-22 17:00:02+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0
8,1660661806795300864,CryptoTreeMap,False,,443,https://t.co/t6kVcSK6IH\n\nStock market:\n$TCE...,2023-05-22 15:00:03+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0
9,1659272657894875137,CryptoTreeMap,False,,443,https://t.co/t6kVcSK6IH\n\nStock market:\n$NFL...,2023-05-18 19:00:04+00:00,,en,1,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0


In [13]:
# Label each tweet from the sign of its VADER compound score:
# positive -> 'POS', exactly zero -> 'NEUTRAL', negative -> 'NEG'.
tweets['type'] = ''
for label, selected in (
    ('POS', tweets.compound > 0),
    ('NEUTRAL', tweets.compound == 0),
    ('NEG', tweets.compound < 0),
):
    tweets.loc[selected, 'type'] = label
tweets.head(20)

Unnamed: 0,UserId,UserName,Verified,Location,Followers,Tweet,Date,Retweeted,Language,Likes Count,Retweeted Count,Replied Count,Quoted Count,Hashtag,scores,compound,pos,neg,neu,type
0,1663621305269121028,CryptoTreeMap,False,,443,https://t.co/t6kVcSK6IH\n\nStock market:\n$QCO...,2023-05-30 19:00:02+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0,NEUTRAL
1,1663591110063554562,CryptoTreeMap,False,,443,https://t.co/t6kVcSK6IH\n\nStock market:\n$QCO...,2023-05-30 17:00:03+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0,NEUTRAL
2,1663560911146348547,CryptoTreeMap,False,,443,https://t.co/t6kVcSKEyf\n\nStock market:\n$AVG...,2023-05-30 15:00:03+00:00,,en,1,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0,NEUTRAL
3,1663133044235018240,hard2predict,False,,438,De siste 30 år (1990-2020) ble all vekst i det...,2023-05-29 10:39:51+00:00,,no,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0,NEUTRAL
4,1663056745596608514,TanArrowz,False,,1933,GS: China’s online gaming market is dominated ...,2023-05-29 05:36:40+00:00,,en,3,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 0.725, 'pos': 0.275, 'comp...",0.6486,0.275,0.0,0.725,POS
5,1661613875442098179,VivaPost,False,World wide,357,It seems that Chinese tech stocks are less des...,2023-05-25 06:03:13+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 0.922, 'pos': 0.078, 'comp...",0.296,0.078,0.0,0.922,POS
6,1660710766251737090,eastwest_conn,False,,318,Tencent Holdings: spent HK$350 million and rep...,2023-05-22 18:14:35+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 0.581, 'pos': 0.419, 'comp...",0.802,0.419,0.0,0.581,POS
7,1660692003431108608,CryptoTreeMap,False,,443,https://t.co/t6kVcSK6IH\n\nStock market:\n$PFE...,2023-05-22 17:00:02+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0,NEUTRAL
8,1660661806795300864,CryptoTreeMap,False,,443,https://t.co/t6kVcSK6IH\n\nStock market:\n$TCE...,2023-05-22 15:00:03+00:00,,en,0,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0,NEUTRAL
9,1659272657894875137,CryptoTreeMap,False,,443,https://t.co/t6kVcSK6IH\n\nStock market:\n$NFL...,2023-05-18 19:00:04+00:00,,en,1,0,0,0,#TCEHY,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0,NEUTRAL


In [14]:
# Keep the cleaned text in its own column so it can be inspected and fed to
# the FinBERT and flair models below.
tweets['Preprocessed Tweet'] = tweets['Tweet'].apply(preprocess)

FINBERT sentiment analysis

In [11]:
# Load the pre-trained FinBERT tokenizer and sequence classifier from the
# same checkpoint.
finbert_checkpoint = "ProsusAI/finbert"
finbert_tokenizer = AutoTokenizer.from_pretrained(finbert_checkpoint)
finbert_model = AutoModelForSequenceClassification.from_pretrained(finbert_checkpoint)

In [12]:
def finbert_predict(tweet, tokenizer, model):
    """Return the class probabilities a sequence classifier assigns to one text.

    Args:
        tweet (str): the text to classify (a single text, not a batch).
        tokenizer: callable invoked as
            ``tokenizer(tweet, padding=True, truncation=True, return_tensors='pt')``
            that returns a mapping of model inputs.
        model: callable that accepts those inputs as keyword arguments and
            returns an object exposing a ``logits`` tensor.

    Returns:
        list[float]: ``[positive, negative, neutral]``, the softmax of logit
        columns 0, 1 and 2.
        NOTE(review): this assumes the checkpoint orders its labels as
        positive / negative / neutral -- confirm against model.config.id2label.
    """
    # Tokenize the text to be sent to the model.
    inputs = tokenizer(tweet, padding = True, truncation = True, return_tensors='pt')

    # Inference only: without no_grad() autograd records the whole forward pass
    # on every call, which costs time and memory for nothing.
    with torch.no_grad():
        outputs = model(**inputs)

    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

    # float() requires a one-element tensor, so this supports a single text only.
    positive = float(predictions[:, 0])
    negative = float(predictions[:, 1])
    neutral = float(predictions[:, 2])

    return [positive, negative, neutral]

In [17]:
# Smoke test: score the preprocessed tweet at index label 0 to check that the
# tokenizer and model work together.
finbert_predict(tweets.loc[0, 'Preprocessed Tweet'], finbert_tokenizer, finbert_model)

[0.04605134204030037, 0.01414080336689949, 0.9398078322410583]

In [18]:
# Score every preprocessed tweet with FinBERT and report how long the full
# pass takes. `time` is imported with the other imports at the top of the
# notebook, so later timing cells do not depend on this cell having run.
start = time.time()

fb_columns = ['fb pos', 'fb neg', 'fb neu']

# Collect the three probabilities of each tweet first and build the columns in
# one go. This replaces per-cell `.loc` writes, no longer assumes a RangeIndex
# (`tweets.index.stop`), and creates the columns as floats instead of
# initialising them with the integer 0.
fb_scores = [
    finbert_predict(text, finbert_tokenizer, finbert_model)
    for text in tweets['Preprocessed Tweet']
]
fb_frame = pd.DataFrame(fb_scores, index=tweets.index, columns=fb_columns, dtype=float)
for column in fb_columns:
    tweets[column] = fb_frame[column]

end = time.time()
print('Vreme u sekundama:')
print(end - start)# time in seconds

Vreme u sekundama:
2239.062626838684


In [9]:
# Preview the first 10 rows of the tweets frame.
tweets.head(10)

Unnamed: 0,UserId,UserName,Verified,Location,Followers,Tweet,Date,Retweeted,Language,Likes Count,...,scores,compound,pos,neg,neu,type,Preprocessed Tweet,fb pos,fb neg,fb neu
0,1664015967280590849,TimeActionQuant,False,"Nashville, TN",1158,Intraday Cycles - 05/31/2023 - Results!\n\n#ES...,2023-05-31 21:08:17+00:00,,en,0,...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0,NEUTRAL,intraday cycl result es_f spx spi sp http,0.04988,0.018428,0.931692
1,1664013296616259584,solrevived4real,False,,91,couldn't have asked for a more perfect monthly...,2023-05-31 20:57:40+00:00,,en,0,...,"{'neg': 0.0, 'neu': 0.654, 'pos': 0.346, 'comp...",0.5719,0.346,0.0,0.654,POS,ask perfect monthli close spx spi sp http,0.047117,0.027944,0.924939
2,1664013202567446528,AnalyseBourses,False,New York Paris Hong Kong Tokyo,123,.\n\n#Parabolic SAR &amp; STA for the 06/01 wa...,2023-05-31 20:57:18+00:00,,en,0,...,"{'neg': 0.192, 'neu': 0.728, 'pos': 0.08, 'com...",-0.5994,0.08,0.192,0.728,NEG,parabol sar sta put onlin sp eurusd hangseng c...,0.093006,0.014138,0.892855
3,1664011816089927685,DisfoldAI,False,Internet,12829,"The #Nasdaq is trending up, the #Dow is trendi...",2023-05-31 20:51:47+00:00,,en,0,...,"{'neg': 0.0, 'neu': 0.59, 'pos': 0.41, 'compou...",0.9153,0.41,0.0,0.59,POS,nasdaq trend dow trend sp trade sideway one ri...,0.058029,0.023765,0.918206
4,1664010574517133312,sp500bullish,False,,12,@ardizor 0xB7e6691Bb0D09C8F988fDea6D3f2a89cFbe...,2023-05-31 20:46:51+00:00,,en,0,...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0,NEUTRAL,xbebbdcffdeadfacfbecd,0.062647,0.02008,0.917273
5,1664008712837812225,JAMESALEX7672,False,Nigeria,19,LIVE streaming FREE #GOLD #XAUUSD SIGNALS\nPle...,2023-05-31 20:39:27+00:00,,en,1,...,"{'neg': 0.0, 'neu': 0.833, 'pos': 0.167, 'comp...",0.7003,0.167,0.0,0.833,POS,live stream free gold xauusd signal pleas like...,0.05992,0.017577,0.922502
6,1664008359929085953,billohbarrie199,False,Saudi Arabia,25,LIVE streaming FREE #GOLD #XAUUSD SIGNALS\nPle...,2023-05-31 20:38:03+00:00,,en,2,...,"{'neg': 0.0, 'neu': 0.833, 'pos': 0.167, 'comp...",0.7003,0.167,0.0,0.833,POS,live stream free gold xauusd signal pleas like...,0.05992,0.017577,0.922502
7,1664007431821590530,sjp_research,False,,10,SPX 1-week forecast from 05-23 was right (for ...,2023-05-31 20:34:22+00:00,,en,0,...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,0.0,1.0,NEUTRAL,spx week forecast right pt last forecast right...,0.064816,0.035233,0.899951
8,1664007303769587712,WafaIrfan123,False,"Gbeleko, Nigeria",203,LIVE streaming FREE #GOLD #XAUUSD SIGNALS\nPle...,2023-05-31 20:33:51+00:00,,en,3,...,"{'neg': 0.0, 'neu': 0.838, 'pos': 0.162, 'comp...",0.7003,0.162,0.0,0.838,POS,live stream free gold xauusd signal pleas like...,0.058091,0.018541,0.923368
9,1664007059233288193,forexpips111,False,"Gbongan, Nigeria",123,LIVE streaming FREE #GOLD #XAUUSD SIGNALS\nPle...,2023-05-31 20:32:53+00:00,,en,3,...,"{'neg': 0.0, 'neu': 0.838, 'pos': 0.162, 'comp...",0.7003,0.162,0.0,0.838,POS,live stream free gold xauusd signal pleas like...,0.059766,0.017372,0.922862


FLAIR sentiment analysis

In [10]:
# Load flair's pre-trained 'en-sentiment' text classifier.
flair_model = flair.models.TextClassifier.load('en-sentiment')

In [14]:
def flair_predict(tweets, model):
    """Add flair sentiment columns to `tweets` in place.

    Each 'Preprocessed Tweet' value is converted with str(), wrapped in a
    flair Sentence and passed to ``model.predict``. The first label attached
    to the sentence is stored in two new columns:

      * 'flair probability' -- the label's score
      * 'flair sentiment'   -- the label's value

    Rows for which the model attaches no label keep the defaults 0.0 and ''.

    Args:
        tweets (pandas.DataFrame): frame with a 'Preprocessed Tweet' column;
            it is modified in place.
        model: object whose ``predict(sentence)`` attaches labels to the sentence.

    Returns:
        None
    """
    probabilities = []
    sentiments = []

    # Iterate over the values instead of range(tweets.index.stop) so that any
    # index works, not only a RangeIndex starting at 0.
    for text in tweets['Preprocessed Tweet']:
        # NOTE(review): str() turns a missing value (NaN) into the literal text
        # 'nan', which is then scored like any other tweet -- confirm intended.
        sentence = flair.data.Sentence(str(text))
        model.predict(sentence)

        if sentence.labels:
            top_label = sentence.labels[0]
            probabilities.append(float(top_label.score))  # numerical score 0-1
            sentiments.append(top_label.value)  # 'POSITIVE' or 'NEGATIVE'
        else:
            # No label was attached (e.g. nothing to classify): keep the
            # defaults instead of failing with IndexError on labels[0].
            probabilities.append(0.0)
            sentiments.append('')

    # Build each column once with an explicit dtype, rather than creating an
    # integer column and writing floats into it cell by cell.
    tweets['flair probability'] = pd.Series(probabilities, index=tweets.index, dtype=float)
    tweets['flair sentiment'] = pd.Series(sentiments, index=tweets.index, dtype=object)






In [15]:
# Time a full flair pass over the tweets; flair_predict adds its columns to
# the frame in place.
start = time.time()
flair_predict(tweets, flair_model)
end = time.time()

print('Vreme u sekundama:', end - start, sep='\n')# time in seconds

Vreme u sekundama:
6783.110467910767


In [16]:
# Preview the first 20 rows of the tweets frame.
tweets.head(20)

Unnamed: 0,UserId,UserName,Verified,Location,Followers,Tweet,Date,Retweeted,Language,Likes Count,...,pos,neg,neu,type,Preprocessed Tweet,fb pos,fb neg,fb neu,flair probability,flair sentiment
0,1664015967280590849,TimeActionQuant,False,"Nashville, TN",1158,Intraday Cycles - 05/31/2023 - Results!\n\n#ES...,2023-05-31 21:08:17+00:00,,en,0,...,0.0,0.0,1.0,NEUTRAL,intraday cycl result es_f spx spi sp http,0.04988,0.018428,0.931692,0.823521,POSITIVE
1,1664013296616259584,solrevived4real,False,,91,couldn't have asked for a more perfect monthly...,2023-05-31 20:57:40+00:00,,en,0,...,0.346,0.0,0.654,POS,ask perfect monthli close spx spi sp http,0.047117,0.027944,0.924939,0.587212,NEGATIVE
2,1664013202567446528,AnalyseBourses,False,New York Paris Hong Kong Tokyo,123,.\n\n#Parabolic SAR &amp; STA for the 06/01 wa...,2023-05-31 20:57:18+00:00,,en,0,...,0.08,0.192,0.728,NEG,parabol sar sta put onlin sp eurusd hangseng c...,0.093006,0.014138,0.892855,0.906969,NEGATIVE
3,1664011816089927685,DisfoldAI,False,Internet,12829,"The #Nasdaq is trending up, the #Dow is trendi...",2023-05-31 20:51:47+00:00,,en,0,...,0.41,0.0,0.59,POS,nasdaq trend dow trend sp trade sideway one ri...,0.058029,0.023765,0.918206,0.886936,NEGATIVE
4,1664010574517133312,sp500bullish,False,,12,@ardizor 0xB7e6691Bb0D09C8F988fDea6D3f2a89cFbe...,2023-05-31 20:46:51+00:00,,en,0,...,0.0,0.0,1.0,NEUTRAL,xbebbdcffdeadfacfbecd,0.062647,0.02008,0.917273,0.728623,NEGATIVE
5,1664008712837812225,JAMESALEX7672,False,Nigeria,19,LIVE streaming FREE #GOLD #XAUUSD SIGNALS\nPle...,2023-05-31 20:39:27+00:00,,en,1,...,0.167,0.0,0.833,POS,live stream free gold xauusd signal pleas like...,0.05992,0.017577,0.922502,0.769223,NEGATIVE
6,1664008359929085953,billohbarrie199,False,Saudi Arabia,25,LIVE streaming FREE #GOLD #XAUUSD SIGNALS\nPle...,2023-05-31 20:38:03+00:00,,en,2,...,0.167,0.0,0.833,POS,live stream free gold xauusd signal pleas like...,0.05992,0.017577,0.922502,0.769223,NEGATIVE
7,1664007431821590530,sjp_research,False,,10,SPX 1-week forecast from 05-23 was right (for ...,2023-05-31 20:34:22+00:00,,en,0,...,0.0,0.0,1.0,NEUTRAL,spx week forecast right pt last forecast right...,0.064816,0.035233,0.899951,0.900904,NEGATIVE
8,1664007303769587712,WafaIrfan123,False,"Gbeleko, Nigeria",203,LIVE streaming FREE #GOLD #XAUUSD SIGNALS\nPle...,2023-05-31 20:33:51+00:00,,en,3,...,0.162,0.0,0.838,POS,live stream free gold xauusd signal pleas like...,0.058091,0.018541,0.923368,0.645486,NEGATIVE
9,1664007059233288193,forexpips111,False,"Gbongan, Nigeria",123,LIVE streaming FREE #GOLD #XAUUSD SIGNALS\nPle...,2023-05-31 20:32:53+00:00,,en,3,...,0.162,0.0,0.838,POS,live stream free gold xauusd signal pleas like...,0.059766,0.017372,0.922862,0.537783,NEGATIVE


In [17]:
# Persist the scored tweets as a comma-separated file; the DataFrame index is
# not written.
tweets.to_csv('data/all_tweets_processed.csv', index=False)
