In [1]:
import pandas as pd
import numpy as np
import re
import emoji
import string
import plotly.express as px

import nltk
from nltk.tokenize import TweetTokenizer  
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

In [2]:
df = pd.read_csv('btc_2020_to_2022.csv')
df

Unnamed: 0,date,user,tweet,likes,replies,retweets
0,2020-01-01 21:28:09+00:00,TheMoonCarl,┏━━┓┏━━┓┏━━┓┏━━┓\n┗━┓┃┃┏┓┃┗━┓┃┃┏┓┃\n┏━┛┃┃┃┃┃┏━...,248,33,31
1,2020-01-01 19:17:56+00:00,CryptoMichNL,Q1 2020 will be 🔥 for the markets and altcoins...,259,26,24
2,2020-01-01 15:30:40+00:00,Bitsahara,0.0015 BTC New Year Giveaway!\n\n👉RETWEET &amp...,266,410,274
3,2020-01-01 12:18:06+00:00,mishalederman,#Bitcoin has arrived to the #TRON ecosystem!\n...,190,15,73
4,2020-01-01 12:11:39+00:00,CalvinAyre,I have been accused of being able to predict t...,237,22,69
...,...,...,...,...,...,...
27495,2022-09-19 21:53:11+00:00,invest_answers,"Historical Patterns, what it means for Crypto ...",138,42,10
27496,2022-09-19 21:45:41+00:00,Nebraskangooner,#Bitcoin\n\nDaily OBV https://t.co/d228TcvFfJ,211,46,14
27497,2022-09-19 21:44:25+00:00,mattcsnow,who is starting the #bitcoin + sauna conferenc...,122,35,7
27498,2022-09-19 21:42:55+00:00,Danology10,"Keep calm and #HODL, while #BTC does the ol fl...",163,16,23


In [3]:
df = df.astype({"tweet": str}, errors='raise')
df.dtypes

date        object
user        object
tweet       object
likes        int64
replies      int64
retweets     int64
dtype: object

### 1. Removing null values if any

In [4]:
df.isna().any()

date        False
user        False
tweet       False
likes       False
replies     False
retweets    False
dtype: bool

* comment: likes, replies and retweets having 0 count is normal

#### 2. Dropping duplicate tweets

In [5]:
df.duplicated(subset='tweet').sum()

92

In [6]:
df = df.drop_duplicates(subset=['tweet'])

#### 3. Lowercasing tweets content

In [7]:
df['tweet'] = df.tweet.str.lower()

In [8]:
df.head()

Unnamed: 0,date,user,tweet,likes,replies,retweets
0,2020-01-01 21:28:09+00:00,TheMoonCarl,┏━━┓┏━━┓┏━━┓┏━━┓\n┗━┓┃┃┏┓┃┗━┓┃┃┏┓┃\n┏━┛┃┃┃┃┃┏━...,248,33,31
1,2020-01-01 19:17:56+00:00,CryptoMichNL,q1 2020 will be 🔥 for the markets and altcoins...,259,26,24
2,2020-01-01 15:30:40+00:00,Bitsahara,0.0015 btc new year giveaway!\n\n👉retweet &amp...,266,410,274
3,2020-01-01 12:18:06+00:00,mishalederman,#bitcoin has arrived to the #tron ecosystem!\n...,190,15,73
4,2020-01-01 12:11:39+00:00,CalvinAyre,i have been accused of being able to predict t...,237,22,69


#### 4. Removing "@" and usernames

In [9]:
df.tweet = df.tweet.apply(lambda x: re.sub("@[A-Za-z0-9_]+", "", x))
df.head()

Unnamed: 0,date,user,tweet,likes,replies,retweets
0,2020-01-01 21:28:09+00:00,TheMoonCarl,┏━━┓┏━━┓┏━━┓┏━━┓\n┗━┓┃┃┏┓┃┗━┓┃┃┏┓┃\n┏━┛┃┃┃┃┃┏━...,248,33,31
1,2020-01-01 19:17:56+00:00,CryptoMichNL,q1 2020 will be 🔥 for the markets and altcoins...,259,26,24
2,2020-01-01 15:30:40+00:00,Bitsahara,0.0015 btc new year giveaway!\n\n👉retweet &amp...,266,410,274
3,2020-01-01 12:18:06+00:00,mishalederman,#bitcoin has arrived to the #tron ecosystem!\n...,190,15,73
4,2020-01-01 12:11:39+00:00,CalvinAyre,i have been accused of being able to predict t...,237,22,69


#### 5. Removing "#" symbol

In [10]:
df.tweet = df.tweet.apply(lambda x:  re.sub(r'#', '', x) )

#### 6. Removing emoji 

In [11]:
df = df.astype(str).apply(lambda x: x.str.encode('ascii', 'ignore').str.decode('ascii'))
df.head()

Unnamed: 0,date,user,tweet,likes,replies,retweets
0,2020-01-01 21:28:09+00:00,TheMoonCarl,\n\n \n ...,248,33,31
1,2020-01-01 19:17:56+00:00,CryptoMichNL,q1 2020 will be for the markets and altcoins....,259,26,24
2,2020-01-01 15:30:40+00:00,Bitsahara,0.0015 btc new year giveaway!\n\nretweet &amp;...,266,410,274
3,2020-01-01 12:18:06+00:00,mishalederman,bitcoin has arrived to the tron ecosystem!\n\n...,190,15,73
4,2020-01-01 12:11:39+00:00,CalvinAyre,i have been accused of being able to predict t...,237,22,69


In [12]:
def strip_links(text):
    link_regex    = re.compile('((https?):((//)|(\\\\))+([\w\d:#@%/;$()~_?\+-=\\\.&](#!)?)*)', re.DOTALL)
    links         = re.findall(link_regex, text)
    for link in links:
        text = text.replace(link[0], ', ')    
    return text

tests = df["tweet"]
temp = []
for t in tests:
    temp.append(strip_links(t))
df["tweet"] = temp
df.head()

Unnamed: 0,date,user,tweet,likes,replies,retweets
0,2020-01-01 21:28:09+00:00,TheMoonCarl,\n\n \n ...,248,33,31
1,2020-01-01 19:17:56+00:00,CryptoMichNL,q1 2020 will be for the markets and altcoins....,259,26,24
2,2020-01-01 15:30:40+00:00,Bitsahara,0.0015 btc new year giveaway!\n\nretweet &amp;...,266,410,274
3,2020-01-01 12:18:06+00:00,mishalederman,bitcoin has arrived to the tron ecosystem!\n\n...,190,15,73
4,2020-01-01 12:11:39+00:00,CalvinAyre,i have been accused of being able to predict t...,237,22,69


In [13]:
df.tweet =    df.tweet.apply(lambda x: re.sub(r'[0-9]', '', x) ) # removing single digit numbers 

In [14]:
df.shape

(27408, 6)

In [15]:
df = df[~df.tweet.str.contains("bot")]
df = df[~df.tweet.str.contains("telegram")]
df = df[~df.tweet.str.contains("giveaway")]
df = df[~df.tweet.str.contains("earn free")]
df = df[~df.tweet.str.contains("video")]
df = df[~df.tweet.str.contains("follow and share")]

In [16]:
df.shape

(25961, 6)

In [17]:
df.head()

Unnamed: 0,date,user,tweet,likes,replies,retweets
0,2020-01-01 21:28:09+00:00,TheMoonCarl,\n\n \n ...,248,33,31
1,2020-01-01 19:17:56+00:00,CryptoMichNL,q will be for the markets and altcoins. \n\n...,259,26,24
3,2020-01-01 12:18:06+00:00,mishalederman,bitcoin has arrived to the tron ecosystem!\n\n...,190,15,73
4,2020-01-01 12:11:39+00:00,CalvinAyre,i have been accused of being able to predict t...,237,22,69
5,2020-01-01 07:59:23+00:00,TheMoonCarl,my plans for :\n\n accumulate bitcoin \n accum...,758,91,97


In [18]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/ryan.chang/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [19]:
# instantiate the tokenizer class
tokenizer = TweetTokenizer(preserve_case=False, 
                           strip_handles=True,
                           reduce_len=True)
# tokenize the tweets
df["tokenized"] = df.tweet.apply(nltk.word_tokenize)
print('\nTokenized strings:')
df.head()


Tokenized strings:


Unnamed: 0,date,user,tweet,likes,replies,retweets,tokenized
0,2020-01-01 21:28:09+00:00,TheMoonCarl,\n\n \n ...,248,33,31,"[|, bitcoin, ,, not, altcoins, !]"
1,2020-01-01 19:17:56+00:00,CryptoMichNL,q will be for the markets and altcoins. \n\n...,259,26,24,"[q, will, be, for, the, markets, and, altcoins..."
3,2020-01-01 12:18:06+00:00,mishalederman,bitcoin has arrived to the tron ecosystem!\n\n...,190,15,73,"[bitcoin, has, arrived, to, the, tron, ecosyst..."
4,2020-01-01 12:11:39+00:00,CalvinAyre,i have been accused of being able to predict t...,237,22,69,"[i, have, been, accused, of, being, able, to, ..."
5,2020-01-01 07:59:23+00:00,TheMoonCarl,my plans for :\n\n accumulate bitcoin \n accum...,758,91,97,"[my, plans, for, :, accumulate, bitcoin, accum..."


In [20]:
df["tweet"] = df["tweet"].apply(lambda x: "".join([word for word in x if word not in string.punctuation]))

In [21]:
# Function to remove punctuations and emojis

def remove_punct(text):
    # remove numbers
    text_nonum = re.sub(r'\d+', '', text)
    # remove punctuations and convert characters to lower case
    text_nopunct = "".join([char.lower() for char in text_nonum if char not in string.punctuation]) 
    # substitute multiple whitespace with single whitespace
    # Also, removes leading and trailing whitespaces
    text_no_doublespace = re.sub('\s+', ' ', text_nopunct).strip()
    return text_no_doublespace

df['tweet'] = df['tweet'].str.replace("[^a-zA-Z#!]", " ")
df.head()

  df['tweet'] = df['tweet'].str.replace("[^a-zA-Z#!]", " ")


Unnamed: 0,date,user,tweet,likes,replies,retweets,tokenized
0,2020-01-01 21:28:09+00:00,TheMoonCarl,...,248,33,31,"[|, bitcoin, ,, not, altcoins, !]"
1,2020-01-01 19:17:56+00:00,CryptoMichNL,q will be for the markets and altcoins btc...,259,26,24,"[q, will, be, for, the, markets, and, altcoins..."
3,2020-01-01 12:18:06+00:00,mishalederman,bitcoin has arrived to the tron ecosystem enj...,190,15,73,"[bitcoin, has, arrived, to, the, tron, ecosyst..."
4,2020-01-01 12:11:39+00:00,CalvinAyre,i have been accused of being able to predict t...,237,22,69,"[i, have, been, accused, of, being, able, to, ..."
5,2020-01-01 07:59:23+00:00,TheMoonCarl,my plans for accumulate bitcoin accumulat...,758,91,97,"[my, plans, for, :, accumulate, bitcoin, accum..."


In [22]:
df['tweet'] = df['tweet'].apply(remove_punct)

In [23]:
# instantiate the tokenizer class
tokenizer = TweetTokenizer(preserve_case=False, 
                           strip_handles=True,
                           reduce_len=True)
# tokenize the tweets
df["tokenized"] = df["tweet"].apply(nltk.word_tokenize)
print('\nTokenized strings:')
df.head(3)


Tokenized strings:


Unnamed: 0,date,user,tweet,likes,replies,retweets,tokenized
0,2020-01-01 21:28:09+00:00,TheMoonCarl,bitcoin not altcoins,248,33,31,"[bitcoin, not, altcoins]"
1,2020-01-01 19:17:56+00:00,CryptoMichNL,q will be for the markets and altcoins btc bit...,259,26,24,"[q, will, be, for, the, markets, and, altcoins..."
3,2020-01-01 12:18:06+00:00,mishalederman,bitcoin has arrived to the tron ecosystem enjo...,190,15,73,"[bitcoin, has, arrived, to, the, tron, ecosyst..."


In [24]:
# Tokenize Data
tokenize_tweets = df['tweet'].apply(lambda x: x.split()) 
tokenize_tweets.head(10)

0                              [bitcoin, not, altcoins]
1     [q, will, be, for, the, markets, and, altcoins...
3     [bitcoin, has, arrived, to, the, tron, ecosyst...
4     [i, have, been, accused, of, being, able, to, ...
5     [my, plans, for, accumulate, bitcoin, accumula...
6     [good, news, just, in, from, australia, bitcoi...
7     [heres, to, and, the, next, ten, years, may, t...
9     [tronwallet, introduces, a, completely, new, s...
10    [no, matter, how, far, bitcoin, falls, in, the...
11    [continuing, the, dec, trend, all, asset, clas...
Name: tweet, dtype: object

In [25]:
stemmer = PorterStemmer()

tokenize_tweets = tokenize_tweets.apply(lambda x: [stemmer.stem(i) for i in x])
tokenize_tweets.head(10)

0                               [bitcoin, not, altcoin]
1     [q, will, be, for, the, market, and, altcoin, ...
3     [bitcoin, ha, arriv, to, the, tron, ecosystem,...
4     [i, have, been, accus, of, be, abl, to, predic...
5     [my, plan, for, accumul, bitcoin, accumul, bit...
6     [good, news, just, in, from, australia, bitcoi...
7     [here, to, and, the, next, ten, year, may, thi...
9     [tronwallet, introduc, a, complet, new, swap, ...
10    [no, matter, how, far, bitcoin, fall, in, the,...
11    [continu, the, dec, trend, all, asset, class, ...
Name: tweet, dtype: object

In [26]:
df.to_csv('btc_2020_to_2022_processed.csv')

In [27]:
# df = pd.read_csv('btc_2022_processed.csv', index_col=[0])
# df

In [28]:
from nltk.tag import pos_tag

def getAdjectives(tweet):
    tweet = word_tokenize(tweet)  # convert string to tokens
    tweet = [word for (word, tag) in pos_tag(tweet)
             if tag == "JJ"]  # pos_tag module in NLTK library
    return " ".join(tweet)  # join words with a space in between them

In [29]:
from textblob import TextBlob

# Create function to obtain Subjectivity Score
def getSubjectivity(tweet):
    return TextBlob(tweet).sentiment.subjectivity

# Create function to obtain Polarity Score
def getPolarity(tweet):
    return TextBlob(tweet).sentiment.polarity

# Create function to obtain Sentiment category
def getSentimentTextBlob(polarity):
    if polarity < 0:
        return "Negative"
    elif polarity == 0:
        return "Neutral"
    else:
        return "Positive"

In [30]:
from nltk.stem import WordNetLemmatizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


# function to return words to their base form using Lemmatizer
def preprocessTweetsSentiments(tweet):
    tweet_tokens = word_tokenize(tweet)
    lemmatizer = WordNetLemmatizer() # instatiate an object WordNetLemmatizer Class
    lemma_words = [lemmatizer.lemmatize(w) for w in tweet_tokens]
    return " ".join(lemma_words)

In [31]:
df["Tweets_Sentiments"] = df["tokenized"].astype("str").apply(preprocessTweetsSentiments)

In [32]:
# Apply all functions above to respective columns
#df_clean['Tweets_Sentiments'] = 0
df['Polarity'] = 0
df['Sentiment'] = 0
df['Subjectivity']=df['Tweets_Sentiments'].astype('str').apply(getSubjectivity)
df['Polarity']=df['Tweets_Sentiments'].astype('str').apply(getPolarity)
df['Sentiment']=df['Polarity'].apply(getSentimentTextBlob)

In [33]:
# See quick results of the Sentiment Analysis
df['Sentiment'].value_counts()

Positive    12347
Neutral      9412
Negative     4202
Name: Sentiment, dtype: int64

In [37]:
# Create dataframe for Count of Sentiment Categories
bar_chart = df['Sentiment'].value_counts().rename_axis('Sentiment').to_frame('Total Tweets').reset_index()

In [38]:
bar_chart # Display dataframe

Unnamed: 0,Sentiment,Total Tweets
0,Positive,12347
1,Neutral,9412
2,Negative,4202


In [39]:
df

Unnamed: 0,date,user,tweet,likes,replies,retweets,tokenized,Tweets_Sentiments,Polarity,Sentiment,Subjectivity
0,2020-01-01 21:28:09+00:00,TheMoonCarl,bitcoin not altcoins,248,33,31,"[bitcoin, not, altcoins]","[ 'bitcoin ' , 'not ' , 'altcoins ' ]",0.000000,Neutral,0.000000
1,2020-01-01 19:17:56+00:00,CryptoMichNL,q will be for the markets and altcoins btc bit...,259,26,24,"[q, will, be, for, the, markets, and, altcoins...","[ ' q ' , 'will ' , 'be ' , 'for ' , 'the ' , ...",0.000000,Neutral,0.000000
3,2020-01-01 12:18:06+00:00,mishalederman,bitcoin has arrived to the tron ecosystem enjo...,190,15,73,"[bitcoin, has, arrived, to, the, tron, ecosyst...","[ 'bitcoin ' , 'has ' , 'arrived ' , 'to ' , '...",0.400000,Positive,0.500000
4,2020-01-01 12:11:39+00:00,CalvinAyre,i have been accused of being able to predict t...,237,22,69,"[i, have, been, accused, of, being, able, to, ...","[ ' i ' , 'have ' , 'been ' , 'accused ' , 'of...",0.171510,Positive,0.580032
5,2020-01-01 07:59:23+00:00,TheMoonCarl,my plans for accumulate bitcoin accumulate bit...,758,91,97,"[my, plans, for, accumulate, bitcoin, accumula...","[ 'my ' , 'plans ' , 'for ' , 'accumulate ' , ...",0.000000,Neutral,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
27495,2022-09-19 21:53:11+00:00,invest_answers,historical patterns what it means for crypto a...,138,42,10,"[historical, patterns, what, it, means, for, c...","[ 'historical ' , 'patterns ' , 'what ' , 'it ...",0.145455,Positive,0.233333
27496,2022-09-19 21:45:41+00:00,Nebraskangooner,bitcoin daily obv,211,46,14,"[bitcoin, daily, obv]","[ 'bitcoin ' , 'daily ' , 'obv ' ]",0.000000,Neutral,0.000000
27497,2022-09-19 21:44:25+00:00,mattcsnow,who is starting the bitcoin sauna conference a...,122,35,7,"[who, is, starting, the, bitcoin, sauna, confe...","[ 'who ' , 'is ' , 'starting ' , 'the ' , 'bit...",0.000000,Neutral,0.100000
27498,2022-09-19 21:42:55+00:00,Danology10,keep calm and hodl while btc does the ol flipp...,163,16,23,"[keep, calm, and, hodl, while, btc, does, the,...","[ 'keep ' , 'calm ' , 'and ' , 'hodl ' , 'whil...",-0.050000,Negative,0.675000


In [None]:
df.to_csv('btc_2020_to_2022_sentiment_nltk.csv', index=False)

# VADER
---

In [46]:
df = pd.read_csv("btc_processed.csv", index_col=[0])
df

Unnamed: 0,date,user,tweet,likes,replies,retweets,tokenized
0,2022-09-03 08:04:04+00:00,CharlieCryptoe,everything bubble gonna pop housingcrisis this...,0.0,0.0,0.0,"['everything', 'bubble', 'gon', 'na', 'pop', '..."
1,2022-09-03 08:04:01+00:00,altscanner,chz is cooling down from btc btc,0.0,0.0,0.0,"['chz', 'is', 'cooling', 'down', 'from', 'btc'..."
2,2022-09-03 08:04:00+00:00,SahbazzadeElsad,new x gem mc k holders cmc and cg dextools hot...,0.0,0.0,0.0,"['new', 'x', 'gem', 'mc', 'k', 'holders', 'cmc..."
3,2022-09-03 08:04:00+00:00,AltCryptoGems,in a few years time it doesnt matter whether y...,0.0,0.0,0.0,"['in', 'a', 'few', 'years', 'time', 'it', 'doe..."
4,2022-09-03 08:03:59+00:00,CryptoCoachSami,btc hr chart price action longshort the breako...,0.0,1.0,0.0,"['btc', 'hr', 'chart', 'price', 'action', 'lon..."
...,...,...,...,...,...,...,...
9999,2022-09-02 21:02:17+00:00,bruindesimon,ft crypto watch sept curated post from publish...,0.0,0.0,0.0,"['ft', 'crypto', 'watch', 'sept', 'curated', '..."
10000,2022-09-02 21:02:16+00:00,BitomicWhales,someone just transfered btc m usd,0.0,0.0,0.0,"['someone', 'just', 'transfered', 'btc', 'm', ..."
10001,2022-09-02 21:02:13+00:00,ICOINOKLASTING,the cure for longcovid is long money or bitcoi...,0.0,2.0,0.0,"['the', 'cure', 'for', 'longcovid', 'is', 'lon..."
10002,2022-09-02 21:02:11+00:00,bmurphypointman,linkedin twitter facebook instagram tiktok soc...,0.0,0.0,0.0,"['linkedin', 'twitter', 'facebook', 'instagram..."


In [47]:
# Calculate Polarity

analyser = SentimentIntensityAnalyzer()

scores = []
for sentence in df["tweet"].astype("str"):
  score = analyser.polarity_scores(sentence)
  scores.append(score)
  
scores = pd.DataFrame(scores)

df['Compound'] = scores['compound']
df['Negative'] = scores['neg']
df['Neutral'] = scores['neu']
df['Positive'] = scores['pos']

df

Unnamed: 0,date,user,tweet,likes,replies,retweets,tokenized,Compound,Negative,Neutral,Positive
0,2022-09-03 08:04:04+00:00,CharlieCryptoe,everything bubble gonna pop housingcrisis this...,0.0,0.0,0.0,"['everything', 'bubble', 'gon', 'na', 'pop', '...",-0.4226,0.111,0.833,0.056
1,2022-09-03 08:04:01+00:00,altscanner,chz is cooling down from btc btc,0.0,0.0,0.0,"['chz', 'is', 'cooling', 'down', 'from', 'btc'...",0.0000,0.000,1.000,0.000
2,2022-09-03 08:04:00+00:00,SahbazzadeElsad,new x gem mc k holders cmc and cg dextools hot...,0.0,0.0,0.0,"['new', 'x', 'gem', 'mc', 'k', 'holders', 'cmc...",0.0000,0.000,1.000,0.000
3,2022-09-03 08:04:00+00:00,AltCryptoGems,in a few years time it doesnt matter whether y...,0.0,0.0,0.0,"['in', 'a', 'few', 'years', 'time', 'it', 'doe...",0.2086,0.037,0.897,0.066
4,2022-09-03 08:03:59+00:00,CryptoCoachSami,btc hr chart price action longshort the breako...,0.0,1.0,0.0,"['btc', 'hr', 'chart', 'price', 'action', 'lon...",0.0000,0.000,1.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...
9999,2022-09-02 21:02:17+00:00,bruindesimon,ft crypto watch sept curated post from publish...,0.0,0.0,0.0,"['ft', 'crypto', 'watch', 'sept', 'curated', '...",,,,
10000,2022-09-02 21:02:16+00:00,BitomicWhales,someone just transfered btc m usd,0.0,0.0,0.0,"['someone', 'just', 'transfered', 'btc', 'm', ...",,,,
10001,2022-09-02 21:02:13+00:00,ICOINOKLASTING,the cure for longcovid is long money or bitcoi...,0.0,2.0,0.0,"['the', 'cure', 'for', 'longcovid', 'is', 'lon...",,,,
10002,2022-09-02 21:02:11+00:00,bmurphypointman,linkedin twitter facebook instagram tiktok soc...,0.0,0.0,0.0,"['linkedin', 'twitter', 'facebook', 'instagram...",,,,


In [49]:
sentiment = []

pos = df['Positive'].tolist()
neg = df['Negative'].tolist()
neu = df['Neutral'].tolist()

for i in range(len(pos)):

    if pos[i] > neg[i] and pos[i] > neu[i]:
        sentiment.append("Positive")
    
    elif neg[i] > pos[i] and neg[i] > neu[i]:
        sentiment.append("Negative")

    else:
        sentiment.append("Neutral")

df["Sentiment"] = sentiment
df


Unnamed: 0,date,user,tweet,likes,replies,retweets,tokenized,Compound,Negative,Neutral,Positive,Sentiment
0,2022-09-03 08:04:04+00:00,CharlieCryptoe,everything bubble gonna pop housingcrisis this...,0.0,0.0,0.0,"['everything', 'bubble', 'gon', 'na', 'pop', '...",-0.4226,0.111,0.833,0.056,Neutral
1,2022-09-03 08:04:01+00:00,altscanner,chz is cooling down from btc btc,0.0,0.0,0.0,"['chz', 'is', 'cooling', 'down', 'from', 'btc'...",0.0000,0.000,1.000,0.000,Neutral
2,2022-09-03 08:04:00+00:00,SahbazzadeElsad,new x gem mc k holders cmc and cg dextools hot...,0.0,0.0,0.0,"['new', 'x', 'gem', 'mc', 'k', 'holders', 'cmc...",0.0000,0.000,1.000,0.000,Neutral
3,2022-09-03 08:04:00+00:00,AltCryptoGems,in a few years time it doesnt matter whether y...,0.0,0.0,0.0,"['in', 'a', 'few', 'years', 'time', 'it', 'doe...",0.2086,0.037,0.897,0.066,Neutral
4,2022-09-03 08:03:59+00:00,CryptoCoachSami,btc hr chart price action longshort the breako...,0.0,1.0,0.0,"['btc', 'hr', 'chart', 'price', 'action', 'lon...",0.0000,0.000,1.000,0.000,Neutral
...,...,...,...,...,...,...,...,...,...,...,...,...
9999,2022-09-02 21:02:17+00:00,bruindesimon,ft crypto watch sept curated post from publish...,0.0,0.0,0.0,"['ft', 'crypto', 'watch', 'sept', 'curated', '...",,,,,Neutral
10000,2022-09-02 21:02:16+00:00,BitomicWhales,someone just transfered btc m usd,0.0,0.0,0.0,"['someone', 'just', 'transfered', 'btc', 'm', ...",,,,,Neutral
10001,2022-09-02 21:02:13+00:00,ICOINOKLASTING,the cure for longcovid is long money or bitcoi...,0.0,2.0,0.0,"['the', 'cure', 'for', 'longcovid', 'is', 'lon...",,,,,Neutral
10002,2022-09-02 21:02:11+00:00,bmurphypointman,linkedin twitter facebook instagram tiktok soc...,0.0,0.0,0.0,"['linkedin', 'twitter', 'facebook', 'instagram...",,,,,Neutral


In [51]:
df["Sentiment"].value_counts()

Neutral     9296
Positive      82
Negative      32
Name: Sentiment, dtype: int64

In [53]:
df.to_csv('btc_sentiment_vader.csv', index=False)