# Tweets processing with [VADER](https://github.com/cjhutto/vaderSentiment)

### <span style="color:#ff5f27;"> 📝 Imports</span>

In [1]:
import json
import io
import re
import time
import os.path
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


from datetime import timedelta, datetime
from dateutil import parser

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from tqdm import tnrange, tqdm_notebook, tqdm

from sklearn import preprocessing
import matplotlib.pyplot as plt

import warnings

warnings.filterwarnings('ignore')

### <span style="color:#ff5f27;"> 💽 Loading Data</span>

## ‼️ Since [this dataset](https://www.kaggle.com/datasets/kaushiksuresh147/bitcoin-tweets) is about 600 Mb, I do not provide it in this GutHub repository, you should manually download it from Kaggle and place that file in this folder

In [2]:
df_tweets_processed = pd.read_csv("tweets_processed.csv", index_col=0)

## <span style='color:#ff5f27'>🤖 Sentiment analysis with VADER</span>

In [42]:
df_tweets_processed

Unnamed: 0,date,text
0,2021-02-05 10:52:04,📖 Weekend Read 📖\n\nKeen to learn about #cryp...
1,2021-02-05 10:52:04,2⃣ Debunking 9 #Bitcoin Myths by @Patrick_Lo...
2,2021-02-05 10:52:06,4⃣ 🎙️ Bloomberg LP #CryptoOutlook 2021 with @...
3,2021-02-05 10:52:07,"5⃣ #Blockchain 50 2021 by @DelRayMan, @Forbe..."
4,2021-02-05 10:52:26,#reddcoin #rdd @reddcoin to the moon #altcoin ...
...,...,...
3411660,2022-06-04 23:59:32,#Bitcoin #Blockchain Blockfriend nft bot: I ne...
3411661,2022-06-04 23:59:33,Alchemy announces support for Solana Web3 appl...
3411662,2022-06-04 23:59:48,Russia's Industrial Giant Rostec Announces Blo...
3411663,2022-06-04 23:59:49,Is Solana a 'buy' with SOL price at 10-month l...


In [4]:
def timestamp_2_time(x):
    dt_obj = datetime.datetime.strptime(str(x), '%Y-%m-%d %H:%M:%S')
    dt_obj = dt_obj.timestamp() * 1000
    return int(dt_obj)

In [5]:
def vader_processing(df):
    """
    Takes a DataFrame with 'text' column (cleaned using 'clean_text1' function) and
    returns a DataFrame with VADER-analized score.
    """
    
    analyzer = SentimentIntensityAnalyzer()
    compound = []
    for i,s in enumerate(tqdm(df['text'], position=0, leave=True)):
        # print(i,s)
        vs = analyzer.polarity_scores(str(s))
        compound.append(vs["compound"])
    df["compound"] = compound
    df = df.set_index("date")[["compound"]]
    df = df.resample('1D').sum()
    df = df.reset_index()
    df['date'] = df['date'].dt.strftime('%Y-%m-%d %H:%M:%S')
    df['unix'] = df.date.apply(timestamp_2_time)
    
    return df

In [61]:
tweets_vader = vader_processing(df_tweets_processed)
tweets_vader

Unnamed: 0_level_0,compound
date,Unnamed: 1_level_1
2021-02-05 10:00:00,3.4982
2021-02-05 11:00:00,10.0761
2021-02-05 12:00:00,30.7303
2021-02-05 13:00:00,15.8591
2021-02-05 14:00:00,23.8470
...,...
2022-06-04 19:00:00,337.2420
2022-06-04 20:00:00,242.6676
2022-06-04 21:00:00,203.8503
2022-06-04 22:00:00,195.7079


## <span style='color:#ff5f27'> 📥 Save the results</span>

In [6]:
tweets_vader.to_csv("tweets_vader.csv")