In [3]:
import ast
import statistics

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import scipy
from scipy import signal

Collecting plotly
  Downloading plotly-5.5.0-py2.py3-none-any.whl (26.5 MB)
[K     |████████████████████████████████| 26.5 MB 294 kB/s eta 0:00:01
[?25hCollecting tenacity>=6.2.0
  Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.5.0 tenacity-8.0.1


# Bitcoin tweet sentiment visualization

In [4]:
# Load the scored tweets, using the first CSV column as the row index.
# The 'hashtags' and 'sentiment' columns are stored as text and converted back
# to Python objects on load. ast.literal_eval only accepts literals (dicts,
# lists, numbers, strings, ...), whereas the previous eval would have executed
# any expression that happened to be embedded in the CSV file.
tweets = pd.read_csv(
    "../sentiment/saved_sentiment_data/tweet_complete.csv",
    index_col=0,
    converters={"hashtags": ast.literal_eval, "sentiment": ast.literal_eval},
)

In [5]:
# Preview the first five rows to sanity-check the loaded columns.
tweets.head(n=5)

Unnamed: 0,id,user,text,created_at,location,sentiment,sentiment_compound
0,1486094472798629888,BrizendineSean,dip yet say bitcoin derivative data show trade...,2022-01-25 21:51:47+00:00,"Santa Rosa, California","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0
1,1486094471980802059,yashi_jr,cryptopy buying dip bitcoin breakout bullishaf...,2022-01-25 21:51:47+00:00,,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0
2,1486094470906978305,jillpembrook,trading bitcoin real first thought wa,2022-01-25 21:51:46+00:00,United States,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0
3,1486094470575792128,Btc_Kgee,might marry white girl cause atleast believe b...,2022-01-25 21:51:46+00:00,South Africa,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0
4,1486094466557657088,StandardFine,bitcoin pop glad buy mess,2022-01-25 21:51:45+00:00,Milano,"{'neg': 0.294, 'neu': 0.353, 'pos': 0.353, 'co...",0.128


---

# Positive, negative and neutral sentiment

Split the compound sentiment score three ways to generate discrete classes:
- a score of -0.33 or lower is negative, a score of 0.33 or higher is positive, and anything strictly in between is neutral

In [6]:
# Cut-off on the compound score that separates the three sentiment classes:
# score >= threshold is positive, score <= -threshold is negative, and
# everything strictly in between is neutral.
SENTIMENT_THRESHOLD = 0.33

# Vectorized boolean masks replace three Python-level passes over the column.
# Missing (NaN) scores fail every comparison and therefore land in no bucket.
# .tolist() keeps the results as plain lists, so len() and iteration behave
# exactly as before.
compound_scores = tweets["sentiment_compound"]
pos_sentiment = compound_scores[compound_scores >= SENTIMENT_THRESHOLD].tolist()
neg_sentiment = compound_scores[compound_scores <= -SENTIMENT_THRESHOLD].tolist()
neu_sentiment = compound_scores[
    (compound_scores > -SENTIMENT_THRESHOLD) & (compound_scores < SENTIMENT_THRESHOLD)
].tolist()

In [12]:
# Box plot of the compound sentiment score across all loaded tweets.
compound_column = tweets["sentiment_compound"]
fig = px.box(y=compound_column, title="Overall Sentiment", labels={"y": "Sentiment"})

# The figure is exported as a standalone HTML file instead of being shown inline.
fig.write_html("Sentiment_Boxplot.html")

In [13]:
# Pair every class label with its tweet count so the bar order is explicit.
sentiment_counts = {
    "Positive": len(pos_sentiment),
    "Neutral": len(neu_sentiment),
    "Negative": len(neg_sentiment),
}

fig = px.bar(
    x=list(sentiment_counts.keys()),
    y=list(sentiment_counts.values()),
    title="Amount of tweets by sentiment",
    labels={"x": "Sentiment", "y": "Amount of tweets"},
)

# The figure is exported as a standalone HTML file instead of being shown inline.
fig.write_html("Sentiment_Amount.html")

___

## Google Trends analysis (w/ pytrends):

In [16]:
from pytrends.request import TrendReq

In [17]:
def getRisingTrends(query, time, qtype="rising"):
    """Print the Google Trends related queries for a single search term.

    A fresh pytrends ``TrendReq`` client is created on every call, so each
    call issues its own request.

    Args:
        query: Search term; it is sent as the only keyword of the payload and
            is also used to look up its entry in the related-queries result.
        time: Timeframe string handed to ``build_payload`` unchanged
            (the calls in this notebook use "YYYY-MM-DD YYYY-MM-DD").
        qtype: Key of the related-queries entry to print; defaults to
            "rising". NOTE(review): pytrends presumably also offers "top" -
            confirm against its documentation.

    Returns:
        None. The selected entry is written to stdout with ``print``.
    """
    client = TrendReq()
    client.build_payload(kw_list=[query], timeframe=time)
    related_by_keyword = client.related_queries()
    print(related_by_keyword[query][qtype])

In [18]:
# Rising related queries for "Bitcoin" in the 2021-11-15 to 2021-12-05 window.
getRisingTrends(query="Bitcoin", time="2021-11-15 2021-12-05")

                           query  value
0           bitcoin ban in india   3700
1        bitcoin dominance chart    300
2                   bitcoin drop    250
3            bitcoin neden düştü    200
4             bitcoin hoje dólar    200
5            1 bitcoin kaç dolar    190
6               bitcoin árfolyam    190
7                   rivian stock    150
8                   bitcoin city    140
9              bitcoin haberleri    130
10  cryptocurrency bitcoin price    130
11               bitcoin creator    130
12      fear greed index bitcoin    110
13                  bitcoin rush    110
14  fear and greed index bitcoin    110
15              bitcoin haram mı    110
16                           dax     90
17              bitcoin hari ini     80
18                bitcoin future     80
19             bitcoin kaç dolar     70
20          fear and greed index     70
21        cryptocurrency bitcoin     70
22          bitcoin hoje em real     70
23                cryptocurrency     60


In [19]:
# Rising related queries for "Crypto" in the 2021-11-15 to 2021-12-05 window.
getRisingTrends(query="Crypto", time="2021-11-15 2021-12-05")

                           query  value
0                     vvs crypto  37750
1                 omicron crypto  32600
2             vvs finance crypto  20250
3      is crypto banned in india  10850
4                   cocos crypto   6900
5                   kasta crypto   6800
6                lovelace crypto   6300
7                    bico crypto   5550
8               crypto ban india   5300
9            crypto bull society   4000
10                   mbox crypto   3200
11           crypto ban in india   2500
12        bomb crypto simulation   2250
13  gala crypto price prediction   1900
14                   crypto godz   1700
15                   gyen crypto   1300
16                   gala crypto   1150
17                          gala   1000
18             crypto bill india    900
19             gala games crypto    850
20      why is crypto down today    800
21                    pkn crypto    700
22        why is crypto crashing    650
23             crypto news india    650


In [20]:
# Rising related queries for "Cash" in the 2021-11-15 to 2021-12-05 window.
getRisingTrends(query="Cash", time="2021-11-15 2021-12-05")

                                                query  value
0                      merchant cash advance blursoft  51150
1   personal injury attorney chicago chicagoaccide...  15300
2                  project management software monday  14800
3                          baccarat rouge 540 dossier  11200
4                              kit kat cash and carry   5100
5                                  johnny cash museum   5100
6                                       delta 8 d8.co   4350
7                              lincs fm cash register   4250
8                   how to borrow money from cash app   3850
9                                     cash movie 2021   1800
10                                       cash hotstar   1500
11                                   chevy cyber cash    550
12                                   cash for life ny    500
13                 cash pot results for today jamaica    250
14                        what play in cash pot today    250
15                      

# Wordcloud

Generate a word cloud to get an overall idea of which words are most common in the dataset:

In [61]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt

def generate_wordcloud(data, title="", filename=""):
    """Build a word cloud from a collection of texts, display it and optionally save it.

    Args:
        data: pandas Series of texts (anything offering ``dropna()`` and
            ``astype()``). Missing entries are dropped and the remaining ones
            are joined with single spaces into one document.
        title: Heading drawn above the displayed word cloud. It was previously
            accepted but silently ignored.
        filename: Path the word cloud image is written to. When empty (the
            default) no file is written; previously an empty path was passed
            straight to ``to_file``.

    Returns:
        None.
    """
    # astype(str) keeps the join from failing on stray non-string entries.
    text = " ".join(data.dropna().astype(str))

    # NOTE(review): supplying ``stopwords`` presumably replaces the library's
    # built-in stop word list rather than extending it - confirm this is intended.
    stopwords = ["amp", "stonks", "https", "RT", "Retweet this", "Airdrop", "t co", "Link"]
    wordcloud = WordCloud(
        stopwords=stopwords,
        scale=4,
        max_font_size=50,
        max_words=200,
        background_color="white",
    ).generate(text)

    # The saved image is the raw word cloud (no title), exactly as before.
    if filename:
        wordcloud.to_file(filename)

    # Show the word cloud with its title so the figure stands on its own.
    fig, ax = plt.subplots()
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.set_title(title)
    ax.axis("off")
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

In [62]:
# Word cloud over the text of every tweet.
generate_wordcloud(
    tweets["text"],
    title="Most common Words among all tweets",
    filename="wordcloud_all.png",
)

In [63]:
# Word cloud restricted to tweets whose compound score is at least 0.33.
positive_mask = tweets["sentiment_compound"] >= 0.33
generate_wordcloud(
    tweets.loc[positive_mask, "text"],
    title="Most common Words in tweets with positive sentiment",
    filename="wordcloud_positive.png",
)

In [64]:
# Word cloud restricted to tweets whose compound score is at most -0.33.
negative_mask = tweets["sentiment_compound"] <= -0.33
generate_wordcloud(
    tweets.loc[negative_mask, "text"],
    title="Most common Words in tweets with negative sentiment",
    filename="wordcloud_negative.png",
)