In [6]:
from datetime import datetime
from datetime import date
import requests
import pandas as pd
import json
import hvplot
import hvplot.pandas
import re
from sklearn import preprocessing 
from textblob import TextBlob
import time
import numpy as np


In [7]:
# load Twitter API keys

import os
import dotenv
import tweepy as tw

#dotenv.load_dotenv('.env')

from dotenv import load_dotenv #used examples from API pulls in class exercises
load_dotenv()

consumer_key = os.getenv('TWITTER_CONSUMER_KEY')
consumer_secret = os.getenv('TWITTER_CONSUMER_SECRET')
access_token = os.getenv('TWITTER_ACCESS_TOKEN')
access_token_secret = os.getenv('TWITTER_ACCESS_TOKEN_SECRET')


In [8]:
#authorize Twitter API

auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tw.API(auth, wait_on_rate_limit=True)



In [113]:
# Read the CSV located at the file path into a Pandas DataFrame

btc_df = pd.read_csv("BTC_USD2.csv", parse_dates=True, infer_datetime_format=True)
btc_df['Date'] =  pd.to_datetime(btc_df['Date'])
btc_df = btc_df [["Date","Price"]]
btc_df = btc_df.rename(columns={'Price': 'Close'})
btc_df['Close']=btc_df['Close'].replace(',', '', regex=True).astype(float)
btc_df = btc_df.set_index('Date')
btc_df = btc_df.dropna()

# Print the DataFrame
btc_df

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2018-01-18,11045.0
2018-01-19,11476.0
2018-01-20,12728.0
2018-01-21,11514.0
2018-01-22,10771.0
...,...
2021-01-19,35958.0
2021-01-20,35521.1
2021-01-21,30873.0
2021-01-22,33062.0


In [10]:
# DEFINING A FUNCTION TO REMOVE REMOVE URL FOR ALL TWEETS TO USE LATER ON

#def remove_url(txt):
#    return " ".join(re.sub("([^0-9A-Za-z \t])|(\w+:\/\/\S+)", "", txt).split())


def remove_pattern(input_txt, pattern):
    r = re.findall(pattern, input_txt)
    for i in r:
        input_txt = re.sub(i, '', input_txt)        
    return input_txt

def clean_tweets(tweets):
    #remove twitter Return handles (RT @xxx:)
    tweets = np.vectorize(remove_pattern)(tweets, "RT @[\w]*:") 
    
    #remove twitter handles (@xxx)
    tweets = np.vectorize(remove_pattern)(tweets, "@[\w]*")
    
    #remove URL links (httpxxx)
    tweets = np.vectorize(remove_pattern)(tweets, "https?://[A-Za-z0-9./]*")
    
    #remove special characters, numbers, punctuations (except for #)
    tweets = np.core.defchararray.replace(tweets, "[^a-zA-Z]", " ")
    
    return tweets

In [11]:
# SECTION A: GETTING A ALL TWEETS FROM @CRYPTO (BLOOMBERG'S CRYPTO NEWS IN TWITTER)

screen_name = 'crypto'

crypto_tweets = tw.Cursor(api.user_timeline,
              screen_name=screen_name,
              tweet_mode='extended',
              lang =" en").items()

crypto_json = []

for tweet in crypto_tweets:
        crypto_json.append(tweet._json)


In [12]:
# removing URLs, and creating DataFrame for specific tweet columns

crypto_list=[]

with open('crypto.txt', 'w') as file:
        file.write(json.dumps(crypto_json, indent=4))

with open('crypto.txt', encoding='utf-8') as json_file:  
    all_data = json.load(json_file)
    for each_dictionary in all_data:
        tweet_id = each_dictionary['id']
        text = clean_tweets(each_dictionary['full_text'])
        Date = each_dictionary['created_at']
        name = each_dictionary['user']['screen_name']
        
        crypto_list.append({'tweet_id': str(tweet_id),
                             'text': str(text),
                             'name': str(name),
                             'Date': Date,
                            })

        crypto_df = pd.DataFrame(crypto_list, columns = 
                                  ['Date','tweet_id', 'text', 
                                   'name'])

crypto_df.to_csv('crypto.csv', index = False)        
        
#pd.options.display.max_columns = None
#pd.options.display.max_colwidth = 500


In [55]:
#Reading in CSV file
#(Note I had to export into csv so those without the twitter API can use the formatted data in csv format)

pd.options.display.max_colwidth = 500
crypto_df = pd.read_csv('crypto.csv')
crypto_df

Unnamed: 0,Date,tweet_id,text,name
0,Fri Jan 22 14:52:31 +0000 2021,1352630229337051143,"The 2021 betting odds on Bitcoin, Bieber, Andrew Yang, the Tokyo Olympics, and more",crypto
1,Fri Jan 22 14:47:24 +0000 2021,1352628938057650179,"-It was developed by a mythical figure\n-The early writings are sacred texts\n-There are saints, holidays, and diets\n\nIs Bitcoin a re…",crypto
2,Fri Jan 22 14:30:20 +0000 2021,1352624645774725127,"What's ahead for #crypto in 2021? Tune in LIVE to our Q&amp;A with , Asia chief of crypto firm Luno, and , executive at SFC-licensed digital asset platform OSL.\n\nHave questions for our experts? Reply to this tweet or #BloombergMarkets\n\n#Bitcoin #Cryptocurrency",crypto
3,Fri Jan 22 14:09:10 +0000 2021,1352619319323144194,"Cryptocurrencies may never be able to work as actual currencies due to a ""fundamental flaw,"" says a UBS economist",crypto
4,Fri Jan 22 13:23:33 +0000 2021,1352607839211159553,Signs of bubbly markets are everywhere. Here's what you can do to play it cool,crypto
...,...,...,...,...
3210,Thu Feb 22 23:49:18 +0000 2018,966822236060610562,"The USA Luge team weighs in on Bitcoin: ""Both know all about speed, crashes, risk management, and holding on.""",crypto
3211,Thu Feb 22 23:01:16 +0000 2018,966810149695475712,Venezuelan citizens can't buy their own country's crypto coin because it doesn't accept bolivars and they aren't allowed to buy foreign currency,crypto
3212,Thu Feb 22 22:38:42 +0000 2018,966804469185990656,This J-pop band is getting in on the cryptocurrency craze. has more from Tokyo via,crypto
3213,Thu Feb 22 22:15:34 +0000 2018,966798649073307648,More than 4 million have signed up to open accounts at Robinhood as the brokerage app begins offering commission-free trading of Bitcoin and Ethereum today for the first time,crypto


In [56]:
# Cleaning up Date from twitter format to python readable format

crypto_df = crypto_df.dropna().drop_duplicates()
crypto_df['Date'] = pd.to_datetime(crypto_df['Date'],utc=True)
crypto_df['Date'] = crypto_df['Date'].dt.date
crypto_df['Date'] = pd.to_datetime(crypto_df['Date'])
crypto_df.dtypes

# Adding textblob sentiment
crypto_df['textblob']= crypto_df['text'].apply(lambda tweet: TextBlob(tweet).sentiment.polarity)
crypto_df

# Adding vader sentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

def sentiment_score_compound(sentence):
    score = analyzer.polarity_scores(sentence)
    return score['compound']

crypto_df['vader_compound']= crypto_df.apply(lambda row: sentiment_score_compound(row['text']), axis=1)
crypto_df.drop_duplicates()

# Only getting tweets with word bitcoin
search = ['Bitcoin','bitcoin','BTC','btc','crypto','cryptocurrency']

crypto_df = crypto_df[crypto_df['text'].str.contains('|'.join(search))]
crypto_df

Unnamed: 0,Date,tweet_id,text,name,textblob,vader_compound
0,2021-01-22,1352630229337051143,"The 2021 betting odds on Bitcoin, Bieber, Andrew Yang, the Tokyo Olympics, and more",crypto,0.500000,0.0000
1,2021-01-22,1352628938057650179,"-It was developed by a mythical figure\n-The early writings are sacred texts\n-There are saints, holidays, and diets\n\nIs Bitcoin a re…",crypto,0.100000,0.3818
2,2021-01-22,1352624645774725127,"What's ahead for #crypto in 2021? Tune in LIVE to our Q&amp;A with , Asia chief of crypto firm Luno, and , executive at SFC-licensed digital asset platform OSL.\n\nHave questions for our experts? Reply to this tweet or #BloombergMarkets\n\n#Bitcoin #Cryptocurrency",crypto,-0.021212,0.4329
5,2021-01-22,1352580288950521856,"What's ahead for #crypto in 2021? Tune in LIVE to our Q&amp;A with , Asia chief of crypto firm Luno, and , executive at SFC-licensed digital asset platform OSL.\n\nHave questions for our experts? Reply to this tweet or #BloombergMarkets\n\n#Bitcoin #Cryptocurrency",crypto,-0.021212,0.4329
6,2021-01-22,1352504580970598400,"What's ahead for #crypto in 2021? Tune in LIVE to our Q&amp;A with , Asia chief of crypto firm Luno, and , executive at SFC-licensed digital asset platform OSL.\n\nHave questions for our experts? Reply to this tweet or #BloombergMarkets\n\n#Bitcoin #Cryptocurrency",crypto,-0.021212,0.4329
...,...,...,...,...,...,...
3210,2018-02-22,966822236060610562,"The USA Luge team weighs in on Bitcoin: ""Both know all about speed, crashes, risk management, and holding on.""",crypto,0.000000,-0.2732
3211,2018-02-22,966810149695475712,Venezuelan citizens can't buy their own country's crypto coin because it doesn't accept bolivars and they aren't allowed to buy foreign currency,crypto,0.237500,-0.2924
3212,2018-02-22,966804469185990656,This J-pop band is getting in on the cryptocurrency craze. has more from Tokyo via,crypto,0.500000,-0.1531
3213,2018-02-22,966798649073307648,More than 4 million have signed up to open accounts at Robinhood as the brokerage app begins offering commission-free trading of Bitcoin and Ethereum today for the first time,crypto,0.250000,0.0000


In [114]:
#Grouping sentiment by date and looking at charts of sentiment and volume over time

crypto_bydate = crypto_df.groupby(['Date'], as_index=False).agg({'textblob': 'mean',
                                                                 'vader_compound': 'mean',
                                                                 'text': 'first',
                                                                 'tweet_id': 'count'})
crypto_bydate

#Combining BTC price with dataframe

#crypto_bydate = crypto_bydate.set_index('Date')
btc_crypto = pd.merge(crypto_bydate,btc_df, how='inner', on= "Date")
btc_crypto

Unnamed: 0,Date,textblob,vader_compound,text,tweet_id,Close
0,2018-02-22,0.197500,-0.014020,"The USA Luge team weighs in on Bitcoin: ""Both know all about speed, crashes, risk management, and holding on.""",5,9830.0
1,2018-02-23,-0.023743,-0.018892,SEC cools a red-hot crypto market by picking up the telephone,12,10145.0
2,2018-02-24,0.000000,0.371600,It’s the Holy Grail of cryptocurrencies -- an asset with all the benefits of decentralization but none of the volatility,2,9666.3
3,2018-02-25,0.034028,-0.129762,French watchdog clamps down on crypto trading,8,9557.4
4,2018-02-26,0.028571,0.108757,Craig Wright sued over $5 billion Bitcoin mined by colleague,14,10321.0
...,...,...,...,...,...,...
598,2021-01-16,0.033333,0.113150,Creditors seeking to regain Bitcoin lost on the Mt. Gox exchange in 2014 have a chance to get their digital assets back before legal claims are settled,2,36055.0
599,2021-01-19,0.297222,0.071367,Far-right groups and personalities received a windfall of cryptocurrency donations in the weeks before the Capitol attack and evidence “strongly suggests” it came from a now-deceased computer programmer in France,3,35958.0
600,2021-01-20,0.167257,0.051200,BlackRock is adding cash-settled Bitcoin futures as an eligible investment to two funds,11,35521.1
601,2021-01-21,0.061515,0.018740,"Much thanks to and for letting me write about why Bitcoin is a religion for . In this piece,…",10,30873.0


In [16]:
# used for filtering tweets per date
#crypto_df[(crypto_df['Date'] > '2018-03-19') & (crypto_df['Date'] < '2018-03-21')]

In [115]:
#Creating an RESULTS dataframe for easier evaluation


#Setting up a dataframe with results
results_news = pd.DataFrame(index=['Crypto','Coin_Telegraph','Coin_Desk','Bitcoin','BTCTN','Combined_news'], 
                           columns=['corr_textblob','corr_vader','btc_twt_count','avg_textblob','avg_vader'])

#getting correlation between textblob and btc price
results_news.at['Crypto','corr_textblob'] = round(btc_crypto['Close'].corr(btc_crypto['textblob']),2)

#getting correlation between vadercompound and btc price
results_news.at['Crypto','corr_vader'] = round(btc_crypto['Close'].corr(btc_crypto['vader_compound']),2)

#getting tweet count that talk specifically about bitcoin
results_news.at['Crypto','btc_twt_count'] = btc_crypto['tweet_id'].sum()

#average textblob sentiment
results_news.at['Crypto','avg_textblob'] = round(btc_crypto['textblob'].mean(),2)

#average vader sentiment
results_news.at['Crypto','avg_vader'] = round(btc_crypto['vader_compound'].mean(),2)
results_news

Unnamed: 0,corr_textblob,corr_vader,btc_twt_count,avg_textblob,avg_vader
Crypto,0.03,0.1,2044.0,0.07,0.02
Coin_Telegraph,,,,,
Coin_Desk,,,,,
Bitcoin,,,,,
BTCTN,,,,,
Combined_news,,,,,


In [19]:
# SECTION B: GETTING ALL TWEETS FROM @Cointelegraph. Same code as before, just applied to a different twitter handle

screen_name = 'Cointelegraph'

cointele_tweets = tw.Cursor(api.user_timeline,
              screen_name=screen_name,
              tweet_mode='extended',
              lang =" en").items()

cointele_json = []

for tweet in cointele_tweets:
        cointele_json.append(tweet._json)


In [24]:
# removing URLs, and creating DataFrame for specific tweet columns

cointele_list=[]

with open('cointele.txt', 'w') as file:
        file.write(json.dumps(cointele_json, indent=4))

with open('cointele.txt', encoding='utf-8') as json_file:  
    all_data = json.load(json_file)
    for each_dictionary in all_data:
        tweet_id = each_dictionary['id']
        text = clean_tweets(each_dictionary['full_text'])
        Date = each_dictionary['created_at']
        name = each_dictionary['user']['screen_name']
        
        cointele_list.append({'tweet_id': str(tweet_id),
                             'text': str(text),
                             'name': str(name),
                             'Date': Date,
                            })

        cointele_df = pd.DataFrame(cointele_list, columns = 
                                  ['Date','tweet_id', 'text', 
                                   'name'])
 
   
cointele_df.to_csv('cointele.csv', index = False)

#pd.options.display.max_columns = None
#pd.options.display.max_colwidth = 500

In [63]:
#Reading in CSV file

pd.options.display.max_colwidth = 500
cointele_df = pd.read_csv('cointele.csv')
cointele_df

Unnamed: 0,Date,tweet_id,text,name
0,Fri Jan 22 19:15:01 +0000 2021,1352696285812445184,"VanEck is giving another crack at launching a digital asset-related fund. Although the firm has an unrelated lawsuit lingering, they have decided to proceed with their filing with the SEC.",Cointelegraph
1,Fri Jan 22 19:02:07 +0000 2021,1352693042176647168,"Here we go, live now.",Cointelegraph
2,Fri Jan 22 18:50:36 +0000 2021,1352690142083915781,"Decentraland $MANA is proof that real money can flow through virtual worlds, but questions whether the pl…",Cointelegraph
3,Fri Jan 22 18:48:22 +0000 2021,1352689580907106304,"The turnaround follows a turbulent 24 hours in which Bitcoin slid to $28,950 — a key level when it comes to support from whales and only its second dip below $30,000 this year.",Cointelegraph
4,Fri Jan 22 18:45:01 +0000 2021,1352688737633902597,"There is a colossal semiconductor shortage for Bitcoin miners. The leading supplier of mining rigs, Bitmain, is sold out of inventory until August 2021. Will this supply squeeze push prices of equipment through the ceiling?",Cointelegraph
...,...,...,...,...
3226,Sun Nov 29 20:46:02 +0000 2020,1333150246872907776,"As the DeFi community slowly regains faith in the AMM exchange and $SUSHI token prices rise, Sushiswap successfully fights off off the worst of an exploit",Cointelegraph
3227,Sun Nov 29 20:19:33 +0000 2020,1333143581456953345,". has partnered with the US-based pizza chain and has begun accepting nine cryptocurrencies as a form of payment for cheesy pies -- and it won't cost 10,000 Bitcoin",Cointelegraph
3228,Sun Nov 29 20:00:32 +0000 2020,1333138796041412608,"Big Brother is watching your crypto.\n\nRegulators want more personal records kept on Bitcoin users, but activists are starting push back. Is it time for Bitcoin users to take their privacy more seriously?",Cointelegraph
3229,Sun Nov 29 19:17:56 +0000 2020,1333128077241565185,"There are 3 correct answers, so no excuse for fking this up.\n\n#Bitcoin is:",Cointelegraph


In [65]:
# Cleaning up Date from twitter format to python readable format

cointele_df = cointele_df.dropna().drop_duplicates()
cointele_df['Date'] = pd.to_datetime(cointele_df['Date'],utc=True)
cointele_df['Date'] = cointele_df['Date'].dt.date
cointele_df['Date'] = pd.to_datetime(cointele_df['Date'])
cointele_df.dtypes

# Adding textblob sentiment
cointele_df['textblob']= cointele_df['text'].apply(lambda tweet: TextBlob(tweet).sentiment.polarity)
cointele_df

# Adding vader sentiment
cointele_df['vader_compound']= cointele_df.apply(lambda row: sentiment_score_compound(row['text']), axis=1)
cointele_df.drop_duplicates()

# Only getting tweets with word bitcoin
search = ['Bitcoin','bitcoin','BTC','btc','crypto','cryptocurrency']
cointele_df = cointele_df[cointele_df['text'].str.contains('|'.join(search))]

pd.options.display.max_colwidth = 500
cointele_df

Unnamed: 0,Date,tweet_id,text,name,textblob,vader_compound
3,2021-01-22,1352689580907106304,"The turnaround follows a turbulent 24 hours in which Bitcoin slid to $28,950 — a key level when it comes to support from whales and only its second dip below $30,000 this year.",Cointelegraph,0.000000,0.4019
4,2021-01-22,1352688737633902597,"There is a colossal semiconductor shortage for Bitcoin miners. The leading supplier of mining rigs, Bitmain, is sold out of inventory until August 2021. Will this supply squeeze push prices of equipment through the ceiling?",Cointelegraph,0.300000,-0.2500
5,2021-01-22,1352685049154572289,"A class-action lawsuit is being brought against Bit Digital, a Bitcoin mining company, for allegedly fabricating the extent of their BTC mining operations. Will the results of this suit affect other publicly traded crypto companies?",Cointelegraph,-0.056250,-0.2263
6,2021-01-22,1352681206249369600,"Latin America will be gaining more access to crypto thanks to Mercado Bitcoin. This leading Brazilian exchange just announced a probable expansion into Chile, Mexico, and Argentina!",Cointelegraph,0.375000,0.7177
8,2021-01-22,1352678424058372096,"“If Bitcoin is good enough for banks, insurance companies, and cities, it is good enough for a small nation.” Can the top crypto become a reserve of a nation?",Cointelegraph,0.275000,0.7650
...,...,...,...,...,...,...
3224,2020-11-30,1333199697746427904,Mining difficulty on the #Bitcoin network rose 8.9% today as #Ethereum's reached an all-time high on Friday,Cointelegraph,0.380000,-0.2500
3227,2020-11-29,1333143581456953345,". has partnered with the US-based pizza chain and has begun accepting nine cryptocurrencies as a form of payment for cheesy pies -- and it won't cost 10,000 Bitcoin",Cointelegraph,-0.500000,0.3818
3228,2020-11-29,1333138796041412608,"Big Brother is watching your crypto.\n\nRegulators want more personal records kept on Bitcoin users, but activists are starting push back. Is it time for Bitcoin users to take their privacy more seriously?",Cointelegraph,0.095238,-0.3269
3229,2020-11-29,1333128077241565185,"There are 3 correct answers, so no excuse for fking this up.\n\n#Bitcoin is:",Cointelegraph,0.025000,-0.0346


In [116]:
#Grouping sentiment by date and looking at charts of sentiment and volume over time

cointele_bydate = cointele_df.groupby(['Date'], as_index=False).agg({'textblob': 'mean','vader_compound': 'mean', 'text': 'first', 'tweet_id': 'count'})
cointele_bydate

#Combining BTC price with dataframe

#cointele_bydate = cointele_bydate.set_index('Date')
btc_cointele = pd.merge(cointele_bydate,btc_df, how='inner', on= "Date")
btc_cointele


Unnamed: 0,Date,textblob,vader_compound,text,tweet_id,Close
0,2020-11-29,-0.110565,-0.063225,". has partnered with the US-based pizza chain and has begun accepting nine cryptocurrencies as a form of payment for cheesy pies -- and it won't cost 10,000 Bitcoin",4,18196.0
1,2020-11-30,0.128774,0.182138,"Boosted by Bitcoin’s new highs, these 5 cryptos may be ready to stage a comeback. Time to investigate the charts before alt season starts.\n\n \n(Reporting vis )",29,19686.0
2,2020-12-01,0.103403,0.121316,You think $25k is bullish? How about $590k for Bitcoin?\n\nMarket analysis firm thinks a little-known index could signal a new run to $590k.,37,18807.0
3,2020-12-02,0.059429,0.114358,"In the short term, traders expect #Bitcoin price to consolidate as $BTC searches for direction in the $18,200 to $19,5…",36,19241.0
4,2020-12-03,0.059477,0.100382,"The UK’s adoption of Bitcoin could be approaching faster than anticipated.\n\nBrexit Party leader appears to be pivoting to crypto after deriding the continual printing of fiat in Britain, calling it “funny money.”",40,19447.0
5,2020-12-04,0.068489,-0.039331,"CEO of MicroStrategy says his company bought $50m in #Bitcoin at $19,427 each, and now holds 40,824 $BTC.\n\nAt what point does a business intelligence company become a company that simply does business intelligently?",26,18657.0
6,2020-12-05,0.075653,-0.090242,"Missed the news this week? Catch up with Hodler's Digest. \n\nBitcoin momentarily breaks its all-time high, Wall Street creates crypto indexes in 2021, and buys more #BTC. \n\nBut which billionaire says cash is trash?",12,19157.0
7,2020-12-06,0.119397,0.378545,"Podcaster and #Bitcoin Cash proponent have both called on Donald Trump to pardon Ross Ulbricht, Julian Assange, and Edward Snowden before he leaves office on Jan. 20",11,19410.0
8,2020-12-07,0.137977,0.132214,"The crypto crowd had mixed feelings around ‘Shameless’ () name-dropping Bitcoin, Ethereum, and Tether on its recent premier. Enthusiasts are excited about the mention, but the dialogue was a jumble of buzzwords that didn’t make much sense.",21,19196.0
9,2020-12-08,0.109912,0.157414,$COMP price rallied 56% in the past week as #DeFi sector activity increases and #Bitcoin price trades in a sideways ra…,37,18335.0


In [117]:
#Creating an RESULTS dataframe for easier evaluation

#getting correlation between textblob and btc price
results_news.at['Coin_Telegraph','corr_textblob'] = round(btc_cointele['Close'].corr(btc_cointele['textblob']),2)

#getting correlation between vadercompound and btc price
results_news.at['Coin_Telegraph','corr_vader'] = round(btc_cointele['Close'].corr(btc_cointele['vader_compound']),2)

#getting tweet count that talk specifically about bitcoin
results_news.at['Coin_Telegraph','btc_twt_count'] = btc_cointele['tweet_id'].sum()

#average textblob sentiment
results_news.at['Coin_Telegraph','avg_textblob'] = round(btc_cointele['textblob'].mean(),2)

#average vader sentiment
results_news.at['Coin_Telegraph','avg_vader'] = round(btc_cointele['vader_compound'].mean(),2)
results_news

Unnamed: 0,corr_textblob,corr_vader,btc_twt_count,avg_textblob,avg_vader
Crypto,0.03,0.1,2044.0,0.07,0.02
Coin_Telegraph,0.19,0.17,1865.0,0.09,0.14
Coin_Desk,,,,,
Bitcoin,,,,,
BTCTN,,,,,
Combined_news,,,,,


In [32]:
# SECTION C: GETTING ALL TWEETS FROM @CoinDesk. Same code as before, just applied to a different twitter handle

screen_name = 'CoinDesk'

coindesktweets = tw.Cursor(api.user_timeline,
              screen_name=screen_name,
              tweet_mode='extended',
              lang =" en").items()

coindesk_json = []

for tweet in coindesktweets:
    coindesk_json.append(tweet._json)


In [33]:
# removing URLs, and creating DataFrame for specific tweet columns

coindesk_list=[]

with open('coindesk.txt', 'w') as file:
        file.write(json.dumps(coindesk_json, indent=4))

with open('coindesk.txt', encoding='utf-8') as json_file:  
    all_data = json.load(json_file)
    for each_dictionary in all_data:
        tweet_id = each_dictionary['id']
        text = clean_tweets(each_dictionary['full_text'])
        Date = each_dictionary['created_at']
        name = each_dictionary['user']['screen_name']
        
        coindesk_list.append({'tweet_id': str(tweet_id),
                             'text': str(text),
                             'name': str(name),
                             'Date': Date,
                            })

        coindesk_df = pd.DataFrame(coindesk_list, columns = 
                                  ['Date','tweet_id', 'text', 
                                   'name'])

        
coindesk_df.to_csv('coindesk.csv', index = False)
#pd.options.display.max_columns = None
#pd.options.display.max_colwidth = 500


In [68]:
#Reading in CSV file

pd.options.display.max_colwidth = 500
coindesk_df = pd.read_csv('coindesk.csv')
coindesk_df

Unnamed: 0,Date,tweet_id,text,name
0,Fri Jan 22 19:14:12 +0000 2021,1352696082170568706,"Institutions never wait and see during each market sell-off, on-chain data reveals. Instead, they have been buying the dip even during this week’s correction. \n\n reports\n\n",CoinDesk
1,Fri Jan 22 18:07:10 +0000 2021,1352679212579233794,"""Although the U.S. seems all-powerful right now, digital alternatives to the dollar-centric financial system are emerging. Washington, Wall Street and Silicon Valley must be ready,"" says . \n\n",CoinDesk
2,Fri Jan 22 17:37:59 +0000 2021,1352671868252114944,"In their latest report, shows that the share of bitcoin in addresses holding more than 10,000 BTC has decrease…",CoinDesk
3,Fri Jan 22 17:04:46 +0000 2021,1352663510610825218,NEW: Upstart crypto exchange is opening pre-orders for a debit card linked to #bitcoin and fiat accounts.\n\n reports\n\n,CoinDesk
4,Fri Jan 22 16:43:20 +0000 2021,1352658115964108802,"Samsung is considering bankrolling a $10 billion chipmaking plant in Austin, Texas, according to a Bloomberg report. \n\nby \n\n",CoinDesk
...,...,...,...,...
3190,Wed Oct 14 21:17:36 +0000 2020,1316488349239185411,pushes back on this and says congestion in DeFi will still happen on Ethereum 2.0.,CoinDesk
3191,Wed Oct 14 21:15:24 +0000 2020,1316487797080027137,1/ ETH KILLERS. Chain interoperability + composability panel at #investeth with …,CoinDesk
3192,Wed Oct 14 21:15:10 +0000 2020,1316487737009205249,"COMING UP: , and join #investeth to explore how automated market makers, portfolio managers and aggregation tools have kick-started an arms race for end users.\n\nRegister:",CoinDesk
3193,Wed Oct 14 21:14:05 +0000 2020,1316487465126027264,"Alternative chains can be additive and complementary to Ethereum, says. “If you want resources you go to where t…",CoinDesk


In [69]:
# Cleaning up Date from twitter format to python readable format

coindesk_df = coindesk_df.dropna().drop_duplicates()
coindesk_df['Date'] = pd.to_datetime(coindesk_df['Date'],utc=True)
coindesk_df['Date'] = coindesk_df['Date'].dt.date
coindesk_df['Date'] = pd.to_datetime(coindesk_df['Date'])
coindesk_df.dtypes

# Adding textblob sentiment
coindesk_df['textblob']= coindesk_df['text'].apply(lambda tweet: TextBlob(tweet).sentiment.polarity)
coindesk_df

# Adding vader sentiment
coindesk_df['vader_compound']= coindesk_df.apply(lambda row: sentiment_score_compound(row['text']), axis=1)
coindesk_df.drop_duplicates()

# Only getting tweets with word bitcoin
search = ['Bitcoin','bitcoin','BTC','btc']
coindesk_df = coindesk_df[coindesk_df['text'].str.contains('|'.join(search))]

pd.options.display.max_colwidth = 500
coindesk_df

Unnamed: 0,Date,tweet_id,text,name,textblob,vader_compound
2,2021-01-22,1352671868252114944,"In their latest report, shows that the share of bitcoin in addresses holding more than 10,000 BTC has decrease…",CoinDesk,0.500000,0.2960
3,2021-01-22,1352663510610825218,NEW: Upstart crypto exchange is opening pre-orders for a debit card linked to #bitcoin and fiat accounts.\n\n reports\n\n,CoinDesk,0.136364,0.0000
10,2021-01-22,1352625786889658369,"MicroStrategy buys the dip, adds $10M to bitcoin treasury.\n\nby \n\n",CoinDesk,0.000000,0.2023
11,2021-01-22,1352625178472292353,FIRST MOVER: Bearish price-chart patterns and ill-understood news items combined to cause bitcoin's biggest price plun…,CoinDesk,0.250000,0.0000
13,2021-01-22,1352617677982994434,"JUST IN: Inspired by high fees in a hot market, is the latest crypto exchange to support the #Bitcoin Lightning Network for deposits and withdrawals.\n\n reports\n",CoinDesk,0.303333,0.7096
...,...,...,...,...,...,...
3125,2020-10-15,1316810039773089796,"Post Hacker Team, Coinbase is leaning into #Bitcoin culture (minus all the erratic token listings lol)",CoinDesk,0.350000,0.4215
3127,2020-10-15,1316801563990601731,"JUST IN: says it will sponsor a pair of Bitcoin Core developers, open-source coders who help to maintain the Bitcoin network.\n\n reports\n\n",CoinDesk,0.000000,0.4019
3130,2020-10-15,1316784753639665665,". will be watching the markets for the beacon chain launch - a once-in-a-lifetime event for Ethereum. Predicting a bullish market for $ETH, will eye correlation and effect on $BTC. \n\n#investeth\n\n",CoinDesk,0.000000,0.4215
3156,2020-10-15,1316707262447181827,The world's soaring stockpile of negative-yielding bonds is likely to bolster bitcoin's appeal as an alternative inves…,CoinDesk,0.000000,0.0000


In [118]:
#Grouping sentiment by date and looking at charts of sentiment and volume over time

coindesk_bydate = coindesk_df.groupby(['Date'], as_index=False).agg({'textblob': 'mean','vader_compound': 'mean', 'text': 'first', 'tweet_id': 'count'})
coindesk_bydate

#Combining BTC price with dataframe
btc_coindesk = pd.merge(coindesk_bydate,btc_df, how='inner', on= "Date")


In [119]:
#Creating an RESULTS dataframe for easier evaluation

#getting correlation between textblob and btc price
results_news.at['Coin_Desk','corr_textblob'] = round(btc_coindesk['Close'].corr(btc_coindesk['textblob']),2)

#getting correlation between vadercompound and btc price
results_news.at['Coin_Desk','corr_vader'] = round(btc_coindesk['Close'].corr(btc_coindesk['vader_compound']),2)

#getting tweet count that talk specifically about bitcoin
results_news.at['Coin_Desk','btc_twt_count'] = btc_coindesk['tweet_id'].sum()

#average textblob sentiment
results_news.at['Coin_Desk','avg_textblob'] = round(btc_coindesk['textblob'].mean(),2)

#average vader sentiment
results_news.at['Coin_Desk','avg_vader'] = round(btc_coindesk['vader_compound'].mean(),2)
results_news

Unnamed: 0,corr_textblob,corr_vader,btc_twt_count,avg_textblob,avg_vader
Crypto,0.03,0.1,2044.0,0.07,0.02
Coin_Telegraph,0.19,0.17,1865.0,0.09,0.14
Coin_Desk,-0.06,-0.08,1071.0,0.09,0.15
Bitcoin,,,,,
BTCTN,,,,,
Combined_news,,,,,


In [37]:
# SECTION D: GETTING ALL TWEETS FROM @Bitcoin. Same code as before, just applied to a different twitter handle

screen_name = 'Bitcoin'

bitcointweets = tw.Cursor(api.user_timeline,
              screen_name=screen_name,
              tweet_mode='extended',
              lang =" en").items()

bitcoin_json = []

for tweet in bitcointweets:
    bitcoin_json.append(tweet._json)

In [38]:
# removing URLs, and creating DataFrame for specific tweet columns

bitcoin_list=[]

with open('bitcoin.txt', 'w') as file:
        file.write(json.dumps(bitcoin_json, indent=4))

with open('bitcoin.txt', encoding='utf-8') as json_file:  
    all_data = json.load(json_file)
    for each_dictionary in all_data:
        tweet_id = each_dictionary['id']
        text = clean_tweets(each_dictionary['full_text'])
        Date = each_dictionary['created_at']
        name = each_dictionary['user']['screen_name']
        
        bitcoin_list.append({'tweet_id': str(tweet_id),
                             'text': str(text),
                             'name': str(name),
                             'Date': Date,
                            })

        bitcoin_df = pd.DataFrame(bitcoin_list, columns = 
                                  ['Date','tweet_id', 'text', 
                                   'name'])

bitcoin_df.to_csv('bitcoin.csv', index = False)        
        
#pd.options.display.max_columns = None
#pd.options.display.max_colwidth = 500


In [72]:
#Reading CSV file
pd.options.display.max_colwidth = 500
bitcoin_df = pd.read_csv('bitcoin.csv')
bitcoin_df

Unnamed: 0,Date,tweet_id,text,name
0,Fri Jan 22 01:51:55 +0000 2021,1352433783518072832,"""Under the new program, Sequoia employees may elect to defer a portion of their salary into #Bitcoin, Bitcoin Cash, or the platform's ether."" CEO \n\n",Bitcoin
1,Fri Jan 22 01:44:26 +0000 2021,1352431900648869888,"In the past 24 hours #Bitcoin has crashed more than $4,500 or about 15%.\n\nLess than a year ago #Bitcoin was trading for less than $4,500.",Bitcoin
2,Thu Jan 21 19:18:52 +0000 2021,1352334869980827649,"bitcoin is an extremely scarce asset going through a volatile adoption phase, there will be many hills and valleys as it co…",Bitcoin
3,Thu Jan 21 17:41:22 +0000 2021,1352310330240614401,Why the Bitcoin double-spend story is being misinterpreted,Bitcoin
4,Thu Jan 21 17:41:03 +0000 2021,1352310250615889920,"This #bitcoin bull run has a different set of facts including fast followers who can, and have been waiting to, deploy 100s of…",Bitcoin
...,...,...,...,...
3199,Thu Apr 19 12:37:00 +0000 2018,986946766615601152,math: How big do blocks need to be to handle twitter on-chain?\n\n,Bitcoin
3200,Thu Apr 19 10:58:44 +0000 2018,986922037770764288,Continue reading: \nTry Memo:,Bitcoin
3201,Thu Apr 19 10:55:27 +0000 2018,986921209060143104,"Join a Bitcoin meetup near you! The next Orange County Bitcoin Cash meetup will be held on Wednesday, April 25th. \n\n",Bitcoin
3202,Wed Apr 18 08:54:45 +0000 2018,986528449895411712,"Hearing a lot of talk about ""utility tokens"" these days. Seems like the only ""utility"" offered by these tokens is shoe-horned in to random projects to justify ICO money-grabs.\n\nBitcoin Cash is the currency with the most utility of all.",Bitcoin


In [73]:
# Cleaning up Date from twitter format to python readable format

bitcoin_df = bitcoin_df.dropna().drop_duplicates()
bitcoin_df['Date'] = pd.to_datetime(bitcoin_df['Date'],utc=True)
bitcoin_df['Date'] = bitcoin_df['Date'].dt.date
bitcoin_df['Date'] = pd.to_datetime(bitcoin_df['Date'])
bitcoin_df.dtypes

# Adding textblob sentiment
bitcoin_df['textblob']= bitcoin_df['text'].apply(lambda tweet: TextBlob(tweet).sentiment.polarity)
bitcoin_df

# Adding vader sentiment
bitcoin_df['vader_compound']= bitcoin_df.apply(lambda row: sentiment_score_compound(row['text']), axis=1)
bitcoin_df.drop_duplicates()

# Only getting tweets with word bitcoin
search = ['Bitcoin','bitcoin','BTC','btc']
bitcoin_df = bitcoin_df[bitcoin_df['text'].str.contains('|'.join(search))]

pd.options.display.max_colwidth = 500
bitcoin_df

Unnamed: 0,Date,tweet_id,text,name,textblob,vader_compound
0,2021-01-22,1352433783518072832,"""Under the new program, Sequoia employees may elect to defer a portion of their salary into #Bitcoin, Bitcoin Cash, or the platform's ether."" CEO \n\n",Bitcoin,0.468182,-0.2960
1,2021-01-22,1352431900648869888,"In the past 24 hours #Bitcoin has crashed more than $4,500 or about 15%.\n\nLess than a year ago #Bitcoin was trading for less than $4,500.",Bitcoin,-0.020833,0.0000
2,2021-01-21,1352334869980827649,"bitcoin is an extremely scarce asset going through a volatile adoption phase, there will be many hills and valleys as it co…",Bitcoin,0.187500,0.3581
3,2021-01-21,1352310330240614401,Why the Bitcoin double-spend story is being misinterpreted,Bitcoin,0.000000,-0.3182
4,2021-01-21,1352310250615889920,"This #bitcoin bull run has a different set of facts including fast followers who can, and have been waiting to, deploy 100s of…",Bitcoin,0.100000,0.0000
...,...,...,...,...,...,...
3195,2018-04-21,987604852602802176,"CheapAir to begin accepting Bitcoin Cash in the coming weeks, but are concerned about 's BIP70 requirement. \n\n\n\n",Bitcoin,0.000000,0.2023
3197,2018-04-21,987590660965457920,PSA: So called 'low-fee' cryptocurrency Litecoin has transaction fees 20x higher than Bitcoin Cash\n\n,Bitcoin,0.250000,0.0000
3198,2018-04-19,986965918700847105,Bitcoin Core Loses Out to Bitcoin Cash – Is it the Beginning of the End?\n\n,Bitcoin,-0.300000,-0.3182
3201,2018-04-19,986921209060143104,"Join a Bitcoin meetup near you! The next Orange County Bitcoin Cash meetup will be held on Wednesday, April 25th. \n\n",Bitcoin,0.062500,0.3595


In [120]:
#Grouping sentiment by date and looking at charts of sentiment and volume over time

bitcoin_bydate = bitcoin_df.groupby(['Date'], as_index=False).agg({'textblob': 'mean','vader_compound': 'mean', 'text': 'first', 'tweet_id': 'count'})
bitcoin_bydate

#Combining BTC price with dataframe
btc_bitcoin = pd.merge(bitcoin_bydate,btc_df, how='inner', on= "Date")

In [121]:
#Creating an RESULTS dataframe for easier evaluation

#getting correlation between textblob and btc price
results_news.at['Bitcoin','corr_textblob'] = round(btc_bitcoin['Close'].corr(btc_bitcoin['textblob']),2)

#getting correlation between vadercompound and btc price
results_news.at['Bitcoin','corr_vader'] = round(btc_bitcoin['Close'].corr(btc_bitcoin['vader_compound']),2)

#getting tweet count that talk specifically about bitcoin
results_news.at['Bitcoin','btc_twt_count'] = btc_bitcoin['tweet_id'].sum()

#average textblob sentiment
results_news.at['Bitcoin','avg_textblob'] = round(btc_bitcoin['textblob'].mean(),2)

#average vader sentiment
results_news.at['Bitcoin','avg_vader'] = round(btc_bitcoin['vader_compound'].mean(),2)
results_news

Unnamed: 0,corr_textblob,corr_vader,btc_twt_count,avg_textblob,avg_vader
Crypto,0.03,0.1,2044.0,0.07,0.02
Coin_Telegraph,0.19,0.17,1865.0,0.09,0.14
Coin_Desk,-0.06,-0.08,1071.0,0.09,0.15
Bitcoin,0.03,0.07,1545.0,0.1,0.12
BTCTN,,,,,
Combined_news,,,,,


In [43]:
# SECTION E: GETTING ALL TWEETS FROM @BTCTN (Bitcoin News). Same code as before, just applied to a different twitter handle

screen_name = 'BTCTN'

btctntweets = tw.Cursor(api.user_timeline,
              screen_name=screen_name,
              tweet_mode='extended',
              lang =" en").items()

btctn_json = []

for tweet in btctntweets:
    btctn_json.append(tweet._json)

In [44]:
# removing URLs, and creating DataFrame for specific tweet columns

btctn_list=[]

with open('btctn.txt', 'w') as file:
        file.write(json.dumps(btctn_json, indent=4))

with open('btctn.txt', encoding='utf-8') as json_file:  
    all_data = json.load(json_file)
    for each_dictionary in all_data:
        tweet_id = each_dictionary['id']
        text = clean_tweets(each_dictionary['full_text'])
        Date = each_dictionary['created_at']
        name = each_dictionary['user']['screen_name']
        
        btctn_list.append({'tweet_id': str(tweet_id),
                             'text': str(text),
                             'name': str(name),
                             'Date': Date,
                            })

        btctn_df = pd.DataFrame(btctn_list, columns = 
                                  ['Date','tweet_id', 'text', 
                                   'name'])
        
btctn_df.to_csv('btctn.csv', index = False)

#pd.options.display.max_columns = None
#pd.options.display.max_colwidth = 500


In [76]:
#Reading CSV file
pd.options.display.max_colwidth = 500
btctn_df = pd.read_csv('btctn.csv')
btctn_df

Unnamed: 0,Date,tweet_id,text,name
0,Fri Jan 22 18:06:57 +0000 2021,1352679156212002818,"The Nasdaq-listed multibillion-dollar company Microstrategy has bought even more bitcoins. ""We now hold approximately 70,784 #bitcoins,"" revealed the CEO. #btc #cryptocurrency",BTCTN
1,Fri Jan 22 17:39:38 +0000 2021,1352672284931194881,"The equipment will be delivered in eight batches throughout 2021, starting with a delivery of 500 in May and June, and then 900 each month until year-end. #bitcoin #bitcoinmining #cryptocurrency",BTCTN
2,Fri Jan 22 15:47:19 +0000 2021,1352644017654292487,"“This dump might have started from #BTC miners,” the Cryptoquant executive tweeted on Friday. #Cryptocurrency",BTCTN
3,Fri Jan 22 15:08:24 +0000 2021,1352634224319328256,,BTCTN
4,Fri Jan 22 14:35:21 +0000 2021,1352625907807236098,"After receiving “heavy complaints” from users about the constant interruptions in the services, the SEC reportedly decided on asking Bitkub to temporarily shut down. #Cryptocurrency",BTCTN
...,...,...,...,...
3217,Thu Nov 14 06:01:26 +0000 2019,1194857854479589376,‘Zimdollars’ Issued for First Time in Ten Years Amidst Continued Hyperinflation,BTCTN
3218,Thu Nov 14 01:18:01 +0000 2019,1194786531837431813,How Crypto Assets Are Capturing the Attention Economy,BTCTN
3219,Wed Nov 13 22:21:48 +0000 2019,1194742184609751041,Developer Demos Smart Card That Produces Bitcoin Cash Signatures,BTCTN
3220,Wed Nov 13 19:21:37 +0000 2019,1194696840492523522,Dubai to Host City’s First Bitcoin Cash Meetup on Saturday,BTCTN


In [77]:
# Cleaning up Date from twitter format to python readable format

btctn_df = btctn_df.dropna().drop_duplicates()
btctn_df['Date'] = pd.to_datetime(btctn_df['Date'],utc=True)
btctn_df['Date'] = btctn_df['Date'].dt.date
btctn_df['Date'] = pd.to_datetime(btctn_df['Date'])
btctn_df.dtypes

# Adding textblob sentiment
btctn_df['textblob']= btctn_df['text'].apply(lambda tweet: TextBlob(tweet).sentiment.polarity)
btctn_df

# Adding vader sentiment
btctn_df['vader_compound']= btctn_df.apply(lambda row: sentiment_score_compound(row['text']), axis=1)
btctn_df.drop_duplicates()

# Only getting tweets with word bitcoin
search = ['Bitcoin','bitcoin','BTC','btc']
btctn_df = btctn_df[btctn_df['text'].str.contains('|'.join(search))]

pd.options.display.max_colwidth = 500
btctn_df

Unnamed: 0,Date,tweet_id,text,name,textblob,vader_compound
0,2021-01-22,1352679156212002818,"The Nasdaq-listed multibillion-dollar company Microstrategy has bought even more bitcoins. ""We now hold approximately 70,784 #bitcoins,"" revealed the CEO. #btc #cryptocurrency",BTCTN,0.050000,0.0000
1,2021-01-22,1352672284931194881,"The equipment will be delivered in eight batches throughout 2021, starting with a delivery of 500 in May and June, and then 900 each month until year-end. #bitcoin #bitcoinmining #cryptocurrency",BTCTN,0.000000,0.0000
2,2021-01-22,1352644017654292487,"“This dump might have started from #BTC miners,” the Cryptoquant executive tweeted on Friday. #Cryptocurrency",BTCTN,0.000000,-0.3818
5,2021-01-22,1352619836183031809,"""President Biden has frozen all agency rulemaking pending further review. This includes former Secretary Mnuchin's proposal on 'unhosted wallets.'"" #FinCEN #bitcoin #btc",BTCTN,0.000000,0.0000
8,2021-01-22,1352536371949432834,"Hive Blockchain has bought 6,400 next generation mining machines from #Canaan, taking its total hash rate to 1,229 PH/s. #bitcoin #bitcoinmining #cryptocurrency",BTCTN,0.000000,0.0000
...,...,...,...,...,...,...
3200,2019-11-16,1195811599535226885,Bitcoin Cash Community Funds Eatbch Trip to Ghana,BTCTN,0.000000,0.0000
3206,2019-11-15,1195347243705999364,Bitcoin Cash Upgrade Complete: 2 New Protocol Changes Added,BTCTN,0.118182,0.0000
3210,2019-11-15,1195144802343538690,Iranian Grid Explains Electrical Costs Will Fluctuate for Bitcoin Miners,BTCTN,0.000000,0.0000
3219,2019-11-13,1194742184609751041,Developer Demos Smart Card That Produces Bitcoin Cash Signatures,BTCTN,0.214286,0.4019


In [122]:
#Grouping sentiment by date and looking at charts of sentiment and volume over time

btctn_bydate = btctn_df.groupby(['Date'], as_index=False).agg({'textblob': 'mean','vader_compound': 'mean', 'text': 'first', 'tweet_id': 'count'})
btctn_bydate

#Combining BTC price with dataframe
btc_btctn = pd.merge(btctn_bydate,btc_df, how='inner', on= "Date")



In [123]:
#Creating an RESULTS dataframe for easier evaluation

#getting correlation between textblob and btc price
results_news.at['BTCTN','corr_textblob'] = round(btc_btctn['Close'].corr(btc_btctn['textblob']),2)

#getting correlation between vadercompound and btc price
results_news.at['BTCTN','corr_vader'] = round(btc_btctn['Close'].corr(btc_btctn['vader_compound']),2)

#getting tweet count that talk specifically about bitcoin
results_news.at['BTCTN','btc_twt_count'] = btc_btctn['tweet_id'].sum()

#average textblob sentiment
results_news.at['BTCTN','avg_textblob'] = round(btc_btctn['textblob'].mean(),2)

#average vader sentiment
results_news.at['BTCTN','avg_vader'] = round(btc_btctn['vader_compound'].mean(),2)
results_news

Unnamed: 0,corr_textblob,corr_vader,btc_twt_count,avg_textblob,avg_vader
Crypto,0.03,0.1,2044.0,0.07,0.02
Coin_Telegraph,0.19,0.17,1865.0,0.09,0.14
Coin_Desk,-0.06,-0.08,1071.0,0.09,0.15
Bitcoin,0.03,0.07,1545.0,0.1,0.12
BTCTN,0.04,0.08,1388.0,0.06,0.06
Combined_news,,,,,


In [124]:
#Combining the  dataframes together.

combined_news = [crypto_df,cointele_df, coindesk_df,bitcoin_df,btctn_df]  
combined_newsdf = pd.concat(combined_news, axis=0, ignore_index=True)

In [125]:
combined_newsdf = combined_newsdf.sort_values('Date')
combined_newsdf

Unnamed: 0,Date,tweet_id,text,name,textblob,vader_compound
2042,2018-02-22,966798649073307648,More than 4 million have signed up to open accounts at Robinhood as the brokerage app begins offering commission-free trading of Bitcoin and Ethereum today for the first time,crypto,0.250000,0.0000
2041,2018-02-22,966804469185990656,This J-pop band is getting in on the cryptocurrency craze. has more from Tokyo via,crypto,0.500000,-0.1531
2040,2018-02-22,966810149695475712,Venezuelan citizens can't buy their own country's crypto coin because it doesn't accept bolivars and they aren't allowed to buy foreign currency,crypto,0.237500,-0.2924
2039,2018-02-22,966822236060610562,"The USA Luge team weighs in on Bitcoin: ""Both know all about speed, crashes, risk management, and holding on.""",crypto,0.000000,-0.2732
2043,2018-02-22,966790587449008128,American lugers are accepting cryptocurrency donations to help fund their Olympic ambitions,crypto,0.000000,0.6486
...,...,...,...,...,...,...
2076,2021-01-22,1352440985293189121,Police across the UK now have a safe way to store #crypto connected to criminal activities following an agreement with digital asset custodian Komainu,Cointelegraph,0.025000,0.6369
2077,2021-01-22,1352440854800179201,"One JPMorgan strategist argues that Bitcoin is more like a cyclical asset, comparing it to stocks in the automobile industry. But an analyst from the same company thinks BTC might be a better choice than gold. \n\nCan they both be right?",Cointelegraph,0.321429,0.6904
2078,2021-01-22,1352424501003214852,"It turns out isn’t as bearish on crypto as reports suggest, with the Treasury Secretary nominee citing its potential to improve the financial system",Cointelegraph,0.000000,0.5719
2058,2021-01-22,1352619411690016768,"The crypto and blockchain markets stood out in the face of a crisis that spared almost no sector in 2020, Cointelegraph Brasil reports",Cointelegraph,0.000000,-0.7190


In [126]:
#Grouping sentiment by date and looking at charts of sentiment and volume over time

combined_news_bydate = combined_newsdf.groupby(['Date'], as_index=False).agg({'textblob': 'mean','vader_compound': 'mean', 'text': 'first', 'tweet_id': 'count'})
combined_news_bydate

#Combining BTC price with dataframe

btc_combined_news = pd.merge(combined_news_bydate,btc_df, how='inner', on= "Date")

#exporting dataframe to csv file

btc_combined_news.to_csv('news_sentiment_btc.csv', index = False) 


In [127]:
#Creating an RESULTS dataframe for easier evaluation

#getting correlation between textblob and btc price
results_news.at['Combined_news','corr_textblob'] = round(btc_combined_news['Close'].corr(btc_combined_news['textblob']),2)

#getting correlation between vadercompound and btc price
results_news.at['Combined_news','corr_vader'] = round(btc_combined_news['Close'].corr(btc_combined_news['vader_compound']),2)

#getting tweet count that talk specifically about bitcoin
results_news.at['Combined_news','btc_twt_count'] = results_news.iloc[0:4,2].sum(axis=0)

#average textblob sentiment
results_news.at['Combined_news','avg_textblob'] = round(btc_combined_news['textblob'].mean(),2)

#average vader sentiment
results_news.at['Combined_news','avg_vader'] = round(btc_combined_news['vader_compound'].mean(),2)
results_news



Unnamed: 0,corr_textblob,corr_vader,btc_twt_count,avg_textblob,avg_vader
Crypto,0.03,0.1,2044,0.07,0.02
Coin_Telegraph,0.19,0.17,1865,0.09,0.14
Coin_Desk,-0.06,-0.08,1071,0.09,0.15
Bitcoin,0.03,0.07,1545,0.1,0.12
BTCTN,0.04,0.08,1388,0.06,0.06
Combined_news,0.03,0.12,6525,0.08,0.06


In [128]:
#exporting dataframe to csv file

results_news.to_csv('news_results.csv', index = True) 