In [None]:
import json

# Fill in your own Twitter API keys/secrets (as strings) in place of the
# placeholder values below.
credentials = {
    'CONSUMER_KEY': 'YOUR CONSUMER_KEY',
    'CONSUMER_SECRET': 'YOUR CONSUMER_SECRET',
    'ACCESS_TOKEN': 'YOUR ACCESS_TOKEN',
    'ACCESS_SECRET': 'YOUR ACCESS_SECRET',
}

# Persist the credentials as JSON so later cells can load them from disk
# instead of repeating the secrets.
with open("twitter_credentials.json", "w") as fh:
    json.dump(credentials, fh)

In [1]:
from twython import Twython
import json

# Load the credentials that the first cell saved to disk
with open("twitter_credentials.json", "r") as file:
    creds = json.load(file)

# Create the Twython client used for search requests; only the consumer
# key/secret pair is passed here.
python_tweets = Twython(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'])

# Search parameters, unpacked into python_tweets.search(**query) below.
# 'count' is the page size: the standard search endpoint documents a maximum
# of 100 tweets per page, so request exactly that instead of a larger value
# the API cannot honour.
query = {'q': 'bitcoin btc BTC Bitcoin BITCOIN',
        'result_type': 'popular',
        'count': 100,
        'lang': 'en',
        }

In [2]:
import pandas as pd

# Run the search once and keep the list of returned statuses
statuses = python_tweets.search(**query)['statuses']

# Pull the fields of interest out of every status, one column at a time
dict_ = {
    'user': [status['user']['screen_name'] for status in statuses],
    'date': [status['created_at'] for status in statuses],
    'text': [status['text'] for status in statuses],
    'favorite_count': [status['favorite_count'] for status in statuses],
}

# Build the DataFrame and order it from most to least favorited; the
# original row labels are kept, so the index shows the API's return order.
df = pd.DataFrame(dict_).sort_values(by='favorite_count', ascending=False)
df
# So we got some interesting tweets. Note that these are the most popular tweets from the past 7 days containing the words "bitcoin" and "btc".

Unnamed: 0,user,date,text,favorite_count
10,bobbyclee,Thu Jan 07 23:57:18 +0000 2021,IT HAS HAPPENED!! $BTC over $40k!!\n#Bitcoin h...,12066
0,binance,Wed Jan 13 15:17:36 +0000 2021,"$2,000 #Bitcoin Giveaway!💵\n\nTo enter, farm $...",6918
1,tyler,Wed Jan 13 15:10:07 +0000 2021,#Bitcoin consolidating at 34k. This is good fo...,3079
2,PeterMcCormack,Thu Jan 14 03:43:46 +0000 2021,eToro to ration #bitcoin due to limited supply...,605
5,cburniske,Thu Jan 14 00:14:52 +0000 2021,Bitcoin is unlike any other bubble we’ve seen ...,495
4,Investingcom,Wed Jan 13 10:00:03 +0000 2021,*ECB PRESIDENT LAGARDE SAYS BITCOIN NEEDS REGU...,476
3,PrestonPysh,Wed Jan 13 15:59:52 +0000 2021,@bitcoinzay People on Wall Street measure thei...,472
6,CoinDesk,Tue Jan 12 22:59:04 +0000 2021,“I think bitcoin wants to be a six-figure numb...,221
8,business,Thu Jan 14 14:50:07 +0000 2021,Temperatures of -40 degrees are a big part of ...,110
9,Investingcom,Wed Jan 13 06:47:10 +0000 2021,"*BITCOIN DROPS 6%, OR $2,000, TO FALL BACK BEL...",108


In [3]:
# Dimensions of the search-results frame as a (rows, columns) tuple
df.shape

(12, 4)

In [4]:
# Streaming API: collect a stream of tweets using the Twitter Streaming API.
# Twython makes this easy through its TwythonStreamer class.
# We need to define a class, MyStreamer, that inherits from TwythonStreamer and overrides the on_success and on_error methods.

from twython import TwythonStreamer
import csv

def process_tweet(tweet):
    """Reduce a raw tweet payload to the few fields this notebook keeps.

    Args:
        tweet: Mapping that provides ``entities.hashtags`` (a sequence of
            mappings with a ``text`` key), ``text``, and a ``user`` mapping
            with ``screen_name`` and ``location``.

    Returns:
        A dict with the keys ``hashtags`` (list of hashtag texts), ``text``,
        ``user`` and ``user_loc``, inserted in that order.

    Raises:
        KeyError: If any of the fields listed above is missing.
    """
    tags = []
    for entry in tweet['entities']['hashtags']:
        tags.append(entry['text'])

    # Key order matters to callers that serialise the values positionally.
    return {
        'hashtags': tags,
        'text': tweet['text'],
        'user': tweet['user']['screen_name'],
        'user_loc': tweet['user']['location'],
    }
    
    
# Create a class that inherits TwythonStreamer
class MyStreamer(TwythonStreamer):
    """Streamer that appends English-language tweets to ``saved_tweets.csv``.

    Each accepted message is reduced with ``process_tweet`` and written as one
    CSV row. A header row with the field names is written first whenever the
    file is empty, so the file can be read back with named columns.
    """

    # Received data
    def on_success(self, data):
        """Handle one message received from the stream.

        Args:
            data: Decoded message payload (a dict).

        Messages whose ``lang`` is not ``'en'`` are ignored. Messages with
        no ``lang`` key at all (for example stream notices rather than
        tweets) are also skipped instead of raising ``KeyError``.
        """
        # Only collect tweets in English
        if data.get('lang') == 'en':
            tweet_data = process_tweet(data)
            self.save_to_csv(tweet_data)

    # Problem with the API
    def on_error(self, status_code, data, headers=None):
        """Report an API error and stop streaming.

        Args:
            status_code: HTTP status code reported for the failure.
            data: Response body that accompanied the error.
            headers: Optional response headers. NOTE(review): newer Twython
                releases appear to pass this third argument; it is accepted
                (and ignored) so either calling convention works.
        """
        print(status_code, data)
        self.disconnect()

    # Save each tweet to csv file
    def save_to_csv(self, tweet):
        """Append one processed tweet to ``saved_tweets.csv``.

        Args:
            tweet: Dict as returned by ``process_tweet``; its keys become the
                header row and its values one data row, in insertion order.
        """
        # newline='' is what the csv module requires for files it writes to;
        # without it, extra blank rows can appear on some platforms.
        with open(r'saved_tweets.csv', 'a', encoding='utf-8', newline='') as file:
            writer = csv.writer(file)
            # In append mode the position starts at the end of the file, so a
            # position of 0 means the file is empty and still needs a header.
            if file.tell() == 0:
                writer.writerow(list(tweet.keys()))
            writer.writerow(list(tweet.values()))

In [None]:
# Instantiate our streaming class with all four credentials loaded into
# `creds` by an earlier cell (consumer key/secret plus access token/secret)
stream = MyStreamer(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'], 
                    creds['ACCESS_TOKEN'], creds['ACCESS_SECRET'])
# Start the stream, tracking the single keyword 'bitcoin'.
# Alternative track string, kept for reference but not used:
#stream.statuses.filter(track='bitcoin btc BTC Bitcoin BITCOIN')
# NOTE(review): this call presumably keeps running until the stream is
# disconnected or the cell is interrupted — confirm before using Run All.
stream.statuses.filter(track='bitcoin')

In [None]:
import pandas as pd
# Load the tweets collected by the streamer and preview the first rows.
# NOTE(review): read_csv treats the first line of the file as column names
# by default — confirm saved_tweets.csv starts with a header row.
tweets = pd.read_csv("saved_tweets.csv")
tweets.head()

In [None]:
from collections import Counter
import ast

tweets = pd.read_csv("saved_tweets.csv")

# Each cell of the 'hashtags' column holds the text form of a Python list,
# e.g. "['btc', 'crypto']". Missing cells (NaN) are dropped so that only
# strings are parsed.
list_hashtag_strings = [entry for entry in tweets.hashtags.dropna()]

# Parse every entry on its own. Joining all entries with commas and parsing
# the result once breaks for a single row (the result is one flat list, so
# the loop below would iterate over characters) and for zero rows (an empty
# string is not a valid literal).
list_hashtag_lists = [ast.literal_eval(entry) for entry in list_hashtag_strings]

# Flatten to one list of lower-cased hashtags so counting is case-insensitive
hashtag_list = [ht.lower() for list_ in list_hashtag_lists for ht in list_]

# Count most common hashtags
counter_hashtags = Counter(hashtag_list)
counter_hashtags.most_common(20)

In [None]:
from geopy.geocoders import Nominatim
import gmplot

# Nominatim requires an application-specific user agent; recent geopy
# releases refuse to build the geocoder when none is given.
geolocator = Nominatim(user_agent="twitter_bitcoin_heatmap")

# process_tweet stores the user's location under 'user_loc'; fall back to
# 'location' for CSV files whose header uses that name instead.
location_column = 'user_loc' if 'user_loc' in tweets.columns else 'location'

# Go through all tweets and add locations to 'coordinates' dictionary
coordinates = {'latitude': [], 'longitude': []}
failed_lookups = 0
# Users without a location show up as NaN; skip them rather than spending a
# geocoding request on a value that cannot be resolved.
for user_loc in tweets[location_column].dropna():
    try:
        location = geolocator.geocode(user_loc)
    except Exception:
        # Best effort: a failed request (e.g. too many connection requests)
        # must not abort the whole run, but it is counted rather than hidden.
        # KeyboardInterrupt is not caught here, so the loop can be stopped.
        failed_lookups += 1
        continue

    # If coordinates are found for location
    if location:
        coordinates['latitude'].append(location.latitude)
        coordinates['longitude'].append(location.longitude)

if failed_lookups:
    print(f"{failed_lookups} location lookups failed and were skipped")

# Instantiate and center a GoogleMapPlotter object to show our map:
# latitude 30, longitude 0, zoom level 3
gmap = gmplot.GoogleMapPlotter(30, 0, 3)

# Insert points on the map passing a list of latitudes and longitudes
gmap.heatmap(coordinates['latitude'], coordinates['longitude'], radius=20)

# Save the map to html file
gmap.draw("python_heatmap.html")