In [2]:
# Import the Twython class
from twython import Twython
from twython import TwythonStreamer
import json
import csv
import pandas as pd

# Folder that holds the notebook's input files
DIR = './data/'

# Parse the JSON credentials file into the `creds` dict
with open(f"{DIR}twitter_credentials.json", "r") as creds_file:
    creds = json.loads(creds_file.read())

In [3]:
# Build the Twython client from the consumer key/secret stored in `creds`
consumer_key, consumer_secret = creds['CONSUMER_KEY'], creds['CONSUMER_SECRET']
python_tweets = Twython(consumer_key, consumer_secret)

# Search parameters for the phrase "learn python"
# NOTE(review): no visible cell passes `query` to a search call.
query = dict(
    q='learn python',
    result_type='popular',
    count=10,
    lang='en',
)

In [4]:
# Search parameters used by the search cell further down
new_query = dict(
    q='new try',
    count=20,
    lang='en',
    result_type='mixed',
)

In [2]:
# Filter out unwanted data
def process_tweet(tweet):
    """Reduce a tweet payload to the four fields this notebook keeps.

    Args:
        tweet: Mapping with ``entities.hashtags`` (a list of mappings that
            each have a ``text`` key), ``text``, and ``user`` (a mapping with
            ``screen_name`` and ``location``).

    Returns:
        dict with keys ``hashtags`` (list of hashtag texts), ``text``,
        ``user`` (screen name) and ``user_loc`` (profile location), in that
        order.

    Raises:
        KeyError: if any of the fields listed above is missing.
    """
    # Collect just the text of every hashtag entity
    tags = []
    for entry in tweet['entities']['hashtags']:
        tags.append(entry['text'])

    body = tweet['text']
    author = tweet['user']

    return {
        'hashtags': tags,
        'text': body,
        'user': author['screen_name'],
        'user_loc': author['location'],
    }

KeyboardInterrupt: 

In [None]:
# Create a class that inherits TwythonStreamer
class MyStreamer(TwythonStreamer):
    """Streamer that appends every English tweet it receives to a CSV file.

    Each accepted tweet is reduced with ``process_tweet`` and written as one
    row to ``saved_tweets.csv`` in the current working directory.
    """

    # Received data
    def on_success(self, data):
        """Handle one message delivered by the stream.

        Args:
            data: Decoded message payload (a dict).
        """
        # Only collect tweets in English. Payloads without a 'lang' key are
        # skipped instead of raising KeyError and killing the stream.
        # NOTE(review): the streaming API is expected to also send non-tweet
        # notices (e.g. limit/delete messages) that have no 'lang' field.
        if data.get('lang') == 'en':
            tweet_data = process_tweet(data)
            self.save_to_csv(tweet_data)

    # Problem with the API
    def on_error(self, status_code, data, headers=None):
        """Print the error reported by the API and stop streaming.

        Args:
            status_code: Status code passed in by Twython.
            data: Error payload passed in by Twython.
            headers: Optional third argument. NOTE(review): newer Twython
                releases appear to pass the response headers here; accepting
                it keeps the callback compatible with both call styles.
        """
        print(status_code, data)
        self.disconnect()

    # Save each tweet to csv file
    def save_to_csv(self, tweet):
        """Append the values of ``tweet`` as one row to ``saved_tweets.csv``.

        Args:
            tweet: Dict whose values are written in insertion order.
        """
        # newline='' is what the csv module requires of files it writes to;
        # without it, extra blank rows can appear on some platforms.
        with open(r'saved_tweets.csv', 'a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(list(tweet.values()))

In [None]:
# Build the streamer from the four credentials, in the order the original call passes them
credential_keys = ('CONSUMER_KEY', 'CONSUMER_SECRET', 'ACCESS_TOKEN', 'ACCESS_SECRET')
stream = MyStreamer(*[creds[key] for key in credential_keys])

# Start the stream, filtering on the keyword 'python'
stream.statuses.filter(track='python')

In [5]:
# Run the search and collect one list per column of interest
dict_ = {'user': [], 'date': [], 'text': [], 'favorite_count': []}
for status in python_tweets.search(**new_query)['statuses']:
    # Pull the four fields out of this status, then append them column by column
    row = {
        'user': status['user']['screen_name'],
        'date': status['created_at'],
        'text': status['text'],
        'favorite_count': status['favorite_count'],
    }
    for column, value in row.items():
        dict_[column].append(value)


In [6]:
# Put the collected columns in a DataFrame, highest favorite_count first
df = pd.DataFrame(dict_).sort_values(by='favorite_count', ascending=False)
df.head(5)

Unnamed: 0,user,date,text,favorite_count
0,StrawHatDLaw,Sun Dec 06 20:21:11 +0000 2020,Next week we play Burnley just take some risks...,0
1,yakityyakyaks,Sun Dec 06 20:21:10 +0000 2020,"@KayaJones LOL, NEW YORK TIMES was source for ...",0
18,lancifer69,Sun Dec 06 20:20:11 +0000 2020,RT @FugglesWarlock: PEPPERS &amp; PEARS • Have...,0
17,mrgrupss,Sun Dec 06 20:20:12 +0000 2020,@Emily_OConnor19 I am nervous for that!! HOWEV...,0
16,thecrespoclan,Sun Dec 06 20:20:17 +0000 2020,RT @katee1021: Yeah they’re getting tooo big a...,0


In [22]:
# Read the streamed tweets back in; header=None, so column names are supplied here
new_df = pd.read_csv(
    "./saved_tweets.csv",
    sep=',',
    encoding="ISO-8859-1",
    header=None,
    names=['hashtags', 'text', 'user', 'location'],
)

In [24]:
# Number of rows loaded from the CSV
print(new_df.shape[0])

99


In [28]:
# Preview the first five rows
new_df.iloc[:5]

Unnamed: 0,hashtags,text,user,location
0,[],RT @Gho5tdogCodes: Python basics about functio...,CodeFlawCo,India
1,"['logo', 'MachineLearning', 'Python', 'program...",RT @ujjal95371221: Are you looking for a #logo...,CodeFlawCo,India
2,"['STARTUP', 'BigData', 'Analytics', 'DataScien...",RT @aaaamhim: $ITOX $1 potential unknown #STAR...,CodeFlawCo,India
3,"['python', 'programming', 'programmers', 'Word...",RT @aashutosh396: How many tweets a day do you...,CodeFlawCo,India
4,"['logo', 'MachineLearning', 'Python', 'program...",RT @ujjal95371221: Are you looking for a #logo...,CodeFlawCo,India


In [37]:
from geopy.geocoders import Nominatim
import gmplot

# Identify this notebook to the Nominatim service.
# NOTE(review): geopy 2.x is expected to reject a geocoder built without a
# custom user_agent; replace this string with your own application name.
geolocator = Nominatim(user_agent="python_tweets_heatmap")

# Go through all tweets and add locations to 'coordinates' dictionary
coordinates = {'latitude': [], 'longitude': []}
geocoded = {}        # location string -> geocoder result (None when nothing was found)
failed_lookups = 0   # lookups that raised instead of returning a result
for user_loc in new_df.location:
    # A missing or blank profile location is not a place name; skip it
    # rather than sending it to the geocoder.
    if pd.isna(user_loc) or not str(user_loc).strip():
        continue

    # Ask the service only once per distinct location string; repeated
    # locations still add one heatmap point per tweet below.
    if user_loc not in geocoded:
        try:
            geocoded[user_loc] = geolocator.geocode(user_loc)
        # Best effort: a failed request (e.g. too many connection requests)
        # must not abort the run. Catch Exception, not a bare except, so that
        # interrupting the kernel still stops the loop.
        except Exception:
            failed_lookups += 1
            continue
    location = geocoded[user_loc]

    # If coordinates are found for location
    if location:
        coordinates['latitude'].append(location.latitude)
        coordinates['longitude'].append(location.longitude)

# Surface skipped lookups instead of hiding them
if failed_lookups:
    print(f"{failed_lookups} location lookups failed and were skipped")

# Instantiate and center a GoogleMapPlotter object to show our map
gmap = gmplot.GoogleMapPlotter(30, 0, 3)

# Insert points on the map passing a list of latitudes and longitudes
gmap.heatmap(coordinates['latitude'], coordinates['longitude'], radius=20)

# Save the map to html file
gmap.draw("python_heatmap.html")