##  Making a Map of Everton Twitter  
A map of twitter users across the globe using the hashtag #Everton

In [76]:
#Load necessary packages
import tweepy
import folium
import geopy
import os
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import pandas as pd
import numpy as np
import json
from datetime import datetime

#load environment serviceabs
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
#set authentication variables
access_token_eve = os.environ.get('access_token')
access_secret_eve = os.environ.get('access_secret')
consumer_key_eve = os.environ.get('api_key')
consumer_secret_eve = os.environ.get('api_secret')

In [64]:
#load the Twitter API, create def for use
def get_twitter_api():
    # personal details
    consumer_key = consumer_key_eve
    consumer_secret = consumer_secret_eve
    access_token = access_token_eve
    access_token_secret = access_secret_eve
    
    # authentication of consumer key and secret
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    
    # authentication of access token and secret
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True)
    
    return api

In [65]:
#set search for location of the Twitter user
#incorporate Twitter's own Lon/Lat data into the equation:
#https://developer.twitter.com/en/docs/tutorials/filtering-tweets-by-location

def get_tweets(search, count):
    api = get_twitter_api()
    location_data = []
    for tweet in tweepy.Cursor(api.search_tweets, q=search).items(count):
        if hasattr(tweet, 'user') and hasattr(tweet.user, 'screen_name') and hasattr(tweet.user, 'location') and hasattr(tweet, 'created_at') and hasattr(tweet, 'id'):
            if tweet.user.location:
                location_data.append((tweet.id, tweet.user.screen_name, tweet.user.location, str(tweet.created_at)))
    return location_data

In [18]:
#pull the tweets and store in data frame 
#data should include -Twitter screen name; location, created date, Tweet ID

search_terms="#Everton"

tweet_batch = get_tweets(search_terms, 2500)

#print to check
print(tweet_batch)

[(1547926057348517889, 'RossAftbl', 'Glasgow/St Albans', '2022-07-15 12:48:25+00:00'), (1547925446796496896, 'fan_banter', 'United Kingdom', '2022-07-15 12:46:00+00:00'), (1547925444560764928, 'matola_olexandr', 'Odessa, Ukraine', '2022-07-15 12:45:59+00:00'), (1547924528625434626, 'StavrateCom', 'Europe', '2022-07-15 12:42:21+00:00'), (1547922996949504003, 'GEEZERBOY1977', 'England, United Kingdom', '2022-07-15 12:36:16+00:00'), (1547919631796473858, 'Fussballeck', 'Deutschland', '2022-07-15 12:22:53+00:00'), (1547913168784986114, 'SimonCBondar', 'Seacombe, England', '2022-07-15 11:57:12+00:00')]


In [70]:
#convert array to NumPy array
tweets=np.array(tweet_batch)

In [68]:
#convert to dataframe
df = pd.DataFrame(tweets, columns=["ID", "Name", "Location", "Date-Time"])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   ID         7 non-null      object
 1   Name       7 non-null      object
 2   Location   7 non-null      object
 3   Date-Time  7 non-null      object
dtypes: object(4)
memory usage: 352.0+ bytes


In [46]:
def addLatLon(data):
    #define the geolocator service
    geo_locator = Nominatim(user_agent="EvertonTweeters")
    
    #define the geocoder, with a rate limiter so we don't break Nominatum
    geocode = RateLimiter(geo_locator.geocode, min_delay_seconds=2)
    
    print("first step: ", data.head(2))
    
    for (Location) in data:
        #geocode the location, then add lat/lon to dataframe; if not a valid location, put "NaN"
        data["Location"] = data["Location"].apply(geocode)
        data['Lat'] = data['Location'].apply(lambda x: x.latitude if x else None)
        data["Lon"] = data['Location'].apply(lambda x: x.longitude if x else None)
    
    return data

In [47]:
tweet_loc = addLatLon(df)

first step:                      ID        Name           Location  \
0  1547926057348517889   RossAftbl  Glasgow/St Albans   
1  1547925446796496896  fan_banter     United Kingdom   

                   Date-Time  
0  2022-07-15 12:48:25+00:00  
1  2022-07-15 12:46:00+00:00  


In [48]:
tweet_loc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   ID         7 non-null      object 
 1   Name       7 non-null      object 
 2   Location   7 non-null      object 
 3   Date-Time  7 non-null      object 
 4   Lat        7 non-null      float64
 5   Lon        7 non-null      float64
dtypes: float64(2), object(4)
memory usage: 464.0+ bytes


In [97]:
#write data to json
#to add: filter to prevent duplicate tweets

#set json file name to include today's date
date = datetime.now().strftime("%Y_%m_%d-%I:%M:%S_%p")

json_path = str(f'tweet_loc_{date}.json')
print(json_path)

tweet_json = tweet_loc.to_json('json_path', orient='records')
print(tweet_json)


SyntaxError: unterminated string literal (detected at line 10) (204965442.py, line 10)

In [59]:
def put_markers(map, data):
    for i in range(0,len(data)):
       folium.Marker(
          location=[data.iloc[i]['Lat'], data.iloc[i]['Lon']],
          popup=data.iloc[i]['Name'],
        ).add_to(map)

In [60]:
map = folium.Map(location=[0,0], tiles="OpenStreetMap", zoom_start=2)

put_markers(map, tweet_loc)

map

In [61]:
map