In [1]:
import pandas as pd
import os
import math
import requests
import re
from shapely.geometry import shape
import folium 

In [2]:
tweets = pd.read_csv("./data/tweet_location_data_cleaned.csv")
country_counts = pd.read_csv("./data/tweet_frequency_per_country.csv")

In [3]:
tweets.head()

Unnamed: 0,created_at,text,user_location,user_name,user_verified,user_description,user_created_at,country,iso2,iso3,city,state
0,Wed Jan 20 23:59:58 +0000 2021,WE DID IT JOE!! \nYOUâ€™RE gonnandjfndndhebehr...,,Kay Brittany,0.0,SkinCare Products @kayscoffeescrub 23 years yo...,Fri May 09 03:19:52 +0000 2014,No Location,,,,Nan
1,Wed Jan 20 23:59:58 +0000 2021,"#JoeBiden : A better person, a better man, a b...",,Rev. Al,0.0,Poor but Happy Democrat.,Sat Jul 28 19:35:49 +0000 2012,No Location,,,,Nan
2,Wed Jan 20 23:59:57 +0000 2021,Girl Crushing on #KamalaHarris while at the sa...,,RAJIV KUMAR,0.0,#Entrepreneur II Food Evangelist II Economist ...,Thu Feb 19 12:00:05 +0000 2009,No Location,,,,Nan
3,Wed Jan 20 23:59:55 +0000 2021,Curious: how many #Trump supporters who canâ€™...,Brooklyn Ny,Yisroel,0.0,life enthusiast. wide eyed idealist. nerd wann...,Thu Mar 24 02:44:03 +0000 2011,United States of America,NY,NYC,Brooklyn,
4,Wed Jan 20 23:59:55 +0000 2021,@ZaidZamanHamid's account has been withheld in...,,Sohail Ashraf,0.0,,Sat Jun 23 09:36:52 +0000 2012,No Location,,,,Nan


In [4]:
tweets['userid'] = pd.factorize(tweets['user_name'])[0]

### Converting to Date and Time Format

In [None]:
tweets["created_at"] = pd.to_datetime(tweets["created_at"], errors='coerce')
tweets["created_at"] = tweets["created_at"].dt.strftime('%Y-%m-%d %H:%M:%S')

tweets["user_created_at"] = pd.to_datetime(tweets["user_created_at"], errors='coerce')
tweets["user_created_at"] = tweets["user_created_at"].dt.strftime('%Y-%m-%d %H:%M:%S')

  tweets["created_at"] = pd.to_datetime(tweets["created_at"], errors='coerce')
  tweets["user_created_at"] = pd.to_datetime(tweets["user_created_at"], errors='coerce')


In [None]:
tweets.sample(2)

### Removing Special Characters 

In [None]:
tweets['text'] = tweets['text'].str.replace(r'[^\w\s]','',regex=True) 
tweets['text'] = tweets['text'].apply(lambda x: re.split('https:\/\/.*', str(x))[0])
tweets['text'] = tweets['text'].str.title()
tweets['text'] = tweets['text'].str.strip()

In [None]:
tweets.sample(5)

In [None]:
user_columns = ['userid', 'user_verified', 'user_description', 'user_created_at']  
user = tweets[user_columns]
user.to_csv("./data/user.csv", index = False)

tweets_columns = ['userid','user_name','text', 'country','created_at']
tweets_info = tweets[tweets_columns]
tweets_info.to_csv("./data/tweets.csv",  index = False)

country = ['userid','country', 'iso2','iso3','city','state']
country = tweets[country]
country.to_csv("./data/country.csv",  index = False)

In [None]:
user

In [None]:
tweets_info

In [None]:
country

### Top Ten Countries with the Highest Frequency of Tweets

In [None]:
country_counts["count_log"] = country_counts["count"].apply(math.log10)
country_counts.head(5)

### World Map Showing the Distibution of Tweets

In [None]:
m = folium.Map(location = [0,0], zoom_start = 2, zoom_control=False,
               scrollWheelZoom=False,
               dragging=False)

geojson_url = 'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/world-countries.json'
response = requests.get(geojson_url)
geojson = response.json()

In [None]:
folium.Choropleth(
    geo_data=geojson,
    data=country_counts,
    fill_color="Greens",
    columns=['country', 'count_log'],
    key_on='feature.properties.name',
    fill_opacity=0.8,
    line_opacity=0.4,
    legend_name="Tweet Locations Frequency"
).add_to(m)

In [None]:
count_dict = dict(zip(country_counts['country'], country_counts['count']))

In [None]:
geojson_layer = folium.GeoJson(
    geojson,
    name="Country Info",
    style_function=lambda x: {
        'fillOpacity': 0,
        'color': 'black',
        'weight': 0.1
    },
    tooltip=folium.GeoJsonTooltip(
        fields=['name', 'count'],
        aliases=['Country:', 'Tweet Count:'],
        localize=True,
        sticky=True,
        labels=True
    )
).add_to(m)

In [None]:
for feature in geojson['features']:
    country_name = feature['properties']['name']
    feature['properties']['count'] = count_dict.get(country_name, 0)
    count = count_dict.get(country_name, 0)
    geom = shape(feature['geometry'])
    lon, lat = geom.centroid.coords[0]
    
    folium.Marker(
        location=[lat, lon],
        icon=folium.DivIcon(html=f"<div style='font-size: 7pt; color: black;'>{count}</div>")
    ).add_to(m)

In [None]:
m