In [None]:
import pandas as pd
import re
import regex
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import geopandas as gp
import geoplot as gplt
import geoplot.crs as gcrs
import contextily as ctx
import numpy as np
import shapely as shapely
from shapely.geometry import Polygon
from shapely.ops import transform
import collections
from collections import Counter
from typing import List, Tuple, Dict, Union, Generator, Optional
from pyproj import Transformer, CRS, Proj
import glob
import os
import matplotlib as mpl
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import matplotlib.colors as mcolors
import advertools as adv
import warnings
import emoji
warnings.filterwarnings('ignore')
import matplotlib.patheffects as pe
import adjustText as aT

In [None]:
df = gp.read_file(r"C:\Users\saman\OneDrive\Documents\Thesis\Data\RawData_Cleaned_Final.geojson")

In [None]:
df.head()

In [None]:
# convert to geodf, import using WGS 84 since that's how it exported from pgadmin
gdf = gp.GeoDataFrame(df,geometry =gp.points_from_xy(df.long,df.lat),crs =4326)

In [None]:
# reproject to Mollweide for visualization purposes later
gdf.to_crs("ESRI:54009",inplace=True)

## Start of temporal exploratory analysis:

In [None]:
# first I'll create columns with aggregated date information to make the creation of temporal subsets more straightforward
# parse the publish date strings into pandas datetimes so the .dt accessors below work
gdf['post_publish_date'] = pd.to_datetime(gdf['post_publish_date'])
gdf['Month/Year'] = gdf['post_publish_date'].dt.to_period('M')  # add new column showing timestamps aggregated to monthly periods
gdf['Week/Month'] = gdf['post_publish_date'].dt.to_period('W')  # add new column showing timestamps aggregated to weekly periods
# add new column with each timestamp moved forward by one semi-month-end offset (15th or last day of the month);
# this is semi-monthly rather than strictly biweekly
# NOTE(review): the offset appears to keep the time of day and to move dates that already sit on an anchor
# to the following one — confirm this is the intended binning
gdf['HalfMonth'] = gdf['post_publish_date'] + pd.offsets.SemiMonthEnd()
gdf.head()

In [None]:
# now let's create sub-datasets for typicality calculations, starting with temporal subsets (monthly)
gdf_jan = gdf[gdf['Month/Year'] == '2020-01']
gdf_feb = gdf[gdf['Month/Year'] == '2020-02']
gdf_mar = gdf[gdf['Month/Year'] == '2020-03']
gdf_apr = gdf[gdf['Month/Year'] == '2020-04']
gdf_may = gdf[gdf['Month/Year'] == '2020-05']
gdf_jun = gdf[gdf['Month/Year'] == '2020-06']
gdf_jul = gdf[gdf['Month/Year'] == '2020-07']
gdf_aug = gdf[gdf['Month/Year'] == '2020-08']
gdf_sep = gdf[gdf['Month/Year'] == '2020-09']
gdf_oct = gdf[gdf['Month/Year'] == '2020-10']
# note - no data for november
gdf_dec = gdf[gdf['Month/Year'] == '2020-12']
gdf_jan.head()

In [None]:
#  set up functions to calculate each component of the typicality equation

# number of a certain emoji within subset
def EmojiSubsetCounter(emojiname, subset):
    """Count the posts in ``subset`` whose 'emoji generic' text contains ``emojiname``.

    A post counts at most once, however often the emoji is repeated in it.
    Entries that are not strings (e.g. None/NaN for missing values) are
    skipped instead of raising a TypeError.

    Parameters
    ----------
    emojiname : str
        Emoji (or any substring) to look for.
    subset : DataFrame or mapping
        Object whose 'emoji generic' entry iterates over one value per post.

    Returns
    -------
    int
        Number of posts containing ``emojiname``.
    """
    return sum(
        1
        for post in subset['emoji generic']
        if isinstance(post, str) and emojiname in post
    )


# number of total emojis in subset
def AllEmojiSubsetCounter(subset):
    """Count every emoji occurrence across all posts in ``subset``.

    Each post's 'emoji generic' text is split with the regex grapheme-cluster
    pattern; a cluster is counted when at least one of its characters is a key
    of ``emoji.UNICODE_EMOJI['en']``. Entries that are not strings (e.g.
    None/NaN for missing values) are skipped instead of raising a TypeError.

    Parameters
    ----------
    subset : DataFrame or mapping
        Object whose 'emoji generic' entry iterates over one value per post.

    Returns
    -------
    int
        Total number of emoji clusters found.
    """
    # Looked up once instead of once per character.
    # NOTE(review): `emoji.UNICODE_EMOJI` appears to be unavailable in emoji 2.x —
    # confirm the version this notebook is pinned to.
    known_emojis = emoji.UNICODE_EMOJI['en']
    emojicounter = 0
    for post in subset['emoji generic']:
        if not isinstance(post, str):
            continue
        for cluster in regex.findall(r'\X', post):
            if any(char in known_emojis for char in cluster):
                emojicounter += 1
    return emojicounter


# number of specific emoji in whole dataset
def EmojiTotalCounter(emojiname, totaldataset):
    """Count the posts in ``totaldataset`` whose 'emoji generic' text contains ``emojiname``.

    A post counts at most once, however often the emoji is repeated in it.
    Entries that are not strings (e.g. None/NaN for missing values) are
    skipped instead of raising a TypeError.

    Parameters
    ----------
    emojiname : str
        Emoji (or any substring) to look for.
    totaldataset : DataFrame or mapping
        Object whose 'emoji generic' entry iterates over one value per post.

    Returns
    -------
    int
        Number of posts containing ``emojiname``.
    """
    return sum(
        1
        for post in totaldataset['emoji generic']
        if isinstance(post, str) and emojiname in post
    )



# number of emojis in whole dataset
def AllEmojiTotalCounter(totaldataset):
    """Count every emoji occurrence across all posts in ``totaldataset``.

    Each post's 'emoji generic' text is split with the regex grapheme-cluster
    pattern; a cluster is counted when at least one of its characters is a key
    of ``emoji.UNICODE_EMOJI['en']``. Entries that are not strings (e.g.
    None/NaN for missing values) are skipped instead of raising a TypeError.

    Parameters
    ----------
    totaldataset : DataFrame or mapping
        Object whose 'emoji generic' entry iterates over one value per post.

    Returns
    -------
    int
        Total number of emoji clusters found.
    """
    # Looked up once instead of once per character.
    # NOTE(review): `emoji.UNICODE_EMOJI` appears to be unavailable in emoji 2.x —
    # confirm the version this notebook is pinned to.
    known_emojis = emoji.UNICODE_EMOJI['en']
    emojicounter = 0
    for post in totaldataset['emoji generic']:
        if not isinstance(post, str):
            continue
        for cluster in regex.findall(r'\X', post):
            if any(char in known_emojis for char in cluster):
                emojicounter += 1
    return emojicounter


# typicality equation

def TypicalityEquation(emojisubset, allemojisubset, emojitotal, allemojitotal):
    """Return the typicality of an emoji in a subset relative to the whole dataset.

    Typicality is the relative deviation of the emoji's share in the subset
    (``emojisubset / allemojisubset``) from its share in the total dataset
    (``emojitotal / allemojitotal``): 0 means equally frequent, -1 means the
    emoji is absent from the subset, positive values mean over-representation.

    Raises ZeroDivisionError (for plain Python numbers) when either
    denominator count or ``emojitotal`` is zero.
    """
    subset_share = emojisubset / allemojisubset
    overall_share = emojitotal / allemojitotal
    return (subset_share - overall_share) / overall_share


In [None]:
# create a function to count the total frequency of the most commonly used hashtags
def most_common_hashtags(labels, quantity):
    """Tabulate the ``quantity`` hashtags that appear in the most posts.

    Parameters
    ----------
    labels : iterable of str
        One comma-separated hashtag string per post.
    quantity : int
        Number of top hashtags to keep (passed to ``Counter.most_common``).

    Returns
    -------
    pandas.DataFrame
        Columns "Hashtag" and "Occurence number", sorted by ascending count,
        with the single-space hashtag removed and the index reset.
    """
    tally = Counter()
    for label in labels:
        # set(): a hashtag repeated within one post is counted only once
        tally.update(set(re.split(',', label)))

    ranked = pd.DataFrame(tally.most_common(quantity),
                          columns=["Hashtag", "Occurence number"])
    ranked = ranked.sort_values(by="Occurence number", ascending=True)

    not_blank = ranked["Hashtag"] != " "
    return ranked[not_blank].reset_index(drop=True)

In [None]:
# create function to calculate monthly typicality values
def getMonthlyTypicality(emo):
    """Return the typicality of ``emo`` for each monthly subset of 2020.

    The result is a tuple of 11 values ordered January..October, December
    (there is no November subset). Each value compares the emoji's share in
    that month's subset with its share in the whole dataset ``gdf``.
    """
    # Hard-coded total emoji count of the whole dataset
    # (presumably a precomputed AllEmojiTotalCounter(gdf) — TODO confirm).
    all_emoji_total = 8807548
    emocount_total = EmojiTotalCounter(emo, gdf)
    monthly_subsets = (
        gdf_jan, gdf_feb, gdf_mar, gdf_apr, gdf_may, gdf_jun,
        gdf_jul, gdf_aug, gdf_sep, gdf_oct, gdf_dec,
    )
    return tuple(
        TypicalityEquation(
            EmojiSubsetCounter(emo, month_gdf),
            AllEmojiSubsetCounter(month_gdf),
            emocount_total,
            all_emoji_total,
        )
        for month_gdf in monthly_subsets
    )

In [None]:
# create spatial subsets, starting with country boundaries
countries_gdf = gp.read_file("Europe_Clipped_BBox.shp")
countries_gdf

In [None]:
countries_gdf.to_crs("ESRI:54009", inplace=True)

In [None]:
# Plot the custom shapefile (clipped to fit data)
fig, ax = plt.subplots(figsize=(15, 6))
countries_gdf.plot(ax=ax)
ax.set_title("Study Area", fontsize=20)
# hide the axes frame and ticks; this has to happen before plt.show() renders the figure
ax.set_axis_off()
plt.show()

# create grids here (just to start, emoji specific ones will be below)

In [None]:
"""
Defining constants to be used throughout the program

"""

#create grids based on the custom made eu shapefile

GRID_SIZE_METERS = 100000 
                        
# target projection: Mollweide
EPSG_CODE = 54009
CRS_PROJ = f"esri:{EPSG_CODE}"

# Input projection WGS 84
CRS_WGS = "epsg:4326"

# define Transformer ahead of time
# with xy-order of coordinates
PROJ_TRANSFORMER = Transformer.from_crs(
    CRS_WGS, CRS_PROJ, always_xy=True)

# also define reverse projection
PROJ_TRANSFORMER_BACK = Transformer.from_crs(
    CRS_PROJ, CRS_WGS, always_xy=True)

#projecting the bounds of the eu-shapefile to Mollweide

XMIN = PROJ_TRANSFORMER.transform(-18.729512 , 29.234046)[0]
XMAX = PROJ_TRANSFORMER.transform(39.73858, 29.234046)[0]
YMAX = PROJ_TRANSFORMER.transform(49.59352369, 71.16987838)[1]
YMIN = PROJ_TRANSFORMER.transform(49.59352369, 28.017169)[1]

# color map to use for typicality maps
# plt.get_cmap is used because matplotlib.cm.get_cmap is deprecated and no longer
# available in recent Matplotlib releases
BrBG = plt.get_cmap('BrBG')
# keep only the middle half of the BrBG ramp, sampled as 10 discrete colours
newcmp = ListedColormap(BrBG(np.linspace(0.25, 0.75, 10)))

In [None]:
def create_grids():
    """Build the square analysis grid covering the projected study-area bounds.

    Cell origins are spaced GRID_SIZE_METERS apart, from XMIN to XMAX and from
    YMIN to YMAX. Every cell is labelled by its (xbin, ybin) origin, which is
    the cell's upper-left corner: the polygon extends one cell width to the
    east and one cell length to the south of it.

    Returns
    -------
    tuple
        ``(grid, cols, rows)`` where ``grid`` is a GeoDataFrame indexed by
        (xbin, ybin) with CRS ``CRS_PROJ``, ``cols`` is the ascending list of
        x origins and ``rows`` is the descending list of y origins.
    """
    cell_w = GRID_SIZE_METERS
    cell_h = GRID_SIZE_METERS
    cols = list(range(int(np.floor(XMIN)), int(np.ceil(XMAX)), cell_w))
    # rows run north to south
    rows = list(range(int(np.floor(YMIN)), int(np.ceil(YMAX)), cell_h))[::-1]

    # one (x, y, polygon) record per cell, column by column
    records = [
        (x, y, Polygon([(x, y),
                        (x + cell_w, y),
                        (x + cell_w, y - cell_h),
                        (x, y - cell_h)]))
        for x in cols
        for y in rows
    ]
    cells = pd.DataFrame(records)
    cells.columns = ['xbin', 'ybin', 'bin_poly']
    # use x and y as index columns
    cells = cells.set_index(['xbin', 'ybin'])
    grid = gp.GeoDataFrame(cells.drop(columns=["bin_poly"]),
                           geometry=cells.bin_poly)
    grid.crs = CRS_PROJ
    return grid,cols,rows

grid,cols,rows = create_grids()

In [None]:
ybins = np.array(rows)
xbins = np.array(cols)

def get_best_bins(search_values_x, search_values_y,xbins, ybins): 
    """Return the best-matching bin label for every x/y coordinate pair.

    Note: prepare bins and values in correct matching projection

    Parameters
    ----------
    search_values_x, search_values_y : array-like
        Coordinates to assign to bins.
    xbins, ybins : numpy.ndarray
        Monotonic (increasing or decreasing) arrays of bin labels, as accepted
        by ``np.digitize``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x_labels, y_labels)``: for each coordinate, the entry of
        ``xbins``/``ybins`` that precedes the position ``np.digitize`` reports.

    Coordinates lying before the first bin are assigned to the first bin.
    Without the clamp below, ``np.digitize`` returns 0 for them, the ``- 1``
    turns that into index -1, and the point silently wraps around to the bin
    at the opposite end of the grid.
    """
    xbins_idx = np.digitize(search_values_x, xbins, right=False)
    ybins_idx = np.digitize(search_values_y, ybins, right=False)
    # clamp to the valid 1..len(bins) range so that `idx - 1` never becomes -1
    xbins_idx = np.clip(xbins_idx, 1, len(xbins))
    ybins_idx = np.clip(ybins_idx, 1, len(ybins))
    return (xbins[xbins_idx-1], ybins[ybins_idx-1])


xbins_match, ybins_match = get_best_bins(
    search_values_x=gdf.geometry.x.to_numpy(),
    search_values_y=gdf.geometry.y.to_numpy(),
    xbins=xbins, ybins=ybins)

In [None]:
# plot the generated grid alongside the european country outlines
base = grid.plot(figsize=(22,28), color='white', edgecolor='black', linewidth=0.1)
plot = countries_gdf.boundary.plot(ax=base)

In [None]:
gdf.loc[:, 'xbins_match'] = xbins_match
gdf.loc[:, 'ybins_match'] = ybins_match
gdf.drop(columns = ['long','lat','geometry'],inplace =True)
gdf.set_index(['xbins_match', 'ybins_match'], inplace=True)
# gdf.dropna(subset = 'hashtag', inplace =True)
grid.sort_index(inplace =True)
gdf.sort_index(inplace = True)
common_idx = grid.index.intersection(gdf.index) 
#instead of a spatial join, indexes are used to find which hashtag belongs to which grid
gdf

In [None]:
# counting the occurrences of each emoji across the whole dataset in preparation for the typicality calculations
# (count.update is applied to every post's comma-split emoji list purely for its side effect on `count`)
# NOTE(review): this tallies the 'emoji' column, whereas the counter functions for the temporal analysis
# read 'emoji generic' — confirm both columns encode emojis the same way (e.g. skin-tone variants)
count = Counter()  
gdf['emoji'].str.split(',').apply(count.update)

In [None]:
def grid_typicality(new_test,idx):
    """Store the typicality of the global ``EMOJI`` for one grid cell in ``typ``.

    ``new_test`` holds the comma-separated emoji strings of all posts in the
    cell (a pandas Series); ``idx`` is the cell's label in the global ``typ``
    frame. The cell frequency of ``EMOJI`` is compared with the global
    dataset frequency ``F_t``; cells without the emoji get -1.0.
    Nothing is returned; the result is written to ``typ.loc[idx, 'typicality']``.
    """
    # tally every emoji used in this cell (sub-dataset)
    cell_counts = Counter()
    new_test.str.split(',').apply(cell_counts.update)

    hits = cell_counts[EMOJI]
    if hits == 0:
        typ.loc[idx,'typicality'] = -1.0
        return

    cell_total = sum(cell_counts.values())
    cell_freq = hits/cell_total
    typ.loc[idx,'typicality'] = (cell_freq - F_t)/F_t

In [None]:
# set up tools to make country grids so topical consistency can be analyzed within countries

In [None]:
# Build a fresh grid for the country-level analysis with the create_grids()
# function defined in the grid-creation cell further up. Defining the same
# function a second time here would only shadow that definition and let the
# two copies drift apart.
grid,cols,rows = create_grids()

In [None]:
centroid_grid = grid.centroid.reset_index()
centroid_grid.set_index(["xbin", "ybin"], inplace=True)
grid.centroid

In [None]:
from geopandas.tools import sjoin
def intersect_grid_centroids(
    grid: gp.GeoDataFrame, 
    intersect_gdf: gp.GeoDataFrame) -> gp.GeoDataFrame:
    """Return the rows of grid whose centroids are matched to
    intersect_gdf by a spatial join.

    The centroids are only used for the join; the returned rows are
    taken from grid itself via ``grid.loc``.
    """
    # wrap the centroid GeoSeries in a GeoDataFrame; its single column is named 0
    centroid_grid = gp.GeoDataFrame(
        grid.centroid)
    centroid_grid.rename(
        columns={0:'geometry'},
        inplace=True)
    # register the renamed column as the active geometry, reusing the grid's CRS
    centroid_grid.set_geometry(
        'geometry', crs=grid.crs, 
        inplace=True)
    # NOTE(review): how='right' presumably keeps every row of intersect_gdf, including
    # ones without a matching centroid — confirm those cannot reach grid.loc below
    grid_intersect = sjoin(
        centroid_grid, intersect_gdf, 
        how='right')
    # presumably the names sjoin gives to the two levels of the grid's (xbin, ybin)
    # index — verify against the installed geopandas version
    grid_intersect.set_index(
        ["index_left0", "index_left1"],
        inplace=True)
    grid_intersect.index.names = ['xbin','ybin']
    # look the matched (xbin, ybin) labels up in the original grid
    return grid.loc[grid_intersect.index]

In [None]:
# and let's use a nice custom color ramp
typ_cmap = mpl.colors.LinearSegmentedColormap.from_list('beigeblue',['#d9af8c','#FFFFFF','#009999'], N=8)

In [None]:
# Let's make some country labels
# Here we find the representative points, copy our original df to a new df, and then set 
# the geometry column to the newly created representative points column 
# (because a GeoPandas df can only have one geometry column)

countries_gdf["rep"] = countries_gdf["geometry"].representative_point()
country_points = countries_gdf.copy()
country_points.set_geometry("rep", inplace = True)
ax = countries_gdf.plot(figsize = (15, 12), color = "whitesmoke", edgecolor = "lightgrey", linewidth = 0.5)
texts = []

for x, y, label in zip(country_points.geometry.x, country_points.geometry.y, country_points["NAME_LONG"]):
    texts.append(plt.text(x, y, label.upper(), fontsize = 8, color="dimgray"))

aT.adjust_text(texts, force_points=0.3, force_text=0.8, expand_points=(0,0), expand_text=(0.5,0.5), ha='left')

In [None]:
# that's too many labels, I'll make a subset so that only large countries are labelled

# .copy() gives an independent frame, so adding the "rep" column below does not
# write into a slice of countries_gdf
country_labels = countries_gdf.loc[countries_gdf['NAME_EN'].isin(["France", "Spain", "Germany", "Algeria", "Poland",
                                                                  "Libya", "Egypt", "Turkey", "United Kingdom", "Hungary",
                                                                  "Sweden", "Finland", "Norway", "Ireland", "Greece", "Italy",
                                                                  "Belarus", "Ukraine", "Austria", "Romania", "Denmark",
                                                                  "Russia", "Bulgaria", "Lithuania", "Latvia", "Estonia",
                                                                  "Iceland", "Morocco"])].copy()
# label anchor: a point guaranteed to lie inside each country polygon
country_labels["rep"] = country_labels["geometry"].representative_point()
country_labels.set_geometry("rep", inplace = True)
ax = countries_gdf.plot(figsize = (15, 12), color = "whitesmoke", edgecolor = "lightgrey", linewidth = 0.5)
texts = []

for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(plt.text(x-100000, y, label.upper(), fontsize = 10, color="dimgray")) #shift labels slightly to the west

aT.adjust_text(texts, force_points=0.3, force_text=1, expand_points=(0,0), expand_text=(1,0.5), avoid_self=False)

In [None]:
# that looks good, now let's add labels for major cities
cities_gdf = gp.read_file("ne_10m_populated_places_simple.shp")
cities_gdf = cities_gdf.drop(columns=['scalerank', 'natscale', 'labelrank', 'featurecla', 'namepar', # remove unnecessary columns
                                      'namealt', 'nameascii', 'adm0_a3', 'capalt', 'pop_max', 'sov0name', 'sov_a3', 'adm1name',
                                      'worldcity', 'pop_min', 'pop_other', 'meganame', 'ls_name', 'min_zoom', 'note', 'capin',
                                      'ne_id', 'iso_a2', 'adm0cap', 'adm0name'])
# drop every city whose 'megacity' value is 0
# NOTE(review): an earlier note described this as keeping "the top 3 rankings by population" —
# confirm what the 'megacity' flag actually encodes
cities_gdf = cities_gdf.drop(cities_gdf.index[cities_gdf['megacity'] == 0])
# keep only cities inside the study-area bounding box (same lon/lat bounds as the grid constants)
cities_gdf = cities_gdf.loc[cities_gdf['longitude'].between(-18.729512, 39.73858) & cities_gdf['latitude'].between(28.017169, 71.16987838)]
# reproject to Mollweide so the points line up with the country outlines
cities_gdf.to_crs("ESRI:54009", inplace=True)
cities_gdf

In [None]:
# label every remaining city: cities_gdf already holds only rows flagged as megacity
# that lie inside the study-area bounding box

# city_labels has to be created before it is used; .copy() keeps cities_gdf untouched
# when the extra geometry column is added
city_labels = cities_gdf.copy()
city_labels["rep"] = city_labels["geometry"].representative_point()
city_labels.set_geometry("rep", inplace = True)
ax = countries_gdf.plot(figsize = (15, 12), color = "whitesmoke", edgecolor = "lightgrey", linewidth = 0.5)
texts = []

for x, y, label in zip(city_labels.geometry.x, city_labels.geometry.y, city_labels["name"]):
    texts.append(plt.text(x-100000, y, label.title(), fontsize = 8, color="dimgray")) #shift labels slightly to the west
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(plt.text(x-100000, y, label.upper(), fontsize = 10, color="dimgray")) #shift labels slightly to the west
city_labels.plot(ax=ax, markersize=4)

# resolve label overlaps once, after all city and country labels exist; calling this
# inside the city loop would repeat the whole layout for every single city
aT.adjust_text(texts, force_points=1, force_text=1, expand_points=(1,1), expand_text=(1,0.5), avoid_self=False)

# figure out how to plot with points, together with country labels

In [None]:
def plot_count(emo):
    """Plot and save a bar chart of the number of posts per month that contain ``emo``.

    Posts are taken from the global ``gdf`` and matched on the
    'emoji generic' column; months are read from the 'Month/Year' column.
    The figure is shown and written to the plot_count figure folder, named
    after the emoji's demojized name.

    Parameters
    ----------
    emo : str
        The emoji character to look for.
    """
    emojiname_us = emoji.demojize(str(emo)).replace(":","")
    emojiname = emojiname_us.replace("_", " ")
    emojiname = emojiname.title()
    # regex=False: match the emoji literally (some emojis contain regex metacharacters);
    # na=False: posts without emoji text simply do not match
    subset = gdf[gdf['emoji generic'].str.contains(emo, regex=False, na=False)]
    x = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September','October', 'December']
    # the months behind the labels above (no data for November)
    month_periods = pd.PeriodIndex(
        ['2020-01', '2020-02', '2020-03', '2020-04', '2020-05', '2020-06',
         '2020-07', '2020-08', '2020-09', '2020-10', '2020-12'], freq='M')
    # reindex so that a month without any matching post shows up as 0 instead of
    # shortening y and shifting (or breaking) the bars relative to the labels
    y = subset['Month/Year'].value_counts().reindex(month_periods, fill_value=0)
    # set the style before the figure is created so it also applies to this figure
    sns.set(style = 'whitegrid')
    fig,ax =plt.subplots(figsize = (20,5))
    width = 0.75
    ax.bar(x, y, width)
    ax.set_ylabel('Number of Posts')
    ax.set_xlabel('Month in 2020')
    plt.title("Number of " + emojiname + " Emojis Used Per Month", size =30)
    plt.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_count\plot_count_" + emojiname_us + ".png", dpi=300, bbox_inches = "tight")
    plt.show()
    
def plot_monthlytyp(emo):
    """Plot and save a bar chart of the monthly typicality of ``emo``.

    The typicality values come from ``getMonthlyTypicality(emo)`` and are
    drawn against the eleven month labels (January..October, December).
    The figure is shown and written to the plot_monthlytyp figure folder,
    named after the emoji's demojized name.
    """
    emojiname_us = emoji.demojize(str(emo)).replace(":","")
    emojiname = emojiname_us.replace("_", " ").title()

    typicalities = getMonthlyTypicality(emo)
    months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September','October', 'December']

    fig, ax = plt.subplots(figsize = (20,5))
    sns.set(style = 'whitegrid')
    bar_width = 0.75
    ax.bar(months, typicalities, bar_width, color = "green")
    ax.set_ylabel('Typicality')
    ax.set_xlabel('Month in 2020')
    plt.title("Typicality of " + emojiname + " Emoji Over Time", size =30)

    out_path = r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_monthlytyp\plot_monthlytyp_" + emojiname_us + ".png"
    plt.savefig(out_path, dpi=300, bbox_inches = "tight")
    plt.show()

## To test the workflow for the emoji-specific analysis, I'll look at some emojis that probably have topical consistency because they represent an unambiguous real object:

# Beer emoji

In [None]:
beerposts = gdf[gdf['emoji'].str.contains('🍺')]
most_common_hashtags(beerposts['hashtags'], 20)

In [None]:
# create wordcloud of top 20 co-occurring hashtags
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
def make_wordcloud(newlist):
    """Show a word cloud of the 20 most frequent hashtags in ``newlist``.

    ``newlist`` is iterated as comma-separated hashtag strings, one per post.
    All hashtags are lower-cased, joined into one text and passed to
    WordCloud with its default stop words removed.
    """
    # flatten every post's hashtags into one list of lower-cased tags
    all_tags = [tag
                for item in newlist
                for tag in item.lower().split(',')]
    text = " ".join(all_tags)

    cloud_builder = WordCloud(stopwords=set(STOPWORDS),
                              prefer_horizontal = 1,
                              max_words = 20,
                              colormap = "winter",
                              background_color="white",
                              width=1600,
                              height=800,
                              collocations = False,
                              normalize_plurals=False)
    wordcloud = cloud_builder.generate(text)

    plt.figure(figsize=(20,10))
    plt.tight_layout(pad=0)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()

In [None]:
subset = df[df['emoji generic'].str.contains('🍺')]
make_wordcloud(subset['hashtags'])

This emoji has topical consistency because over 87% of the posts using the top 20 co-occurring hashtags refer to the same topic (beer). So let's go ahead and do some further analysis:

In [None]:
plot_count('🍺')

In [None]:
plot_monthlytyp('🍺')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '🍺'

emojiname_us = emoji.demojize(str(EMOJI)).replace(":","")
emojiname = emojiname_us.replace("_", " ")
emojiname = emojiname.title()

#calculating frequency for total dataset     
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t/N_t

# frame that collects one typicality value per occupied grid cell; it is created
# NaN-filled with a float dtype because geopandas treats object-dtype columns as
# categorical when plotting, which would colour the cells by category instead of by value
typ = pd.DataFrame(index = common_idx, columns = ['typicality'], dtype = float)

for midx in common_idx: #looping through all the common indexes between the grids and dataframe
    grid_typicality(gdf.loc[midx,"emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data = typ['typicality'], geometry =geom, crs = CRS_PROJ)

base = grid.plot(figsize=(22,28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10) 
# add labels using adjust text
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(plt.text(x-100000, y, label.upper(), fontsize = 14, color="dimgray", zorder=10))
plot = typ_gdf.plot(ax=base, column = 'typicality', colormap=typ_cmap, alpha = 0.85, zorder=5)

plt.title("Spatial Typicality of " + emojiname + " Emoji", size =35)
plt.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches = "tight")
    

# Wine emoji

In [None]:
wineposts = gdf[gdf['emoji'].str.contains('🍷')]
most_common_hashtags(wineposts['hashtags'], 20)

In [None]:
subset = df[df['emoji generic'].str.contains('🍷')]
make_wordcloud(subset['hashtags'])

This emoji has topical consistency because over 83% of the posts using the top 20 co-occurring hashtags refer to the same topic (wine). So let's look at the spatial and temporal typicality:

In [None]:
plot_count('🍷')

In [None]:
plot_monthlytyp('🍷')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '🍷'

emojiname_us = emoji.demojize(str(EMOJI)).replace(":","")
emojiname = emojiname_us.replace("_", " ")
emojiname = emojiname.title()

#calculating frequency for total dataset     
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t/N_t

# frame that collects one typicality value per occupied grid cell; it is created
# NaN-filled with a float dtype because geopandas treats object-dtype columns as
# categorical when plotting, which would colour the cells by category instead of by value
typ = pd.DataFrame(index = common_idx, columns = ['typicality'], dtype = float)

for midx in common_idx: #looping through all the common indexes between the grids and dataframe
    grid_typicality(gdf.loc[midx,"emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data = typ['typicality'], geometry =geom, crs = CRS_PROJ)

base = grid.plot(figsize=(22,28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10) 
# add labels using adjust text
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(plt.text(x-100000, y, label.upper(), fontsize = 14, color="dimgray", zorder=10))
plot = typ_gdf.plot(ax=base, column = 'typicality', colormap=typ_cmap, alpha = 0.85, zorder=5)

plt.title("Spatial Typicality of " + emojiname + " Emoji", size =35)
plt.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches = "tight")
    

## Now that the workflow has been established, I'll proceed with emojis that might have more debatable meanings:

# Folded Hands

Hypothesis: this emoji is used in posts discussing covid-related stay-at-home measures.

In [None]:
prayposts = gdf[gdf['emoji generic'].str.contains('🙏')] # create subset of posts containing specific emoji
most_common_hashtags(prayposts['hashtags'], 20)

In [None]:
subset = df[df['emoji generic'].str.contains('🙏')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 62.4%. Posts with this emoji seem to relate to the Christian religion, although there are also many other topics represented.

In [None]:
plot_count('🙏')
plot_monthlytyp('🙏')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '🙏'

emojiname_us = emoji.demojize(str(EMOJI)).replace(":","")
emojiname = emojiname_us.replace("_", " ")
emojiname = emojiname.title()

#calculating frequency for total dataset     
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t/N_t

# frame that collects one typicality value per occupied grid cell; it is created
# NaN-filled with a float dtype because geopandas treats object-dtype columns as
# categorical when plotting, which would colour the cells by category instead of by value
typ = pd.DataFrame(index = common_idx, columns = ['typicality'], dtype = float)

for midx in common_idx: #looping through all the common indexes between the grids and dataframe
    grid_typicality(gdf.loc[midx,"emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data = typ['typicality'], geometry =geom, crs = CRS_PROJ)

base = grid.plot(figsize=(22,28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10) 
# add labels using adjust text
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(plt.text(x-100000, y, label.upper(), fontsize = 14, color="dimgray", zorder=10))
plot = typ_gdf.plot(ax=base, column = 'typicality', colormap=typ_cmap, alpha = 0.85, zorder=5)

plt.title("Spatial Typicality of " + emojiname + " Emoji", size =35)
plt.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches = "tight")

# Masked Face emoji

Hypothesis: this emoji is used in posts discussing covid-related safety measures. This hypothesis will be considered valid if over 80% of the top 20 associated hashtags are topically consistent. 

In [None]:
maskposts = gdf[gdf['emoji'].str.contains('😷')]
most_common_hashtags(maskposts['hashtags'], 20)

In [None]:
subset = df[df['emoji generic'].str.contains('😷')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of ~92%. Posts with this emoji seem to relate to covid-related safety measures. Posts containing this emoji can reasonably be assumed to be discussing this topic. In this case, we can investigate the spatial and temporal typicality to reasonably track the discussion of this topic over time and space. 

In [None]:
plot_count('😷')

In [None]:
plot_monthlytyp('😷')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '😷'

emojiname_us = emoji.demojize(str(EMOJI)).replace(":","")
emojiname = emojiname_us.replace("_", " ")
emojiname = emojiname.title()

#calculating frequency for total dataset     
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t/N_t

# frame that collects one typicality value per occupied grid cell; it is created
# NaN-filled with a float dtype because geopandas treats object-dtype columns as
# categorical when plotting, which would colour the cells by category instead of by value
typ = pd.DataFrame(index = common_idx, columns = ['typicality'], dtype = float)

for midx in common_idx: #looping through all the common indexes between the grids and dataframe
    grid_typicality(gdf.loc[midx,"emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data = typ['typicality'], geometry =geom, crs = CRS_PROJ)

base = grid.plot(figsize=(22,28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10) 
# add labels using adjust text
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(plt.text(x-100000, y, label.upper(), fontsize = 14, color="dimgray", zorder=10))
plot = typ_gdf.plot(ax=base, column = 'typicality', colormap=typ_cmap, alpha = 0.85, zorder=5)

plt.title("Spatial Typicality of " + emojiname + " Emoji", size =35)
plt.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches = "tight")
    

# Raised Fist Emoji

Hypothesis: this emoji is used in posts discussing the Black Lives Matter movement. This hypothesis will be considered valid if over 80% of posts using the top 20 associated hashtags are topically consistent. 

In [None]:
fistposts = gdf[gdf['emoji generic'].str.contains('✊')]
most_common_hashtags(fistposts['hashtags'], 20)

In [None]:
subset = df[df['emoji generic'].str.contains('✊')]
make_wordcloud(subset['hashtags'])

It seems most of the associated hashtags have to do with the BLM movement, except for 95 posts by a bar in Greece called bluecollarstore

This emoji demonstrates topical consistency because a significant portion (~91%) of posts with the associated hashtags relate to the same topic (the Black Lives Matter movement). Posts containing this emoji can reasonably be assumed to be discussing this topic. In this case, we can investigate the spatial and temporal typicality to reasonably track the discussion of this topic over time and space. 

In [None]:
plot_count('✊')

In [None]:
plot_monthlytyp('✊')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '✊'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

# Rainbow Emoji

Hypothesis: this emoji is used in posts discussing Pride/LGBTQIA+ rights. This hypothesis will be considered valid if over 80% of posts with the top 20 associated hashtags are topically consistent. 

In [None]:
# 20 most common hashtags among posts whose 'emoji' value contains this emoji.
rainbowposts = gdf.loc[gdf['emoji'].str.contains('🌈')]
most_common_hashtags(rainbowposts['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('🌈')]
make_wordcloud(subset['hashtags'])

This emoji does not demonstrate topical consistency because only ~74% of posts with the associated hashtags relate to the same topic (Pride/LGBTQIA+ rights). Posts containing this emoji could refer to a variety of topics. 

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('🌈')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('🌈')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '🌈'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

# Syringe emoji

Hypothesis: this emoji is used in posts discussing vaccines. This hypothesis will be considered valid if over 80% of posts with the top 20 associated hashtags are topically consistent. 

In [None]:
# 20 most common hashtags among posts whose 'emoji' value contains this emoji.
# The frame must be bound to the same name the next line reads (`syringeposts`).
syringeposts = gdf[gdf['emoji'].str.contains('💉')]
most_common_hashtags(syringeposts['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('💉')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of ~71%. Posts with this emoji seem to relate mostly to vaccines.

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('💉')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('💉')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '💉'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

# Christmas Tree emoji

Hypothesis: this emoji is used in posts discussing Christmas. This hypothesis will be considered valid if over 80% of posts with the top 20 associated hashtags are topically consistent. 

In [None]:
# 20 most common hashtags among posts whose 'emoji' value contains this emoji.
xmastreeposts = gdf.loc[gdf['emoji'].str.contains('🎄')]
most_common_hashtags(xmastreeposts['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('🎄')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of ~88%. Posts with this emoji seem to relate to Christmas. Posts containing this emoji can reasonably be assumed to be discussing this topic. 

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('🎄')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('🎄')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '🎄'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

# Microbe emoji

Hypothesis: this emoji is used in posts discussing COVID-19. This hypothesis will be considered valid if over 80% of posts with the top 20 associated hashtags are topically consistent. 

In [None]:
# 20 most common hashtags among posts whose 'emoji' value contains this emoji.
microbeposts = gdf.loc[gdf['emoji'].str.contains('🦠')]
most_common_hashtags(microbeposts['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('🦠')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a high topical consistency of over 98%. Posts with this emoji seem to relate to the topic of covid-19.  Posts containing this emoji can reasonably be assumed to be discussing this topic.

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('🦠')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('🦠')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '🦠'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

# Clapping Hands Emoji

In [None]:
# 20 most common hashtags among posts whose 'emoji generic' value contains this emoji.
clap = gdf.loc[gdf['emoji generic'].str.contains('👏')]
most_common_hashtags(clap['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('👏')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a high topical consistency of 89.7%. Posts with this emoji seem to relate to the topic of clapping for healthcare workers during the pandemic.  Posts containing this emoji can reasonably be assumed to be discussing this topic.

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('👏')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('👏')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '👏'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

# now we'll repeat the process for emojis that were found to be typical in the top 10 countries by userdays

In [None]:
# Countries analysed in this section (per the heading above: the top 10 by userdays).
top_countries = [
    "United Kingdom",
    "Spain",
    "France",
    "Germany",
    "Italy",
    "Turkey",
    "Netherlands",
    "Belgium",
    "Switzerland",
    "Austria",
]

In [None]:
# read data in from the raw data analysis: one emoji-typicality CSV per country
country_typ = {}
for country in top_countries:
    country_typ[country] = pd.read_csv(r"C:\Users\saman\OneDrive\Documents\Thesis\Data\TypicalEmojis_ByCountry\EmojiTypicality_" + country + ".csv", index_col=False)

# Per country, drop the rows whose 'Typicality' is negative.
# The loop variable is deliberately NOT named `df`: `df` is the post dataset loaded
# at the top of the notebook, and the later `subset = df[df['emoji generic']...]`
# cells still need it. Rebinding it here would leave it pointing at a country table.
pos_country_typ = {}
for country, country_df in country_typ.items():
    negative_rows = country_df.index[country_df['Typicality'] < 0]
    pos_df = country_df.drop(negative_rows)
    pos_country_typ[country] = pos_df

pos_country_typ

### due to time constraints, I will just investigate the top 3 most typical emojis per country

# United Kingdom

In [None]:
# Top 3 most typical emojis for the United Kingdom (emoji, alias, typicality),
# kept as comments so the cell does not raise a SyntaxError when run:
# 🤱 :breast-feeding:    1.831311
# 🧁 :cupcake:    1.136168
# 🏌️‍♂️ :man_golfing:    1.128539

In [None]:
# 20 most common hashtags among posts whose 'emoji generic' value contains this emoji.
bf = gdf.loc[gdf['emoji generic'].str.contains('🤱')]
most_common_hashtags(bf['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('🤱')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a high topical consistency of over 99%. Posts with this emoji seem to relate to the topic of babies.  Posts containing this emoji can reasonably be assumed to be discussing this topic.

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('🤱')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('🤱')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '🤱'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")

In [None]:
# 20 most common hashtags among posts whose 'emoji' value contains this emoji.
cupcake = gdf.loc[gdf['emoji'].str.contains('🧁')]
most_common_hashtags(cupcake['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('🧁')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 55%. Posts with this emoji seem to relate to cupcakes.

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('🧁')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('🧁')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '🧁'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# 20 most common hashtags among posts whose 'emoji' value contains this emoji.
golf = gdf.loc[gdf['emoji'].str.contains('🏌️‍♂️')]
most_common_hashtags(golf['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('🏌️‍♂️')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 89.8%. Posts with this emoji seem to relate to golf. Posts containing this emoji can reasonably be assumed to be discussing this topic.

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('🏌️‍♂️')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('🏌️‍♂️')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '🏌️‍♂️'

# alias from the emoji package; computed as in the other cells, although the title
# and file name below are written out by hand for this emoji
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of Man Golfing Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_man-golfing.png", dpi=300, bbox_inches="tight")
    

# Spain

In [None]:
# Top 3 most typical emojis for Spain (emoji, alias, typicality),
# kept as comments so the cell does not raise a SyntaxError when run:
# 🕗 :eight_o’clock:    3.214485
# 👩‍⚕️ :woman_health_worker:    2.661256
# 🏥 :hospital:    2.175691

In [None]:
# 20 most common hashtags among posts whose 'emoji' value contains this emoji.
eight = gdf.loc[gdf['emoji'].str.contains('🕗')]
most_common_hashtags(eight['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('🕗')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 78%. Posts with this emoji seem to relate to the concept of clapping for healthcare workers at 8pm each evening during the pandemic. 

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('🕗')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('🕗')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '🕗'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# 20 most common hashtags among posts whose 'emoji generic' value contains this emoji.
whw = gdf.loc[gdf['emoji generic'].str.contains('👩‍⚕️')]
most_common_hashtags(whw['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('👩‍⚕️')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 95.5%. Posts with this emoji seem to relate to appreciation for nurses during the pandemic. Posts containing this emoji can reasonably be assumed to be discussing this topic.

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('👩‍⚕️')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('👩‍⚕️')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '👩‍⚕️'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# 20 most common hashtags among posts whose 'emoji' value contains this emoji.
hosp = gdf.loc[gdf['emoji'].str.contains('🏥')]
most_common_hashtags(hosp['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('🏥')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 88%. Posts with this emoji seem to relate to covid-19. Posts containing this emoji can reasonably be assumed to be discussing this topic.

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('🏥')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('🏥')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '🏥'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

# France

In [None]:
# Top 3 most typical emojis for France (emoji, alias, typicality),
# kept as comments so the cell does not raise a SyntaxError when run:
# 🗳️ :ballot_box_with_ballot:    3.254025
# 💬 :speech_balloon:    3.192003
# ⤵️ :right_arrow_curving_down:    1.618778

In [None]:
# 20 most common hashtags among posts whose 'emoji' value contains this emoji.
ballot = gdf.loc[gdf['emoji'].str.contains('🗳️')]
most_common_hashtags(ballot['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('🗳️')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 72%. Posts with this emoji seem to relate to various elections across Europe. Because 72% is below the 80% threshold used for the hypotheses above, the assumption that posts containing this emoji are discussing this topic should be treated with caution.

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('🗳️')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('🗳️')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '🗳️'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# 20 most common hashtags among posts whose 'emoji' value contains this emoji.
speech = gdf.loc[gdf['emoji'].str.contains('💬')]
most_common_hashtags(speech['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('💬')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 27%. Posts with this emoji don't seem to have a topic in common. 

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('💬')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('💬')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '💬'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# 20 most common hashtags among posts whose 'emoji' value contains this emoji.
racd = gdf.loc[gdf['emoji'].str.contains('⤵️')]
most_common_hashtags(racd['hashtags'], 20)

In [None]:
# Word cloud (make_wordcloud, defined elsewhere) of the hashtags on posts containing this emoji.
subset = df.loc[df['emoji generic'].str.contains('⤵️')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 34%. Posts with this emoji don't seem to have topics in common.

In [None]:
# plot_count is defined elsewhere in the notebook; presumably it charts this emoji's post counts -- confirm.
plot_count('⤵️')

In [None]:
# plot_monthlytyp is defined elsewhere in the notebook; presumably it charts this emoji's monthly typicality -- confirm.
plot_monthlytyp('⤵️')

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '⤵️'

# alias from the emoji package: underscore form for the file name, title-cased form for the plot title
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# relative frequency of the emoji in the total dataset
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# placeholder frame for the per-grid-cell values; presumably filled by grid_typicality
# through notebook-level names (typ, EMOJI, F_t) -- confirm against its definition
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# one call per index shared by the grid and the post dataframe
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# fully transparent grid layer that creates the figure and axes
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# combine with europe geometry
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# country labels, shifted 100000 coordinate units to the left; the list is reset
# here so the cell does not depend on (or keep growing) a `texts` from earlier cells
texts = []
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` replaces `colormap`, which geopandas only accepts as a deprecated alias
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

# Germany

In [None]:
# 🤮 :face_vomiting:    1.416614
# ☝️ :index_pointing_up:    1.327176
# 🤓 :nerd_face:    0.869367

In [None]:
# Select the posts whose 'emoji' column contains this emoji and pass their hashtags
# to most_common_hashtags (helper defined elsewhere; 20 is presumably the top-N size).
vom = gdf.loc[gdf['emoji'].str.contains('🤮')]
most_common_hashtags(vom['hashtags'], 20)

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('🤮')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 24%. Posts with this emoji don't seem to have topics in common.

In [None]:
# Call the notebook's plot_count helper (defined elsewhere in the notebook) for this emoji.
plot_count('🤮')

In [None]:
# Call the notebook's plot_monthlytyp helper (defined elsewhere in the notebook) for this emoji.
plot_monthlytyp('🤮')

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '🤮'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# Select the posts whose 'emoji' column contains this emoji and pass their hashtags
# to most_common_hashtags (helper defined elsewhere; 20 is presumably the top-N size).
ipu = gdf.loc[gdf['emoji'].str.contains('☝️')]
most_common_hashtags(ipu['hashtags'], 20)

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('☝️')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 33%. Posts with this emoji don't seem to have topics in common.

In [None]:
# Call the notebook's plot_count helper (defined elsewhere in the notebook) for this emoji.
plot_count('☝️')

In [None]:
# Call the notebook's plot_monthlytyp helper (defined elsewhere in the notebook) for this emoji.
plot_monthlytyp('☝️')

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '☝️'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# Select the posts whose 'emoji' column contains this emoji and pass their hashtags
# to most_common_hashtags (helper defined elsewhere; 20 is presumably the top-N size).
nerd = gdf.loc[gdf['emoji'].str.contains('🤓')]
most_common_hashtags(nerd['hashtags'], 20)

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('🤓')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 55%. Posts with this emoji seem to discuss topics considered "nerdy", like reading and playing video games.

In [None]:
# Call the notebook's plot_count helper (defined elsewhere in the notebook) for this emoji.
plot_count('🤓')

In [None]:
# Call the notebook's plot_monthlytyp helper (defined elsewhere in the notebook) for this emoji.
plot_monthlytyp('🤓')

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '🤓'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

# Italy

In [None]:
# 💣 :bomb:    1.484113
# 🤡 :clown_face:    1.071201
# 💎 :gem_stone:    0.807681

In [None]:
# Select the posts whose 'emoji' column contains this emoji, print what most_common_hashtags
# returns for their hashtags (second argument 20), then call the plot_count and
# plot_monthlytyp helpers (all defined elsewhere in the notebook) for the same emoji.
bomb = gdf.loc[gdf['emoji'].str.contains('💣')]
print(most_common_hashtags(bomb['hashtags'], 20))
plot_count('💣')
plot_monthlytyp('💣')

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('💣')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 72%. Posts with this emoji seem to discuss electronic music.

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '💣'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# Select the posts whose 'emoji' column contains this emoji, print what most_common_hashtags
# returns for their hashtags (second argument 20), then call the plot_count and
# plot_monthlytyp helpers (all defined elsewhere in the notebook) for the same emoji.
clown = gdf.loc[gdf['emoji'].str.contains('🤡')]
print(most_common_hashtags(clown['hashtags'], 20))
plot_count('🤡')
plot_monthlytyp('🤡')

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('🤡')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 18%. Posts with this emoji don't seem to discuss common topics. 

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '🤡'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# Select the posts whose 'emoji' column contains this emoji, print what most_common_hashtags
# returns for their hashtags (second argument 20), then call the plot_count and
# plot_monthlytyp helpers (all defined elsewhere in the notebook) for the same emoji.
gem = gdf.loc[gdf['emoji'].str.contains('💎')]
print(most_common_hashtags(gem['hashtags'], 20))
plot_count('💎')
plot_monthlytyp('💎')

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('💎')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 56.5%. Posts with this emoji seem to discuss jewelry.

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '💎'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

# Turkey

👄 :mouth:    3.235072
🔞 :no_one_under_eighteen:    3.066475
🎀 :ribbon:    2.975887

In [None]:
# Select the posts whose 'emoji' column contains this emoji, print what most_common_hashtags
# returns for their hashtags (second argument 20), then call the plot_count and
# plot_monthlytyp helpers (all defined elsewhere in the notebook) for the same emoji.
mouth = gdf.loc[gdf['emoji'].str.contains('👄')]
print(most_common_hashtags(mouth['hashtags'], 20))
plot_count('👄')
plot_monthlytyp('👄')

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('👄')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 2%. Posts with this emoji don't seem to discuss similar topics. Many of the hashtags associated with this emoji are names of Turkish cities.

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '👄'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# Select the posts whose 'emoji' column contains this emoji, print what most_common_hashtags
# returns for their hashtags (second argument 20), then call the plot_count and
# plot_monthlytyp helpers (all defined elsewhere in the notebook) for the same emoji.
no18 = gdf.loc[gdf['emoji'].str.contains('🔞')]
print(most_common_hashtags(no18['hashtags'], 20))
plot_count('🔞')
plot_monthlytyp('🔞')

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('🔞')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 35% relating to sex work. 

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '🔞'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# Select the posts whose 'emoji' column contains this emoji, print what most_common_hashtags
# returns for their hashtags (second argument 20), then call the plot_count and
# plot_monthlytyp helpers (all defined elsewhere in the notebook) for the same emoji.
ribbon = gdf.loc[gdf['emoji'].str.contains('🎀')]
print(most_common_hashtags(ribbon['hashtags'], 20))
plot_count('🎀')
plot_monthlytyp('🎀')

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('🎀')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 55% relating to sex work.

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '🎀'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

# Netherlands

In [None]:
# Netherlands
# 🚁 :helicopter:    7.370691
# 🚒 :fire_engine:    0.221937
 

In [None]:
# Select the posts whose 'emoji' column contains this emoji, print what most_common_hashtags
# returns for their hashtags (second argument 20), then call the plot_count and
# plot_monthlytyp helpers (all defined elsewhere in the notebook) for the same emoji.
heli = gdf.loc[gdf['emoji'].str.contains('🚁')]
print(most_common_hashtags(heli['hashtags'], 20))
plot_count('🚁')
plot_monthlytyp('🚁')

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('🚁')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 82%. Posts with this emoji seem to relate to p2000, which is a Dutch emergency alert system. 

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '🚁'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# Select the posts whose 'emoji' column contains this emoji, print what most_common_hashtags
# returns for their hashtags (second argument 20), then call the plot_count and
# plot_monthlytyp helpers (all defined elsewhere in the notebook) for the same emoji.
fe = gdf.loc[gdf['emoji'].str.contains('🚒')]
print(most_common_hashtags(fe['hashtags'], 20))
plot_count('🚒')
plot_monthlytyp('🚒')

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('🚒')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 69%. Posts with this emoji seem to relate to p2000, which is a Dutch emergency alert system. 

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '🚒'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

# Belgium

In [None]:
# 🖤 :black_heart:    0.636208
# 🤔 :thinking_face:    0.322439
# 😎 :smiling_face_with_sunglasses:    0.124985

In [None]:
# Select the posts whose 'emoji' column contains this emoji, print what most_common_hashtags
# returns for their hashtags (second argument 20), then call the plot_count and
# plot_monthlytyp helpers (all defined elsewhere in the notebook) for the same emoji.
bh = gdf.loc[gdf['emoji'].str.contains('🖤')]
print(most_common_hashtags(bh['hashtags'], 20))
plot_count('🖤')
plot_monthlytyp('🖤')

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('🖤')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 24.7%. Posts with this emoji don't seem to discuss similar topics. 

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '🖤'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# Select the posts whose 'emoji' column contains this emoji, print what most_common_hashtags
# returns for their hashtags (second argument 20), then call the plot_count and
# plot_monthlytyp helpers (all defined elsewhere in the notebook) for the same emoji.
think = gdf.loc[gdf['emoji'].str.contains('🤔')]
print(most_common_hashtags(think['hashtags'], 20))
plot_count('🤔')
plot_monthlytyp('🤔')

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('🤔')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 23%. Posts with this emoji seem to relate to COVID-19.

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '🤔'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# Select the posts whose 'emoji' column contains this emoji, print what most_common_hashtags
# returns for their hashtags (second argument 20), then call the plot_count and
# plot_monthlytyp helpers (all defined elsewhere in the notebook) for the same emoji.
sunglasses = gdf.loc[gdf['emoji'].str.contains('😎')]
print(most_common_hashtags(sunglasses['hashtags'], 20))
plot_count('😎')
plot_monthlytyp('😎')

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('😎')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 24%. Posts containing this emoji don't seem to discuss similar topics.

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '😎'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# Switzerland
# 💙 :blue_heart:     0.11247

In [None]:
# Select the posts whose 'emoji' column contains this emoji, print what most_common_hashtags
# returns for their hashtags (second argument 20), then call the plot_count and
# plot_monthlytyp helpers (all defined elsewhere in the notebook) for the same emoji.
blueheart = gdf.loc[gdf['emoji'].str.contains('💙')]
print(most_common_hashtags(blueheart['hashtags'], 20))
plot_count('💙')
plot_monthlytyp('💙')

In [None]:
# Pass the hashtags of posts whose 'emoji generic' column contains this emoji to make_wordcloud.
# NOTE(review): this filters `df` on 'emoji generic', while the hashtag cell filters `gdf` on 'emoji' - confirm intended.
subset = df.loc[df['emoji generic'].str.contains('💙')]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 34% relating to infant and child mortality and health problems. 

In [None]:
# Plot spatial typicality (no temporal subsets) for one emoji and save the map as a PNG.
EMOJI = '💙'

# File-safe (underscored) and display (title-cased) versions of the emoji's name.
emojiname_us = emoji.demojize(str(EMOJI)).replace(":", "")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of this emoji in the total dataset.
# NOTE(review): grid_typicality is defined outside this cell and presumably reads EMOJI / F_t
# and writes into `typ` as notebook globals - confirm before renaming any of them.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame (one row per index shared by the grid and the dataframe) to hold the typicality values.
# NOTE(review): the column starts out as empty strings; verify it is numeric by the time it is plotted.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# Iterate over the shared indexes directly instead of enumerate() plus a positional re-lookup.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every other layer is drawn on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.xaxis.set_visible(False)
base.yaxis.set_visible(False)
# combine with europe geometry (country boundaries drawn above the typicality layer)
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country labels, shifted 100000 CRS units to the left of each label point.
# NOTE(review): `texts` comes from another cell and grows on every run; adjust_text is not called here.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x - 100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is a deprecated alias whose warning is hidden
# by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
# NOTE(review): absolute, machine-specific output path - consider a configurable figures directory.
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    

In [None]:
# Austria
# 😍 :smiling_face_with_heart-eyes:    0.106052


In [None]:
# Posts whose emoji string contains 😍. The emoji is matched literally
# (regex=False) and rows with a missing emoji value are excluded (na=False)
# instead of producing a NaN mask that cannot be used for boolean indexing.
hearteyes = gdf[gdf['emoji'].str.contains('😍', regex=False, na=False)]
print(most_common_hashtags(hearteyes['hashtags'], 20))

plot_count('😍')

plot_monthlytyp('😍')

In [None]:
# Word cloud of the hashtags from posts whose 'emoji generic' value contains 😍.
# Literal match (regex=False); rows with a missing value are excluded (na=False)
# rather than yielding a NaN mask that breaks boolean indexing.
subset = df[df['emoji generic'].str.contains('😍', regex=False, na=False)]
make_wordcloud(subset['hashtags'])

This emoji demonstrates a topical consistency of 13%. Posts containing this emoji don't seem to discuss similar topics.

In [None]:
# plot spatial typicality (no temporal subsets)
EMOJI = '😍'

# Names derived from the emoji alias: the underscore form goes into the output
# file name, the title-cased form into the plot title.
emojiname_us = emoji.demojize(EMOJI).replace(":","")
emojiname = emojiname_us.replace("_", " ").title()

# Relative frequency of the emoji in the total dataset. These names stay at
# notebook level; presumably grid_typicality() reads EMOJI / F_t and fills
# `typ` as globals -- TODO confirm against its definition.
n_t = count[EMOJI]
N_t = sum(count.values())
F_t = n_t / N_t

# Placeholder frame with one row per index shared by the grid and the dataframe.
typ = pd.DataFrame(index=common_idx, columns=['typicality'], data='')

# One call per shared index; the index value itself identifies the grid cell.
for midx in common_idx:
    grid_typicality(gdf.loc[midx, "emoji"], midx)

geom = grid.loc[common_idx, "geometry"]
typ_gdf = gp.GeoDataFrame(data=typ['typicality'], geometry=geom, crs=CRS_PROJ)

# Fully transparent grid layer: creates the figure/axes that every later layer draws on.
base = grid.plot(figsize=(22, 28), color='white', alpha=0)
base.grid(False)
# Hide axes ticks
base.axes.xaxis.set_visible(False)
base.axes.yaxis.set_visible(False)
# Country outlines drawn above the typicality layer (higher zorder).
countries_gdf.boundary.plot(ax=base, edgecolor='dimgray', linewidth=0.7, zorder=10)
# Country name labels, shifted left by 100000 CRS units. The Text artists are
# appended to `texts`, a list created in an earlier cell.
# NOTE(review): adjust_text is not called in this cell and `texts` keeps growing
# across figures -- confirm whether that is intended.
for x, y, label in zip(country_labels.geometry.x, country_labels.geometry.y, country_labels["NAME_EN"]):
    texts.append(base.text(x-100000, y, label.upper(), fontsize=14, color="dimgray", zorder=10))
# `cmap` is the supported keyword; `colormap` is deprecated in geopandas and the
# warning is hidden by the notebook-wide warnings filter.
plot = typ_gdf.plot(ax=base, column='typicality', cmap=typ_cmap, alpha=0.85, zorder=5)

base.set_title("Spatial Typicality of " + emojiname + " Emoji", size=35)
base.grid(False)
fig = plot.get_figure()
fig.savefig(r"C:\Users\saman\OneDrive\Documents\Thesis\Figures\Emoji_Specific_Analysis\plot_spatialtyp\plot_spatialtyp_" + emojiname_us + ".png", dpi=300, bbox_inches="tight")
    