# Visualization of Twitter hashtag popularity

## change cells size

In [14]:
# Stretch the notebook container to the full browser width so the maps
# rendered further down have room.
# Imported locally because this cell sits above the main import cell; without
# it a fresh "Restart & Run All" fails here with a NameError.
from IPython.display import HTML, display

display(HTML("<style>.container { width:100% !important; }</style>"))

## Imports and constants

In [1]:
import pandas as pd
import numpy as np
import folium
from folium.plugins import HeatMap
from IPython.core.display import display, HTML

In [2]:
# Initial centre of every folium map built in this notebook
# (NOTE(review): these coordinates look like Manchester, UK - confirm).
LATITUDE_INIT = 53.4807593
LONGITUDE_INIT = -2.2426305

# Initial zoom level passed to folium.Map(zoom_start=...).
ZOOM = 6

# Name of the column that is searched for hashtag mentions.
TWEET_CONTENT_COL = 'Tweet content'

In [3]:
# Sheet that is later searched for tweet text and read for the
# 'Latitude' / 'Longitude' columns.
hashtags_content_df = pd.read_excel(
    'files/export_dashboard.xlsx',
    sheet_name='Stream',
)

# Pre-computed hashtag counts ('Hash' and 'Repeats' columns).
top_hashtags_df = pd.read_excel(
    'files/export_content.xlsx',
    sheet_name='Top Hashtags',
)

In [4]:
# Preview the raw hashtag counts. A bare last expression gives the rich
# (HTML table) rendering, consistent with the other preview cells below.
top_hashtags_df.head()

         Hash  Repeats
0  nowplaying     5300
1      Hiring     2920
2    areacode     2407
3         tnc     2407
4         job     2157


## define functions

In [5]:
import re
def get_coordinates(hashtags_df, column_name, hashtag):
    """Return the coordinates of every row whose text mentions ``#hashtag``.

    The match is a case-insensitive, literal (non-regex) substring search for
    ``'#' + hashtag``, so ``'job'`` also matches ``'#jobs'``.

    Parameters
    ----------
    hashtags_df : pandas.DataFrame
        Frame holding the text column plus 'Latitude' and 'Longitude' columns.
    column_name : str
        Name of the text column to search.
    hashtag : str
        Hashtag to look for, without the leading '#'.

    Returns
    -------
    pandas.DataFrame
        The 'Latitude' and 'Longitude' columns of the matching rows, keeping
        their original index labels.
    """
    # na=False: rows with missing text simply count as "no match" instead of
    # propagating NaN into the mask.
    mentions = hashtags_df[column_name].str.contains(
        '#' + hashtag, case=False, regex=False, na=False
    )
    # Select with the boolean mask via .loc. The previous version passed index
    # *labels* to .iloc, which is only correct for a default 0..n-1 index and
    # otherwise returns the wrong rows or raises IndexError.
    return hashtags_df.loc[mentions, ['Latitude', 'Longitude']]

In [6]:
def get_new_map(hashtags_df, column_name, new_hashtag):
    """Build a heat map of the rows mentioning ``new_hashtag``.

    Looks up the matching coordinates with ``get_coordinates``, lays them as a
    HeatMap over a fresh folium map centred on the notebook's initial
    position, prints the hashtag with its number of matches, and returns the
    map.
    """
    points = get_coordinates(hashtags_df, column_name, new_hashtag)

    heat_map = folium.Map([LATITUDE_INIT, LONGITUDE_INIT], zoom_start=ZOOM)
    heat_layer = HeatMap(points.values)
    heat_map.add_child(heat_layer)

    # One row per matching tweet (the frame has exactly two columns).
    occurrences = points.shape[0]
    print('hashtag: #{}, occurrences: {}'.format(new_hashtag, occurrences))
    return heat_map

In [7]:
# Normalise hashtag spelling to lower case so that variants differing only by
# case share the same key.
top_hashtags_df['Hash'] = top_hashtags_df['Hash'].str.lower()
top_hashtags_df.head()

Unnamed: 0,Hash,Repeats
0,nowplaying,5300
1,hiring,2920
2,areacode,2407
3,tnc,2407
4,job,2157


## Merge hashtag counts case-insensitively

In [8]:
# Add up the repeat counts of hashtags that became identical after
# lower-casing, giving one row per hashtag (sorted by hashtag, fresh 0..n-1
# index).
# Grouping by the column label itself, and letting pandas do the summing,
# avoids iterating over groupby(['Hash']): with a one-element list the group
# keys are 1-tuples on pandas >= 2.0, which would put tuples instead of
# strings in the 'Hash' column.
new_df = top_hashtags_df.groupby('Hash', as_index=False)['Repeats'].sum()

In [9]:
# Most frequent hashtags first.
hashtags = new_df.sort_values('Repeats', ascending=False)
hashtags.head()

Unnamed: 0,Hash,Repeats
59,nowplaying,5300
46,job,4311
50,london,3899
42,hiring,3741
80,tnc,2407


## visualization

In [15]:
import ipywidgets as widgets

label = widgets.Label(value='Choose hashtag:')

# Options follow the order of `hashtags` (most frequent first).
dropdown = widgets.Dropdown(
    options=list(hashtags['Hash'].values),
    disabled=False,
)

# Everything the map produces (the printed summary and the map itself) is
# routed through a single Output widget. Calling display() directly from a
# widget callback either stacks a new map under the old ones on every
# selection or, depending on the front end, does not show up in the cell.
map_output = widgets.Output()


def show_hashtag_map(hashtag):
    """Replace the content of ``map_output`` with the heat map for ``hashtag``."""
    # wait=True keeps the old map on screen until the new one is ready.
    map_output.clear_output(wait=True)
    with map_output:
        display(get_new_map(hashtags_content_df, TWEET_CONTENT_COL, hashtag))


def on_value_change(change):
    """Dropdown observer: redraw the map for the newly selected hashtag."""
    show_hashtag_map(change['new'])


dropdown.observe(on_value_change, names='value')

display(label)
display(dropdown)
display(map_output)

# Start with the most frequent hashtag, which is also the dropdown's initial
# selection.
top_hashtag = hashtags['Hash'].values[0]
show_hashtag_map(top_hashtag)

hashtag: #nowplaying, occurrences: 5304


In [None]:
# Lists the IPython magic commands available in this kernel.
# NOTE(review): looks like an exploratory leftover; nothing else in the notebook uses it.
%lsmagic