In [51]:
import gzip
import os
from collections import Counter
from itertools import chain

import geopandas as gpd
import numpy as np
import pandas as pd
import plotly
import plotly.graph_objects as go
from tqdm import tqdm

In [52]:
# Working directory for the notebook; the relative './data/...' and
# './map.html' paths used in later cells are resolved against it.
DIR = '/home/jovyan/work'
# Only switch when the kernel was started somewhere else.
if not os.getcwd() == DIR:
    os.chdir(DIR)

In [53]:
# Register tqdm with pandas so that DataFrame.progress_apply (used by the
# daily loading loop further down) is available.
tqdm.pandas()


The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version



## TWEETS DATA

In [54]:
# Days to load, as the strings that are spliced into the
# './data/covid19_2020_04_<day>.json.gz' file names.
DAYS = [str(day_number) for day_number in range(24, 29)]

In [55]:
def get_top_tags_dict(df):
    """Map every country in ``df`` to its (up to) three most frequent tags.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'country' column and a 'keywords' column whose cells are
        iterables of hashable tags (one iterable per row).

    Returns
    -------
    dict
        ``{country: [tag, ...]}`` with at most three tags per country, most
        frequent first. Tags with equal counts keep the order in which they
        were first encountered in ``df``, so the result is reproducible
        across kernel restarts.
    """
    top_tags = {}

    # tqdm shows one progress tick per distinct country.
    for country in tqdm(df['country'].unique()):
        keyword_lists = df.loc[df['country'] == country, 'keywords']

        # Counter tallies every tag in a single pass. Sorting the distinct
        # tags with key=list.count rescans the whole flattened list once per
        # distinct tag, which is quadratic on large countries.
        tag_counts = Counter(chain.from_iterable(keyword_lists))
        top_tags[country] = [tag for tag, _ in tag_counts.most_common(3)]

    return top_tags

In [56]:
def get_text(row, tags):
    """Build the HTML hover text for one country.

    ``row['keywords']`` is rendered with ``str`` under the "Tweets" heading
    and ``tags`` (an iterable of strings) is listed one per line under the
    "Top 3 tags" heading.
    """
    tweets_part = str(row['keywords'])
    tags_part = '<br>'.join(tags)
    return '<b>Tweets:</b><br>' + tweets_part + '<br><b>Top 3 tags:</b><br>' + tags_part

In [57]:
# One choropleth trace per entry of DAYS, appended in DAYS order.
data = []

for day in DAYS:

    print('Reading file for day {}...'.format(day))

    # Each daily dump is a gzip-compressed, line-delimited JSON file.
    with gzip.open('./data/covid19_2020_04_{}.json.gz'.format(day)) as f:
        df = pd.read_json(f, lines=True)

    print('Processing data...')

    df = df.dropna()

    # Pull the country out of the nested location record, keep only the rows
    # whose date falls on this day, then drop the columns no longer needed.
    df['country'] = df.progress_apply(lambda tweet: tweet['location']['country'], axis=1)
    df['day'] = df.progress_apply(lambda tweet: tweet['date'].day, axis=1)
    df = df[df['day'] == int(day)]
    df = df.drop(columns=['tweet_id', 'user_id', 'location', 'date', 'day'])

    # The top tags have to be computed before the groupby below, because
    # count() replaces the keyword cells with per-country row counts.
    top_tags = get_top_tags_dict(df)
    df = df.groupby(['country']).count().reset_index()
    df['keywords_log'] = df.progress_apply(lambda rec: np.log(rec['keywords']), axis=1)
    df['text'] = df.progress_apply(lambda rec: get_text(rec, top_tags[rec['country']]), axis=1)

    # zmin/zmax are fixed so every day's trace shares one colour scale.
    data.append({
        'type': 'choropleth',
        'locations': df['country'].astype(str),
        'z': df['keywords_log'].astype(float),
        'locationmode': 'country names',
        'hoverinfo': 'location+text',
        'text': df['text'],
        'zmin': 0,
        'zmax': 13,
    })

Reading file for day 24...
Processing data...
100%|██████████| 547906/547906 [00:10<00:00, 51296.30it/s]
100%|██████████| 547906/547906 [00:13<00:00, 41797.56it/s]
100%|██████████| 183/183 [00:16<00:00, 10.81it/s]
100%|██████████| 183/183 [00:00<00:00, 24811.15it/s]
100%|██████████| 183/183 [00:00<00:00, 19396.97it/s]
Reading file for day 25...
Processing data...
100%|██████████| 547729/547729 [00:11<00:00, 46317.28it/s]
100%|██████████| 547729/547729 [00:12<00:00, 43675.63it/s]
100%|██████████| 184/184 [00:15<00:00, 11.93it/s]
100%|██████████| 184/184 [00:00<00:00, 31716.27it/s]
100%|██████████| 184/184 [00:00<00:00, 23890.29it/s]
Reading file for day 26...
Processing data...
100%|██████████| 531826/531826 [00:11<00:00, 48257.09it/s]
100%|██████████| 531826/531826 [00:11<00:00, 45352.05it/s]
100%|██████████| 184/184 [00:14<00:00, 12.61it/s]
100%|██████████| 184/184 [00:00<00:00, 27905.41it/s]
100%|██████████| 184/184 [00:00<00:00, 23317.18it/s]
Reading file for day 27...
Processing da

## PLOTLY MAP

In [67]:
# One slider step per trace in `data`; selecting a step makes only that
# day's choropleth visible.
steps = []
for i in range(len(data)):
    step = dict(
        method='restyle',
        args=['visible', [False] * len(data)],
        # Take the label from DAYS itself rather than DAYS[0] + i, so the
        # labels stay correct if DAYS is ever not a consecutive run.
        label='{}.04'.format(DAYS[i])
    )
    # Switch on the single trace that belongs to this step.
    step['args'][1][i] = True
    steps.append(step)

sliders = [
    dict(
        active=0,
        pad={"t": 1},
        steps=steps
    )
]

layout = dict(
    geo=dict(
        scope='world',
        projection={'type': 'natural earth'}
    ),
    sliders=sliders,
    # The date range in the title follows DAYS instead of being hard-coded.
    title='COVID RELATED TWEETS NUMBER IN NATURAL LOG SCALE<br>FOR {}.04.2020 - {}.04.2020'.format(
        DAYS[0], DAYS[-1]
    )
)

In [68]:
# Figure as a plain dict: the per-day traces plus the slider layout.
fig = {
    'data': data,
    'layout': layout,
}

In [69]:
# Render the figure inline in the notebook output.
plotly.offline.iplot(fig)

In [70]:
# Write the same figure to ./map.html (relative to the current working directory).
plotly.io.write_html(fig, './map.html')