# Fake Heat Map

### fake_heatmap.py

In order to create the heatmap, we need the following packages:

In [1]:
import json
from collections import Counter
import pandas as pd
import altair as alt
import datetime
from dateutil.parser import parse
from itertools import groupby
from operator import itemgetter

The tweets are stored in a file with one JSON object per line, so the file is read line by line and each line is parsed separately:

In [2]:
# Load the tweet dataset: each non-empty line of the file is parsed as one
# JSON object and appended to `data`.
data = []
# The encoding is given explicitly so that non-ASCII tweet text is decoded the
# same way on every platform instead of depending on the locale default.
with open('dataset/fakecovid_result_final_translated_full.json', 'r', encoding='utf-8') as f:
    for line in f:
        # A blank line (e.g. a trailing newline at the end of the file) is not
        # valid JSON and would make json.loads raise, so it is skipped.
        if line.strip():
            data.append(json.loads(line))


We're interested in the "created_at" field.

In [3]:
# Creation day of every tweet, formatted as a 'YYYY/MM/DD' string.
# The tweets are iterated directly, so no hand-maintained index is needed.
lista = [parse(tweet['created_at']).strftime('%Y/%m/%d') for tweet in data]

After converting each creation date into a string, the number of occurrences of every date in 'lista' is counted; days between 2020/01/01 and 2020/08/31 on which no tweet was created are added with a count of zero.
Then a DataFrame (a 2-dimensional labeled data structure) is generated from these per-day counts.

In [4]:
# Tally how many tweets were created on each day.
count = Counter(lista)

# Every calendar day from 2020/01/01 up to, but not including, 2020/09/01.
start = datetime.datetime.strptime("2020/01/01", "%Y/%m/%d")
end = datetime.datetime.strptime("2020/09/01", "%Y/%m/%d")
date_generated = [
    start + datetime.timedelta(days=offset)
    for offset in range((end - start).days)
]

# Days without any tweet still need a row, so they get an explicit zero;
# days that already have a tally are left untouched.
for day in date_generated:
    count.setdefault(day.strftime("%Y/%m/%d"), 0)

# One row per day: 'data' holds the date string, 'tweet_count' the tally.
df = (
    pd.DataFrame.from_dict(count, orient='index')
    .reset_index()
    .rename(columns={'index': 'data', 0: 'tweet_count'})
)

The heatmap is created and visualized through the 'altair_viewer':

In [5]:
# Calendar-style heatmap: day of the month on x, month on y, and the cell
# colour encodes the number of tweets created on that day.
chart = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=alt.X('date(data):O'),
        y=alt.Y('month(data):O'),
        color=alt.Color('tweet_count:Q', scale=alt.Scale(scheme="turbo")),
        tooltip=[
            alt.Tooltip('monthdate(data):T', title='Date'),
            alt.Tooltip('tweet_count:Q', title='Tweet Count'),
        ],
    )
    .properties(title="Tweet count heatmap", width=800, height=300)
)

# Print the count inside every cell. Counts from 12 to 25 are drawn in black
# and all others in white (presumably for contrast against the colour scale).
text = chart.mark_text(baseline='middle').encode(
    text=alt.Text('tweet_count:Q'),
    color=alt.condition(
        alt.FieldRangePredicate(field='tweet_count', range=[12, 25]),
        alt.value('black'),  # predicate is true
        alt.value('white'),  # predicate is false
    ),
)

# Layer the labels on top of the coloured cells.
chart + text

### Heatmap - Time/Day of the week

The code is pretty much the same as the above. The heatmap now shows a peak in the number of tweets created after 10 am every day of the week.

In [6]:
# One '<weekday> <hour>' label per tweet (formats '%a' and '%H', e.g. 'Mon 10').
# Each timestamp is parsed once and the tweets are iterated directly, instead
# of parsing twice and indexing `data` with a manual counter.
lista_ora = [parse(tweet['created_at']).strftime('%a %H') for tweet in data]

# Number of tweets for every weekday/hour combination that occurs.
count_ora = Counter(lista_ora)

df_ora = (
    pd.DataFrame.from_dict(count_ora, orient='index')
    .reset_index()
    .rename(columns={'index': 'data', 0: 'tweet_count'})
)

# Split each label back into its two parts: 'data' keeps the weekday and the
# new 'ora' column receives the hour.
day_hour_pairs = [label.split() for label in df_ora['data'].tolist()]
df_ora['data'] = [pair[0] for pair in day_hour_pairs]
df_ora['ora'] = [pair[1] for pair in day_hour_pairs]

# Weekday/hour heatmap: hour of the day on x, weekday on y (ordered Monday to
# Sunday), and the cell colour encodes the number of tweets.
chart = (
    alt.Chart(df_ora)
    .mark_rect()
    .encode(
        x=alt.X('ora:N'),
        y=alt.Y('data:N', sort=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']),
        color=alt.Color('tweet_count:Q', scale=alt.Scale(scheme="turbo")),
        tooltip=[
            alt.Tooltip('data:N', title='Date'),
            alt.Tooltip('tweet_count:Q', title='Tweet Count'),
        ],
    )
    .properties(title="Time/Day of the week heatmap", width=800, height=300)
)

# Print the count inside every cell. Counts from 9 to 20 are drawn in black
# and all others in white (presumably for contrast against the colour scale).
text = chart.mark_text(baseline='middle').encode(
    text=alt.Text('tweet_count:Q'),
    color=alt.condition(
        alt.FieldRangePredicate(field='tweet_count', range=[9, 20]),
        alt.value('black'),  # predicate is true
        alt.value('white'),  # predicate is false
    ),
)

# Layer the labels on top of the coloured cells.
chart + text

### HeatMap Engagement - Number of Likes generated by all the tweets in the dataset for each date

The code is pretty much the same as above; in this case we consider the 'favorite_count' field and sum it for each date.

In [7]:
# Pair every tweet's creation day ('YYYY/MM/DD') with its 'favorite_count'.
# The tweets are iterated directly, so no hand-maintained index is needed.
likes = [
    (parse(tweet['created_at']).strftime('%Y/%m/%d'), tweet['favorite_count'])
    for tweet in data
]

first = itemgetter(0)

# Total likes per day. groupby only merges adjacent items, which is why the
# pairs are sorted by day before grouping.
sums = [(k, sum(item[1] for item in tups_to_sum))
        for k, tups_to_sum in groupby(sorted(likes, key=first), key=first)]
dictsum = dict(sums)

# Every calendar day from 2020/01/01 up to, but not including, 2020/09/01.
start = datetime.datetime.strptime("2020/01/01", "%Y/%m/%d")
end = datetime.datetime.strptime("2020/09/01", "%Y/%m/%d")
date_generated = [start + datetime.timedelta(days=x) for x in range(0, (end - start).days)]

# Days without any tweet still need a row, so they get an explicit zero;
# setdefault formats each date once and leaves existing totals untouched.
for date in date_generated:
    dictsum.setdefault(date.strftime("%Y/%m/%d"), 0)

# One row per day: 'data' holds the date string, 'likes_count' the total.
df = pd.DataFrame.from_dict(dictsum, orient='index').reset_index()
df = df.rename(columns={'index': 'data', 0: 'likes_count'})


# Calendar-style heatmap: day of the month on x, month on y, and the cell
# colour encodes the total likes of the tweets created on that day.
chart = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=alt.X('date(data):O'),
        y=alt.Y('month(data):O'),
        color=alt.Color('likes_count:Q', scale=alt.Scale(scheme="turbo")),
        tooltip=[
            alt.Tooltip('monthdate(data):T', title='Date'),
            alt.Tooltip('likes_count:Q', title='Likes Count'),
        ],
    )
    .properties(title="Likes heatmap", width=1500, height=500)
)

# Print the total inside every cell. Totals from 600000 to 730000 are drawn in
# black and all others in white (presumably for contrast against the colours).
text = chart.mark_text(baseline='middle').encode(
    text=alt.Text('likes_count:Q'),
    color=alt.condition(
        alt.FieldRangePredicate(field='likes_count', range=[600000, 730000]),
        alt.value('black'),  # predicate is true
        alt.value('white'),  # predicate is false
    ),
)

# Layer the labels on top of the coloured cells.
chart + text

### HeatMap Engagement - Number of Retweets generated by all the tweets in the dataset for each date

The code is pretty much the same as above; in this case we consider the 'retweet_count' field and sum it for each date.

In [8]:
# Pair every tweet's creation day ('YYYY/MM/DD') with its 'retweet_count'.
# The tweets are iterated directly, so no hand-maintained index is needed.
rts = [
    (parse(tweet['created_at']).strftime('%Y/%m/%d'), tweet['retweet_count'])
    for tweet in data
]

first_rt = itemgetter(0)

# Total retweets per day. groupby only merges adjacent items, which is why the
# pairs are sorted by day before grouping.
sums_rt = [(k, sum(item[1] for item in tups_to_sum))
           for k, tups_to_sum in groupby(sorted(rts, key=first_rt), key=first_rt)]
dictsum_rt = dict(sums_rt)

# Every calendar day from 2020/01/01 up to, but not including, 2020/09/01.
start = datetime.datetime.strptime("2020/01/01", "%Y/%m/%d")
end = datetime.datetime.strptime("2020/09/01", "%Y/%m/%d")
date_generated = [start + datetime.timedelta(days=x) for x in range(0, (end - start).days)]

# Days without any tweet still need a row, so they get an explicit zero;
# setdefault formats each date once and leaves existing totals untouched.
for date in date_generated:
    dictsum_rt.setdefault(date.strftime("%Y/%m/%d"), 0)

# One row per day: 'data' holds the date string, 'retweet_count' the total.
df_rt = pd.DataFrame.from_dict(dictsum_rt, orient='index').reset_index()
df_rt = df_rt.rename(columns={'index': 'data', 0: 'retweet_count'})


# Calendar-style heatmap: day of the month on x, month on y, and the cell
# colour encodes the total retweets of the tweets created on that day.
c = (
    alt.Chart(df_rt)
    .mark_rect()
    .encode(
        x=alt.X('date(data):O'),
        y=alt.Y('month(data):O'),
        color=alt.Color('retweet_count:Q', scale=alt.Scale(scheme="turbo")),
        tooltip=[
            alt.Tooltip('monthdate(data):T', title='Date'),
            alt.Tooltip('retweet_count:Q', title='Retweet Count'),
        ],
    )
    .properties(title="Retweets heatmap", width=1500, height=500)
)

# Print the total inside every cell. Totals from 100000 to 250000 are drawn in
# black and all others in white (presumably for contrast against the colours).
t = c.mark_text(baseline='middle').encode(
    text=alt.Text('retweet_count:Q'),
    color=alt.condition(
        alt.FieldRangePredicate(field='retweet_count', range=[100000, 250000]),
        alt.value('black'),  # predicate is true
        alt.value('white'),  # predicate is false
    ),
)

# Layer the labels on top of the coloured cells.
c + t