# Dates Bar Charts - Fake Covid-19 Dataset

### fake_dates_bar_chart.py and fake_dates_area_chart.py

In order to create the bar and area charts, we need the following packages:

In [1]:
from collections import Counter
import pandas as pd
#import numpy as np
import json
#import sys
#import string
#import re
import itertools  
#from emot.emo_unicode import UNICODE_EMO, EMOTICONS
#import emoji
#from wordcloud import WordCloud, STOPWORDS
#import matplotlib.pyplot as plt 
import nltk
#from PIL import Image
import altair as alt
import csv
#import itertools

The tweets are stored one JSON object per line, so the file is read and parsed line by line:

In [2]:
# Load every tweet into `data`: each non-blank line of the file is parsed as
# its own JSON document.
# The encoding is passed explicitly so the result does not depend on the
# platform default (assumes the file is UTF-8, as JSON normally is).
with open('dataset/fakecovid_result_translated_full.json', 'r', encoding='utf-8') as f:
    data = [json.loads(line) for line in f if line.strip()]

### Bar chart: "Number of Tweets per day of the week"

We only need the "created_at" field: its first three characters are the abbreviated day of the week (e.g. "Tue").

In [3]:
# Keep only the day-of-week abbreviation, i.e. the first three characters of
# each tweet's "created_at" value.
new = [tweet['created_at'][0:3] for tweet in data]

# Number of tweets for each day abbreviation.
count = Counter(new)

The bar chart is created:

In [4]:
# One row per day abbreviation with its tweet total.  Building the frame from
# (day, total) pairs names both columns directly and still yields the
# expected columns when `count` is empty.
df = pd.DataFrame(list(count.items()), columns=['Day', 'Tweet count'])

print(df)

# Horizontal bars, one per day, listed Monday to Sunday; both the bar length
# and the shade of blue encode the tweet total.
chart = alt.Chart(df).mark_bar().encode(
    x=alt.X('Tweet count:Q'),
    y=alt.Y('Day:N', sort=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']),
    color=alt.Color('Tweet count:Q', scale=alt.Scale(scheme="blues")),
).properties(width=700, height=300, title="Number of Tweets per day of the week")

chart.show()

   Day  Tweet count
0  Tue          242
1  Sat          176
2  Sun          194
3  Wed          259
4  Thu          231
5  Fri          201
6  Mon          248
Displaying chart at http://localhost:20856/


### Area chart: "Trend of the number of Tweets during the year"

We again use the "created_at" field: characters 4 to 6 (zero-based) hold the abbreviated month (e.g. "Mar").

In [5]:
# Keep only the month abbreviation, i.e. characters 4-6 of each tweet's
# "created_at" value.
new = [tweet['created_at'][4:7] for tweet in data]

# Number of tweets for each month abbreviation.
count = Counter(new)

The area chart is created:

In [6]:
# One row per month abbreviation with its tweet total.  Building the frame
# from (month, total) pairs names both columns directly and still yields the
# expected columns when `count` is empty.
df = pd.DataFrame(list(count.items()), columns=['Month', 'Tweet count'])

print(df)

# Calendar order for the x axis.  All twelve months are listed so that data
# from any part of the year is placed correctly.
month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Area chart with point markers and a blue outline; the fill is a vertical
# (x1 == x2) linear gradient between white and medium turquoise.
chart = alt.Chart(df).mark_area(
    point=True,
    line={'color': 'dodgerblue'},
    color=alt.Gradient(
        gradient='linear',
        stops=[alt.GradientStop(color='white', offset=0),
               alt.GradientStop(color='mediumturquoise', offset=1)],
        x1=1,
        x2=1,
        y1=1,
        y2=0
    )
).properties(width=500, height=300, title="Trend of the number of Tweets during the year").encode(
    alt.X('Month:N', sort=month_order),
    alt.Y('Tweet count:Q'),
    tooltip=['Month', 'Tweet count']
).interactive()

chart.show()

  Month  Tweet count
0   Feb           92
1   Mar          597
2   Apr          381
3   May          237
4   Jun           85
5   Jul           38
6   Aug           44
7   Jan           76
8   Sep            1
Displaying chart at http://localhost:20856/


# Marco: Tweets per month by category (stacked bar chart)

In [7]:
# Stacked bar chart: number of Tweets per month, split by the category that
# the filtered CSV associates with each Tweet id.

# Normalised category (lower-cased, spaces removed) -> DataFrame column that
# receives its monthly total.  The dict order is also the column order.
verdict_columns = {
    'false': 'Count False Tweets',
    'partiallyfalse': 'Count Partially False Tweets',
    'unproven': 'Count Unproven Tweets',
    'others': 'Counter Other Tweets',
    'true': 'Counter True Tweets',
}
fold_columns = list(verdict_columns.values())

# Load the CSV and flatten it row by row into a single list.  Ids are cast to
# str so they can be compared with the "id_str" field of the tweets.
csv_dataframe = pd.read_csv('dataset/fakecovid_filtered_dataset_clean.csv', sep=";")
csv_dataframe['tweet_id'] = csv_dataframe['tweet_id'].astype(str)
csv_list = csv_dataframe.values.tolist()
lista_unica_csv = list(itertools.chain.from_iterable(csv_list))

# Re-read the tweets so this cell can run on its own.  The encoding is given
# explicitly so the result does not depend on the platform default (assumes
# the file is UTF-8, as JSON normally is); blank lines are skipped.
with open('dataset/fakecovid_result_translated_full.json', 'r', encoding='utf-8') as f:
    data = [json.loads(line) for line in f if line.strip()]

# Build one "<month> <category>" label per tweet.
cmt_list = []
for tweet in data:
    # Linear scan for the first element equal to the tweet id; raises
    # ValueError when the id does not occur in the CSV at all.
    indice_csv = lista_unica_csv.index(tweet['id_str'])
    # The element right after the id is the next cell of the same CSV row.
    # NOTE(review): presumably the category column directly follows
    # "tweet_id" in the CSV - confirm against the file header.
    category = lista_unica_csv[indice_csv + 1].lower().replace(" ", "")
    cmt_list.append(tweet['created_at'][4:7] + " " + category)

# label -> number of tweets carrying that label.
fdist = dict(Counter(cmt_list))

# Pivot the label totals into a single row per month, one column per
# category.
per_month = {}
for label, tweet_count in fdist.items():
    month, _, category = label.partition(" ")
    totals = per_month.setdefault(month, dict.fromkeys(fold_columns, 0))
    column = verdict_columns.get(category)
    if column is None:
        # Unknown (or empty) category: report it and leave it out of the totals.
        print("errore count")
    else:
        totals[column] += tweet_count

# 'count' is the sum of the recognised categories of the month.
df = pd.DataFrame(
    [
        {'month': month, 'count': sum(totals.values()), **totals}
        for month, totals in per_month.items()
    ],
    columns=['month', 'count'] + fold_columns,
)

df = df.sort_values(by=['count'], ascending=[False])

# Fold the per-category columns into key/value pairs so that every month
# becomes one bar stacked (and coloured) by category.  Months without any
# recognised category are filtered out; there is deliberately no upper bound
# so that busy months are never dropped.
# TODO: "aaa" is a placeholder title - replace it with a descriptive one.
bars = alt.Chart(df).transform_fold(
    fold_columns,
).mark_bar().encode(
    x=alt.X('value:Q'),
    y=alt.Y('month:N'),
    color=alt.Color('key:N')
).transform_filter(
    'datum.count >= 1'
).properties(title="aaa")


bars