# Twitter Sentiment Analysis - Fake Covid-19 dataset

We've used the following packages:

In [1]:
import plotly.express as px
from textblob import TextBlob
import sys
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import nltk
import pycountry
import re
import string
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from langdetect import detect
from nltk.stem import SnowballStemmer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import CountVectorizer
from collections import Counter
import json
from emot.emo_unicode import UNICODE_EMO, EMOTICONS
import emoji
from nltk.corpus import stopwords
import itertools
import altair as alt

# Turn off Altair's max-rows check on the data passed to charts.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

Then we have defined the following functions to clean the tweets' text:

In [2]:
def remove_emoticons(text):
    """Return ``text`` with every emoticon pattern keyed in ``EMOTICONS`` removed."""
    # The EMOTICONS keys are used as regex alternatives, joined into one group.
    alternatives = u'|'.join(EMOTICONS)
    matcher = re.compile(u'(' + alternatives + u')')
    return matcher.sub(r'', text)

def remove_emoji(text):
    """Return ``text`` with all characters in the listed Unicode ranges removed.

    Note: the last range (U+24C2 to U+1F251) is very wide, so characters that
    are not emoji but fall inside it (for example CJK ideographs) are removed
    as well.
    """
    code_point_ranges = (
        u"\U0001F600-\U0001F64F",  # emoticons
        u"\U0001F300-\U0001F5FF",  # symbols & pictographs
        u"\U0001F680-\U0001F6FF",  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF",  # flags (iOS)
        u"\U00002702-\U000027B0",
        u"\U000024C2-\U0001F251",
    )
    # One character class covering every range; runs are removed in one go.
    matcher = re.compile("[" + "".join(code_point_ranges) + "]+", flags=re.UNICODE)
    return matcher.sub(r'', text)

def remove_urls(text):
    """Return ``text`` without any token that starts with ``http``.

    A match runs from ``http`` up to the next whitespace character.
    """
    url_matcher = re.compile(r"http\S+")
    return url_matcher.sub("", text)

def remove_twitter_urls(text):
    """Return ``text`` without ``pic.twitter...`` media links.

    A match runs from the literal ``pic.twitter`` up to the next whitespace.
    The dot is escaped so that only a real ``.`` matches; an unescaped dot
    would also swallow ordinary words such as "epic twitterstorm".
    """
    clean = re.sub(r"pic\.twitter\S+", "", text)
    return clean

def give_emoji_free_text(text):
    """Return ``text`` with every emoji known to the ``emoji`` package removed.

    ``emoji.get_emoji_regexp()`` is no longer available in emoji 2.x, so
    ``emoji.replace_emoji`` is used whenever the installed version offers it;
    the regexp helper is kept as a fallback for older releases.
    """
    if hasattr(emoji, 'replace_emoji'):
        return emoji.replace_emoji(text, replace='')
    return emoji.get_emoji_regexp().sub(r'', text)

def noamp(text):
    """Return ``text`` with each HTML ampersand entity replaced by a space.

    The trailing ``;`` of ``&amp;`` is optional in the pattern so the whole
    entity is replaced instead of leaving a stray semicolon behind.
    """
    clean = re.sub(r"&amp;?", " ", text)
    return clean

In order to do the classification of the tweets, we need to read the csv file and the json file:

In [3]:
# Semicolon-separated CSV; tweet ids are converted to strings after loading.
csv_dataframe = pd.read_csv('fakecovid/dataset/FINAL_fakecovid_final_filtered_dataset_clean.csv', sep=";")
csv_dataframe['tweet_id'] = csv_dataframe['tweet_id'].astype(str)

# Rows as nested lists, then every cell of every row flattened into one list.
csv_list = csv_dataframe.values.tolist()
lista_unica_csv = list(itertools.chain.from_iterable(csv_list))

# The JSON file is read line by line, one JSON document per line.
# The encoding is fixed to UTF-8 so the result does not depend on the platform
# default, and blank lines are skipped instead of crashing json.loads.
data = []
with open('fakecovid/dataset/fakecovid_result_final_translated_full.json', 'r', encoding='utf-8') as f:
    for line in f:
        if line.strip():
            data.append(json.loads(line))

# NOTE: this is a second name for the same list object, not a copy.
data1 = data

## Pie chart

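We're interested in the "full_text" field, which is first cleaned with the functions defined above.  
We've used the VADER **SentimentIntensityAnalyzer** from NLTK to calculate the negative, neutral, positive and compound scores of each text; a tweet is then labelled by comparing its positive and negative scores.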

In [4]:
tweet_list = []
neutral_list = []
negative_list = []
positive_list = []

# Not used in this cell; kept in case other cells (not visible here) read it.
stop_words = stopwords.words('english')

# Built once: neither object depends on the tweet being processed.
sentiment_analyzer = SentimentIntensityAnalyzer()
punctuation_table = str.maketrans('', '', string.punctuation)


def clean_tweet_text(text):
    """Return the cleaned, lower-cased version of a tweet's text.

    The steps run in a fixed order because later steps rely on earlier ones
    (for example, hashtags and mentions are removed before punctuation is
    stripped, while ``#`` and ``@`` are still present).
    """
    text = text.lower()                                  # Put everything in lowercase
    text = re.sub(r"'\w+", '', text)                     # Remove everything after '
    text = remove_urls(text)
    text = remove_twitter_urls(text)
    text = remove_emoticons(text)
    text = remove_emoji(text)
    text = give_emoji_free_text(text)
    text = noamp(text)                                   # Replace &amp with a space
    text = re.sub(r"#\S+", " ", text)                    # Remove hashtags
    text = re.sub(r"@\S+", " ", text)                    # Remove mentions
    text = text.translate(punctuation_table)             # Remove punctuation
    text = text.encode('ascii', 'ignore').decode()       # Drop non-ASCII characters
    text = re.sub(r"^rt ", " ", text)                    # Remove a leading "rt "
    text = re.sub(r'\s{2,}', " ", text)                  # Collapse runs of whitespace
    return text


for tweet in data:
    # The cleaned text overwrites the original in place, as before.
    tweet['full_text'] = clean_tweet_text(tweet['full_text'])
    text = tweet['full_text']
    tweet_list.append(text)

    score = sentiment_analyzer.polarity_scores(text)
    neg = score['neg']
    neu = score['neu']
    pos = score['pos']

    # Label by comparing the positive and negative scores; a tie is neutral.
    if neg > pos:
        negative_list.append(text)
    elif pos > neg:
        positive_list.append(text)
    else:
        neutral_list.append(text)

index = len(tweet_list)     # Total number of tweets in the dataset.

We've reported the number of tweets (Positive, Negative, Neutral) in a DataFrame in order to visualise them:

In [5]:
# Report the size of every bucket produced by the classification loop.
for label, bucket in (
    ("Total number of tweets: ", tweet_list),
    ("Positive number: ", positive_list),
    ("Negative number: ", negative_list),
    ("Neutral number: ", neutral_list),
):
    print(label, len(bucket))

# One label per classified tweet, grouped Positive -> Neutral -> Negative.
categorypositive = len(positive_list) * ["Positive"]
categoryneutral = len(neutral_list) * ["Neutral"]
categorynegative = len(negative_list) * ["Negative"]
category = [*categorypositive, *categoryneutral, *categorynegative]

# Tally the labels; the counter keeps them in first-seen order.
count = Counter(category)

# Two-column frame (Category, Count) consumed by the pie chart.
df_fake1 = (
    pd.DataFrame.from_dict(count, orient='index')
    .reset_index()
    .rename(columns={'index': 'Category', 0: 'Count'})
)

Total number of tweets:  1454
Positive number:  601
Negative number:  559
Neutral number:  294


Then we have created the pie chart:

In [19]:
# Slice colours, listed in the same order as the rows of df_fake1.
colors = ['#1AFF1A', '#005AB5', '#DC3220']

fig_fake1 = px.pie(
    df_fake1,
    names='Category',
    values='Count',
    title='Tweets percentage classified by sentiment - Fake Covid-19 dataset',
)

# Thin black outline around every slice, filled with the colours above.
slice_style = dict(colors=colors, line=dict(color='#000000', width=0.8))

fig_fake1.update_traces(
    sort=False,                    # do not let Plotly reorder the slices
    textposition='inside',
    textinfo='percent+label',
    textfont_size=15,
    marker=slice_style,
)
fig_fake1.show()

## Bar chart

In [20]:
tweet_list = []
neutral_list = []
negative_list = []
positive_list = []

# Running totals of the VADER scores over every tweet in the dataset.
pos_sum = 0
neu_sum = 0
neg_sum = 0
media_pos = 0
media_neu = 0
media_neg = 0

# Not used in this cell; kept in case other cells (not visible here) read it.
stop_words = stopwords.words('english')

# Built once: neither object depends on the tweet being processed.
sentiment_analyzer = SentimentIntensityAnalyzer()
punctuation_table = str.maketrans('', '', string.punctuation)

for tweet in data:
    # Same cleaning sequence as before; the order of the steps matters.
    text = tweet['full_text'].lower()                    # Put everything in lowercase
    text = re.sub(r"'\w+", '', text)                     # Remove everything after '
    text = remove_urls(text)
    text = remove_twitter_urls(text)
    text = remove_emoticons(text)
    text = remove_emoji(text)
    text = give_emoji_free_text(text)
    text = noamp(text)                                   # Replace &amp with a space
    text = re.sub(r"#\S+", " ", text)                    # Remove hashtags
    text = re.sub(r"@\S+", " ", text)                    # Remove mentions
    text = text.translate(punctuation_table)             # Remove punctuation
    text = text.encode('ascii', 'ignore').decode()       # Drop non-ASCII characters
    text = re.sub(r"^rt ", " ", text)                    # Remove a leading "rt "
    text = re.sub(r'\s{2,}', " ", text)                  # Collapse runs of whitespace

    # The cleaned text overwrites the original in place, as before.
    tweet['full_text'] = text
    tweet_list.append(text)

    score = sentiment_analyzer.polarity_scores(text)
    neg = score['neg']
    neu = score['neu']
    pos = score['pos']
    neg_sum += neg
    neu_sum += neu
    pos_sum += pos

    # Label by comparing the positive and negative scores; a tie is neutral.
    if neg > pos:
        negative_list.append(text)
    elif pos > neg:
        positive_list.append(text)
    else:
        neutral_list.append(text)

index = len(tweet_list)     # Total number of tweets in the dataset.

# Each total is divided by the size of its own bucket. The negative mean used
# to be divided by len(positive_list), a copy-paste slip. An empty bucket
# yields 0 instead of raising ZeroDivisionError.
# NOTE(review): the totals are summed over ALL tweets while the divisors count
# only the tweets in one bucket; if a per-tweet average over the whole dataset
# is intended, the divisor should be len(tweet_list) - confirm with the author.
media_pos = pos_sum / len(positive_list) if positive_list else 0
media_neu = neu_sum / len(neutral_list) if neutral_list else 0
media_neg = neg_sum / len(negative_list) if negative_list else 0

{'neg': 0.139, 'neu': 0.696, 'pos': 0.165, 'compound': 0.2023}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.149, 'neu': 0.851, 'pos': 0.0, 'compound': -0.4215}
{'neg': 0.066, 'neu': 0.635, 'pos': 0.299, 'compound': 0.8481}
{'neg': 0.0, 'neu': 0.639, 'pos': 0.361, 'compound': 0.8834}
{'neg': 0.123, 'neu': 0.846, 'pos': 0.031, 'compound': -0.5849}
{'neg': 0.156, 'neu': 0.844, 'pos': 0.0, 'compound': -0.4588}
{'neg': 0.0, 'neu': 0.781, 'pos': 0.219, 'compound': 0.8807}
{'neg': 0.0, 'neu': 0.967, 'pos': 0.033, 'compound': 0.0772}
{'neg': 0.055, 'neu': 0.823, 'pos': 0.122, 'compound': 0.4939}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.933, 'pos': 0.067, 'compound': 0.3818}
{'neg': 0.22, 'neu': 0.78, 'pos': 0.0, 'compound': -0.8834}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.078, 'neu': 0.922, 'pos': 0.0, 'compound': -0.296}
{'neg': 0.177, 'neu': 0.823, 'pos': 0.0, 'compound': -0.743}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0

{'neg': 0.207, 'neu': 0.793, 'pos': 0.0, 'compound': -0.8555}
{'neg': 0.0, 'neu': 0.838, 'pos': 0.162, 'compound': 0.6808}
{'neg': 0.0, 'neu': 0.769, 'pos': 0.231, 'compound': 0.4019}
{'neg': 0.279, 'neu': 0.465, 'pos': 0.256, 'compound': -0.0772}
{'neg': 0.0, 'neu': 0.907, 'pos': 0.093, 'compound': 0.5574}
{'neg': 0.203, 'neu': 0.797, 'pos': 0.0, 'compound': -0.7506}
{'neg': 0.168, 'neu': 0.762, 'pos': 0.07, 'compound': -0.4767}
{'neg': 0.095, 'neu': 0.859, 'pos': 0.046, 'compound': -0.3818}
{'neg': 0.0, 'neu': 0.92, 'pos': 0.08, 'compound': 0.5106}
{'neg': 0.202, 'neu': 0.699, 'pos': 0.099, 'compound': -0.4215}
{'neg': 0.041, 'neu': 0.828, 'pos': 0.131, 'compound': 0.3502}
{'neg': 0.091, 'neu': 0.909, 'pos': 0.0, 'compound': -0.296}
{'neg': 0.075, 'neu': 0.925, 'pos': 0.0, 'compound': -0.296}
{'neg': 0.0, 'neu': 0.748, 'pos': 0.252, 'compound': 0.6597}
{'neg': 0.064, 'neu': 0.936, 'pos': 0.0, 'compound': -0.2263}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu

{'neg': 0.155, 'neu': 0.777, 'pos': 0.068, 'compound': -0.4404}
{'neg': 0.088, 'neu': 0.717, 'pos': 0.195, 'compound': 0.4019}
{'neg': 0.27, 'neu': 0.73, 'pos': 0.0, 'compound': -0.9287}
{'neg': 0.066, 'neu': 0.778, 'pos': 0.156, 'compound': 0.4588}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.043, 'neu': 0.766, 'pos': 0.191, 'compound': 0.7351}
{'neg': 0.072, 'neu': 0.841, 'pos': 0.087, 'compound': 0.128}
{'neg': 0.0, 'neu': 0.952, 'pos': 0.048, 'compound': 0.0516}
{'neg': 0.0, 'neu': 0.874, 'pos': 0.126, 'compound': 0.6369}
{'neg': 0.0, 'neu': 0.9, 'pos': 0.1, 'compound': 0.6369}
{'neg': 0.0, 'neu': 0.789, 'pos': 0.211, 'compound': 0.4939}
{'neg': 0.0, 'neu': 0.876, 'pos': 0.124, 'compound': 0.5267}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.092, 'neu': 0.875, 'pos': 0.033, 'compound': -0.34}
{'neg': 0.134, 'neu': 0.767, 'pos': 0.099, 'compound': -0.0772}
{'neg': 0.0, 'neu': 0.866, 'pos': 0.

{'neg': 0.099, 'neu': 0.746, 'pos': 0.156, 'compound': 0.3832}
{'neg': 0.148, 'neu': 0.761, 'pos': 0.092, 'compound': -0.3612}
{'neg': 0.0, 'neu': 0.837, 'pos': 0.163, 'compound': 0.7906}
{'neg': 0.064, 'neu': 0.936, 'pos': 0.0, 'compound': -0.296}
{'neg': 0.058, 'neu': 0.838, 'pos': 0.104, 'compound': 0.3612}
{'neg': 0.147, 'neu': 0.714, 'pos': 0.139, 'compound': -0.0719}
{'neg': 0.0, 'neu': 0.887, 'pos': 0.113, 'compound': 0.5719}
{'neg': 0.0, 'neu': 0.899, 'pos': 0.101, 'compound': 0.5367}
{'neg': 0.125, 'neu': 0.745, 'pos': 0.13, 'compound': -0.2023}
{'neg': 0.29, 'neu': 0.657, 'pos': 0.053, 'compound': -0.9186}
{'neg': 0.189, 'neu': 0.701, 'pos': 0.11, 'compound': -0.5719}
{'neg': 0.0, 'neu': 0.889, 'pos': 0.111, 'compound': 0.4588}
{'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.058, 'neu': 0.811, 'pos': 0.13, 'compound': 0.4215}
{'neg': 0.102, 'neu': 0.898, 'pos': 0.0, 'compound': -0.6249}
{'neg': 0.0, 'neu': 0.728, 'pos': 0.272, 'compound': 0.4576}
{'neg': 0.208

{'neg': 0.116, 'neu': 0.884, 'pos': 0.0, 'compound': -0.6597}
{'neg': 0.0, 'neu': 0.791, 'pos': 0.209, 'compound': 0.7311}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.181, 'neu': 0.646, 'pos': 0.173, 'compound': -0.1027}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.764, 'pos': 0.236, 'compound': 0.7964}
{'neg': 0.099, 'neu': 0.901, 'pos': 0.0, 'compound': -0.296}
{'neg': 0.276, 'neu': 0.674, 'pos': 0.05, 'compound': -0.9201}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.127, 'neu': 0.81, 'pos': 0.063, 'compound': -0.3612}
{'neg': 0.128, 'neu': 0.812, 'pos': 0.06, 'compound': -0.4423}
{'neg': 0.073, 'neu': 0.927, 'pos': 0.0, 'compound': -0.296}
{'neg': 0.065, 'neu': 0.935, 'pos': 0.0, 'compound': -0.3491}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.905, 'pos': 0.095, 'compound': 0.

{'neg': 0.17, 'neu': 0.727, 'pos': 0.103, 'compound': -0.3995}
{'neg': 0.179, 'neu': 0.821, 'pos': 0.0, 'compound': -0.7506}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.811, 'pos': 0.189, 'compound': 0.6369}
{'neg': 0.123, 'neu': 0.877, 'pos': 0.0, 'compound': -0.2755}
{'neg': 0.045, 'neu': 0.835, 'pos': 0.12, 'compound': 0.5423}
{'neg': 0.132, 'neu': 0.828, 'pos': 0.04, 'compound': -0.4576}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.609, 'pos': 0.391, 'compound': 0.9254}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.577, 'pos': 0.423, 'compound': 0.8822}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.205, 'neu': 0.795, 'pos': 0.0, 'compound': -0.6597}
{'neg': 0.0, 'neu': 0.902, 'pos': 0.098, 'compound': 0.0772}
{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound': 0.4588}
{'neg': 0.082, 'neu': 0.918, 'pos': 0.0, 'compound': -0

{'neg': 0.236, 'neu': 0.675, 'pos': 0.089, 'compound': -0.7021}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.111, 'neu': 0.73, 'pos': 0.159, 'compound': 0.296}
{'neg': 0.084, 'neu': 0.916, 'pos': 0.0, 'compound': -0.5256}
{'neg': 0.0, 'neu': 0.732, 'pos': 0.268, 'compound': 0.9022}
{'neg': 0.171, 'neu': 0.829, 'pos': 0.0, 'compound': -0.7506}
{'neg': 0.0, 'neu': 0.868, 'pos': 0.132, 'compound': 0.4767}
{'neg': 0.057, 'neu': 0.883, 'pos': 0.06, 'compound': 0.0258}
{'neg': 0.128, 'neu': 0.76, 'pos': 0.112, 'compound': -0.1027}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.081, 'neu': 0.919, 'pos': 0.0, 'compound': -0.296}
{'neg': 0.0, 'neu': 0.792, 'pos': 0.208, 'compound': 0.4939}
{'neg': 0.035, 'neu': 0.965, 'pos': 0.0, 'compound': -0.0258}
{'neg': 0.072, 'neu': 0.688, 'pos': 0.241, 'compound': 0.7096}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.076, 'neu': 0.853, 'pos': 0.071, 'compound': 0.2023}
{'neg': 0.0, 'neu': 0.852, 'pos'

{'neg': 0.107, 'neu': 0.655, 'pos': 0.238, 'compound': 0.4215}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.223, 'neu': 0.607, 'pos': 0.17, 'compound': -0.6164}
{'neg': 0.0, 'neu': 0.868, 'pos': 0.132, 'compound': 0.4404}
{'neg': 0.0, 'neu': 0.873, 'pos': 0.127, 'compound': 0.4404}
{'neg': 0.177, 'neu': 0.689, 'pos': 0.134, 'compound': -0.3378}
{'neg': 0.0, 'neu': 0.909, 'pos': 0.091, 'compound': 0.4215}
{'neg': 0.0, 'neu': 0.887, 'pos': 0.113, 'compound': 0.5719}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.154, 'neu': 0.674, 'pos': 0.171, 'compound': -0.0258}
{'neg': 0.0, 'neu': 0.866, 'pos': 0.134, 'compound': 0.7783}
{'neg': 0.114, 'neu': 0.808, 'pos': 0.078, 'compound': -0.1027}
{'neg': 0.0, 'neu': 0.919, 'pos': 0.081, 'compound': 0.296}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound'

{'neg': 0.08, 'neu': 0.92, 'pos': 0.0, 'compound': -0.2732}
{'neg': 0.304, 'neu': 0.559, 'pos': 0.137, 'compound': -0.4019}
{'neg': 0.054, 'neu': 0.78, 'pos': 0.166, 'compound': 0.7579}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.877, 'pos': 0.123, 'compound': 0.7096}
{'neg': 0.162, 'neu': 0.792, 'pos': 0.046, 'compound': -0.6426}
{'neg': 0.0, 'neu': 0.8, 'pos': 0.2, 'compound': 0.8885}
{'neg': 0.134, 'neu': 0.866, 'pos': 0.0, 'compound': -0.5859}
{'neg': 0.0, 'neu': 0.914, 'pos': 0.086, 'compound': 0.5106}
{'neg': 0.196, 'neu': 0.804, 'pos': 0.0, 'compound': -0.5994}
{'neg': 0.0, 'neu': 0.797, 'pos': 0.203, 'compound': 0.9056}
{'neg': 0.321, 'neu': 0.605, 'pos': 0.073, 'compound': -0.7772}
{'neg': 0.069, 'neu': 0.931, 'pos': 0.0, 'compound': -0.3818}
{'neg': 0.184, 'neu': 0.816, 'pos': 0.0, 'compound': -0.7789}
{'neg': 0.0, 'neu': 0.859, 'pos': 0.141, 'compound': 0.5574}
{'neg': 0.127, 'neu': 0.832, 'pos': 0.041, 'compound': -0.5475}
{'neg': 0.219, 'neu

{'neg': 0.0, 'neu': 0.944, 'pos': 0.056, 'compound': 0.296}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.442, 'neu': 0.558, 'pos': 0.0, 'compound': -0.8779}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.11, 'neu': 0.779, 'pos': 0.111, 'compound': 0.2656}
{'neg': 0.175, 'neu': 0.825, 'pos': 0.0, 'compound': -0.5574}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.1, 'neu': 0.9, 'pos': 0.0, 'compound': -0.4404}
{'neg': 0.208, 'neu': 0.752, 'pos': 0.04, 'compound': -0.9153}
{'neg': 0.097, 'neu': 0.781, 'pos': 0.122, 'compound': 0.0316}
{'neg': 0.215, 'neu': 0.785, 'pos': 0.0, 'compound': -0.9743}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.577, 'pos': 0.423, 'compound': 0.296}
{'neg': 0.149, 'neu': 0.851, 'pos': 0.0, 'compound': -0.8266}
{'neg': 0.059, 'neu': 0.88, 'pos': 0.061, 'compound': 0.0258}
{'neg': 0.051, 'neu': 0.949, 'pos': 0.0, 'compound': -0.3182}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound

{'neg': 0.0, 'neu': 0.357, 'pos': 0.643, 'compound': 0.2023}
{'neg': 0.165, 'neu': 0.808, 'pos': 0.027, 'compound': -0.8294}
{'neg': 0.0, 'neu': 0.789, 'pos': 0.211, 'compound': 0.7717}
{'neg': 0.0, 'neu': 0.844, 'pos': 0.156, 'compound': 0.743}
{'neg': 0.0, 'neu': 0.773, 'pos': 0.227, 'compound': 0.6486}


In [16]:
# Each sentiment label paired with its average score, in display order.
sentiment_means = (
    ("Positive", media_pos),
    ("Neutral", media_neu),
    ("Negative", media_neg),
)
category = [label for label, _ in sentiment_means]
media = [mean for _, mean in sentiment_means]

# Two-column frame (Media, Category) consumed by the bar chart.
df_fake2 = pd.DataFrame({'Media': media, 'Category': category})

Let's create the chart:

In [14]:
# Colour range handed to the colour scale of the Category field.
range_ = ['#D41159', '#1A85FF', '#00a300']

chart_title = "On average how much are the tweets in the dataset positive, negative and neutral? - Fake Covid-19 dataset"

# Encoding channels: one bar per sentiment, bar height = average score.
sentiment_axis = alt.X('Category:N', title="Sentiment")
media_axis = alt.Y('Media:Q', title="Media")
# legend=None for the Tufte's data ink ratio principle
category_color = alt.Color('Category:N', scale=alt.Scale(range=range_), legend=None)

bars = alt.Chart(df_fake2).mark_bar(size=40).encode(
    x=sentiment_axis,
    y=media_axis,
    color=category_color,
).properties(
    title=chart_title,
    width=300,
)

# Styling is applied step by step; every configure_* call returns a new chart.
bars = bars.configure_title(fontSize=17, offset=25)
bars = bars.configure_axis(labelFontSize=13, titleFontSize=15, titlePadding=15)
bars = bars.configure_axisX(labelAngle=360)
bars = bars.configure_legend(titleFontSize=14, labelFontSize=12, titlePadding=10)
bars = bars.configure_header(titleFontSize=15, labelFontSize=12)

bars