In [None]:
# Import modules
import json
import csv
import math


from textwrap import wrap
import itertools 
from itertools import combinations

# Classic analysis imports
import numpy as np 
import pandas as pd 
import matplotlib as mp
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import seaborn as sns

# Stat analysis import 
from sklearn import preprocessing
from sklearn.cluster import KMeans


import scipy 
from scipy.stats import chi2_contingency
from scipy.stats import chi2
from statsmodels.sandbox.stats.multicomp import multipletests

import scikit_posthocs as sp

import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.stats.stattools as stools

import statsmodels.stats as stats 

from statsmodels.stats.anova import AnovaRM
from statsmodels.stats.anova import anova_lm

from statsmodels.stats import multicomp as mc
from statsmodels.miscmodels.ordinal_model import OrderedModel

#from pymer4.models import Lmer

#import pingouin as pg

# options for the notebook
# Allow up to 1000 characters per column so long text cells are not truncated
pd.set_option('display.max_colwidth', 1000)
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*' and
# later releases dropped the old names, so use whichever name is installed.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = [5, 5]

#%load_ext rpy2.ipython

In [None]:
from scipy.stats import chi2_contingency
from scipy.stats import norm
def chisq_posthoc_corrected(cross_table, correction_method='bonferroni', alpha=.05):
    """
    Run a chi-squared test on a contingency table, then a post-hoc analysis of
    the adjusted residuals of every cell with multiple-comparison correction.

    source: https://colab.research.google.com/drive/1QIDHMvpDq7Max5hk2mozSFdVssavdV-I#scrollTo=ig1kdg40qLDH
    source: https://github.com/neuhofmo/chisq_test_wrapper

    Parameters
    ----------
    cross_table : pandas.DataFrame
        Contingency table of observed counts (e.g. the result of ``pd.crosstab``).
    correction_method : str
        Forwarded as ``method`` to ``multipletests``.
    alpha : float
        Significance level, forwarded to ``multipletests`` and used as the
        threshold below which a corrected p-value gets asterisks.

    Returns
    -------
    pandas.DataFrame
        One row per cell of ``cross_table`` (row-major order) with the row
        label, the column label, 'Adj. Res.', 'p_value', 'p_value_corrected',
        'reject' and 'asterisques'. The label columns are named after the
        index/columns of ``cross_table`` ('row'/'column' when unnamed).

    The overall chi-squared result is printed, not returned.
    """
    def get_asterisks_for_pval(p_val, alpha=0.05):
        """Receives the p-value and returns asterisks string."""
        if p_val > alpha:  # bigger than alpha
            return "ns"
        # following the standards in biological publications
        if p_val < 1e-4:
            return '****'
        if p_val < 1e-3:
            return '***'
        if p_val < 1e-2:
            return '**'
        return '*'

    chiVal, pVal, dof, exp = chi2_contingency(cross_table)

    observed = cross_table.to_numpy()
    nRows, nCols = observed.shape
    # Plain arrays: positional access cannot be confused with label lookup,
    # which matters when the table's labels are themselves integers.
    rowTotals = observed.sum(axis=1)
    colTotals = observed.sum(axis=0)
    n = observed.sum()
    print("Chi2 result of the contingency table: {}, p-value: {}, dof: {}, N: {}\n".format(chiVal, pVal, dof, n))

    # Adjusted residual of each cell:
    # (observed - expected) / sqrt(expected * (1 - row share) * (1 - column share))
    row_share = (rowTotals / n)[:, np.newaxis]
    col_share = (colTotals / n)[np.newaxis, :]
    adj_res = (observed - exp) / np.sqrt(exp * (1 - row_share) * (1 - col_share))

    # Fall back to generic names so the two label columns never collapse
    # into a single None-named column.
    row_name = cross_table.index.name if cross_table.index.name is not None else 'row'
    col_name = cross_table.columns.name if cross_table.columns.name is not None else 'column'

    # Build the result in one go (row-major: all columns of row 0, then row 1, ...)
    phRes = pd.DataFrame({
        row_name: np.repeat(cross_table.index.to_numpy(), nCols),
        col_name: np.tile(cross_table.columns.to_numpy(), nRows),
        'Adj. Res.': adj_res.ravel(),
    })

    # Two-sided p-value; the survival function keeps precision for large
    # residuals where 1 - cdf would round to exactly 0.
    phRes['p_value'] = 2 * norm.sf(phRes['Adj. Res.'].abs())

    # Multiple-comparison correction (Bonferroni by default)
    reject_list, corrected_p_vals = multipletests(phRes['p_value'], method=correction_method, alpha=alpha)[:2]

    phRes['p_value_corrected'] = corrected_p_vals
    phRes['reject'] = reject_list
    # Use the caller's alpha so 'ns' agrees with the 'reject' column
    phRes['asterisques'] = [get_asterisks_for_pval(p_val, alpha) for p_val in corrected_p_vals]

    return phRes


In [None]:
# Source: https://medium.com/analytics-vidhya/create-your-own-coefficient-plot-function-in-python-aadb9fe27a77
# Define function to output plot of the model coefficients

def coefplot(results):
    '''
    Plot the coefficients of a fitted model with their 95% confidence intervals.

    The values are read from the coefficient table of ``results.summary()``
    (second table), so they carry the rounding of that printed table.

    Parameters
    ----------
    results : fitted model results exposing ``summary().tables[1].data`` with
        'coef' and '[0.025' columns (as the statsmodels OLS results used in
        the source article do).

    Returns
    -------
    The return value of ``plt.show()``.

    The intercept row ('const' or 'Intercept') is dropped when present.
    Note: the seaborn context is switched to "poster" globally and stays
    that way for subsequent plots.
    '''
    # Create dataframe of results summary
    coef_df = pd.DataFrame(results.summary().tables[1].data)

    # First row holds the column labels: promote it to header, then drop it
    coef_df.columns = coef_df.iloc[0]
    coef_df = coef_df.drop(0)

    # Set index to variable names
    coef_df = coef_df.set_index(coef_df.columns[0])

    # Change datatype from object to float
    coef_df = coef_df.astype(float)

    # Error bar length: coef - lower bound of the confidence interval
    coef_df['errors'] = coef_df['coef'] - coef_df['[0.025']

    # Drop the intercept for plotting. It is called 'const' with add_constant
    # and 'Intercept' with the formula API; ignore it when absent.
    coef_df = coef_df.drop(['const', 'Intercept'], errors='ignore')

    # Sort values by coef ascending
    coef_df = coef_df.sort_values(by=['coef'])

    ### Plot Coefficients ###

    # x-labels, stored as a column so DataFrame.plot can use them
    coef_df['variables'] = list(coef_df.index.values)

    # 'poster' context makes bars wide on the plot (global seaborn setting)
    sns.set_context("poster")

    # Define figure, axes, and plot
    fig, ax = plt.subplots(figsize=(15, 10))

    # Invisible bars carrying the error bars for the 95% confidence interval
    # Can increase capsize to add whiskers
    coef_df.plot(x='variables', y='coef', kind='bar',
                 ax=ax, color='none', fontsize=22,
                 ecolor='steelblue', capsize=0,
                 yerr='errors', legend=False)

    # Set title & labels
    plt.title('Coefficients of Features w/ 95% Confidence Intervals', fontsize=30)
    ax.set_ylabel('Coefficients', fontsize=22)
    ax.set_xlabel('', fontsize=22)

    # Coefficient markers; numpy is used directly because the pd.np alias
    # no longer exists in current pandas.
    ax.scatter(x=np.arange(coef_df.shape[0]),
               marker='o', s=80,
               y=coef_df['coef'], color='steelblue')

    # Line to define zero on the y-axis
    ax.axhline(y=0, linestyle='--', color='red', linewidth=1)

    return plt.show()

In [None]:
# Folder layout, relative to the notebook's working directory
data_folder = "../data/"
analysis_results_folder = "../results/"
plots_folders = "../plots"

# Input files
dataset_clean_filename = 'cleaned_dataset_per_subject.csv'
dataset_news_full_filename = 'cleaned_dataset_per_news.csv'

# data_folder ends with '/', so concatenation yields the full path
df_subject = pd.read_csv(data_folder + dataset_clean_filename)
df_news = pd.read_csv(data_folder + dataset_news_full_filename)

# Optional conversions kept for later analyses:
#df_news['answer'] = df_news['answer'].astype('category') # Transform answer as factor
#df_news['id_sondea'] = df_news['id_sondea'].astype(str) # Transform id_sondea as str for transforming into category later
df_news

In [None]:
# Display the per-subject table read from cleaned_dataset_per_subject.csv
df_subject

# Plots and tables

## Type news - Right/Wrong

In [None]:
# Contingency table: one row per type of news, one column per answer
count_type_news_answer = pd.crosstab(df_news['type_news'], df_news['answer'])
# The index holds the type_news labels; without it the CSV rows cannot be
# told apart, so it is written out (as for the news-titles table).
count_type_news_answer.to_csv('./tables/type_news_answer_count.csv', index=True)
count_type_news_answer

In [None]:
fig, ax = plt.subplots(figsize=(10, 10))
# Category order and display labels are paired by position. Pinning the order
# keeps the labels correct whatever the order of appearance in df_news.
type_order = ['fake_news', 'true_news']
x_labels = ['Misinformation', 'Legitimate information']

sns.countplot(hue="answer", x='type_news', order=type_order, data=df_news, ax=ax)

# Fix the tick positions before replacing their labels
ax.set_xticks(range(len(type_order)))
ax.set_xticklabels(x_labels)
ax.set(xlabel='Type of news', ylabel='Count of answer')

ax.yaxis.grid(True, clip_on=False)
sns.despine(left=True, bottom=True)

fig.savefig('./plots/right_wrong_per_type.svg', format='svg', bbox_inches='tight')
fig.savefig('./plots/right_wrong_per_type.png', format='png', bbox_inches='tight')
plt.show()

A chi-square test did not show a significant association between the type of news and the subjects' ability to identify it correctly, $\chi^2(1, N=1680) = 2.54$, $p = .11$.

In [None]:
# Chi-squared test and post-hoc residual analysis on the type_news x answer table
type_news_chisquare = chisq_posthoc_corrected(
    pd.crosstab(index=df_news['type_news'], columns=df_news['answer'])
)
# The result has a default integer index, so it is left out of the CSV
type_news_chisquare.to_csv('./tables/type_news_chisquare.csv', index=False)
type_news_chisquare

## News titles - Right/ Wrong

In [None]:
# Right/Wrong counts for every news title (titles are the index, kept in the CSV)
crosstab_news_titles = pd.crosstab(
    index=df_news['news_title'],
    columns=df_news['answer'],
)
crosstab_news_titles.to_csv('./tables/news_titles_right_wrong_count.csv', index=True)
crosstab_news_titles

In [None]:
# One stacked horizontal bar chart per type of news: Right and Wrong answers
# for every news title.
panel_titles = {'fake_news': 'Misinformation', 'true_news': 'Legitimate information'}
fig, axes = plt.subplots(1, 2, figsize=(20, 10))
axes = axes.flatten()
type_news = ['fake_news', 'true_news']
width = 0.7
for type_, ax in zip(type_news, axes):
    subset = df_news[df_news.type_news == type_]

    # A crosstab keeps the Right and Wrong counts of a title on the same row,
    # so both bar segments line up with their label. reindex guarantees both
    # columns exist even if one answer never occurs.
    to_plot = (pd.crosstab(subset['news_title'], subset['answer'])
               .reindex(columns=['Right', 'Wrong'], fill_value=0)
               .sort_values(by='Right'))

    # Wrap long titles so they fit next to the axis
    labels = ['\n'.join(wrap(l, 30)) for l in to_plot.index]
    count_right = to_plot['Right'].to_numpy()
    count_wrong = to_plot['Wrong'].to_numpy()

    ax.barh(labels, count_right, width, label='Right')
    ax.barh(labels, count_wrong, width, left=count_right,
            label='Wrong')
    ax.set_xlabel("Count of answer", fontsize='x-large')
    ax.set_ylabel('', fontsize='x-large')

    title_plot = panel_titles[type_]
    ax.set_title(title_plot, fontdict={'fontsize': 'xx-large'})
    ax.yaxis.grid(False)

    # Set the tick size on this axes explicitly; plt.xticks/plt.yticks only
    # act on the current axes, not on each panel.
    ax.tick_params(axis='both', labelsize='large')
plt.legend()
fig.subplots_adjust(wspace=.5)
fig.savefig('./plots/right_wrong_per_news.svg', format='svg', bbox_inches='tight')
fig.savefig('./plots/right_wrong_per_news.png', format='png', bbox_inches='tight')

The balance of right and wrong answers differs between news items: some received a fairly balanced mix of answers, while others are clearly skewed one way or the other.

## Socio-demographic information

### Gender

In [None]:
# Number of subjects per gender.
# rename_axis + reset_index(name=...) yields the columns ['Gender', 'Count']
# regardless of how the installed pandas names the value_counts() output.
gender_count = (df_subject['Gender']
                .value_counts()
                .rename_axis('Gender')
                .reset_index(name='Count'))
#gender_count = gender_count.replace({"Femenino": "Female", 'Masculino': "Male"})
gender_count.to_csv('./tables/gender_counts.csv', index=False)
gender_count

In [None]:
# Bar chart of the gender counts
fig, ax = plt.subplots()
ax.bar(gender_count['Gender'], gender_count['Count'])

# Axis titles
ax.set_xlabel("Gender", fontsize='large')
ax.set_ylabel('Count', fontsize='large')

# Switch the grid off on both axes
ax.grid(False)

# Tick label sizes
plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')

plt.suptitle('Counts for Gender', fontsize='xx-large')
plt.tight_layout()

# Export as vector and raster images
for _ext in ('svg', 'png'):
    fig.savefig('./plots/gender_count.' + _ext, format=_ext, bbox_inches='tight')
plt.show()

### Education

In [None]:
# Number of subjects per education level (the variable keeps the name
# gender_count because the plotting cell below reads it under that name).
# rename_axis + reset_index(name=...) yields the columns ['Education', 'Count']
# regardless of how the installed pandas names the value_counts() output.
gender_count = (df_subject['Education']
                .value_counts()
                .rename_axis('Education')
                .reset_index(name='Count'))
#.replace({"University_studies": "University level", 'No_university_studies': "No University level"})
gender_count.to_csv('./tables/education_counts.csv', index=False)
gender_count

In [None]:
# Bar chart of the education-level counts
fig, ax = plt.subplots()
ax.bar(gender_count['Education'], gender_count['Count'])

# Axis titles
ax.set_xlabel("Education level", fontsize='large')
ax.set_ylabel('Count', fontsize='large')

# Switch the grid off on both axes
ax.grid(False)

# Tick label sizes
plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')

plt.suptitle('Counts for education level', fontsize='xx-large')
plt.tight_layout()

# Export as vector and raster images
for _ext in ('svg', 'png'):
    fig.savefig('./plots/education_counts.' + _ext, format=_ext, bbox_inches='tight')

plt.show()

In [None]:
# Number of subjects per (three-level) education category.
# rename_axis + reset_index(name=...) yields the columns ['Education', 'Count']
# regardless of how the installed pandas names the value_counts() output.
gender_count2 = (df_subject['Education2']
                 .value_counts()
                 .rename_axis('Education')
                 .reset_index(name='Count'))
#.replace({"University_studies": "University level", 'No_university_studies': "No University level"})

# Display order of the levels. Building the Categorical directly replaces
# cat.set_categories(..., inplace=True), which current pandas no longer accepts.
# Values missing from `sorter` become NaN, as they did with set_categories.
sorter = ["Secondary", "College", "University"]
gender_count2['Education'] = pd.Categorical(gender_count2['Education'], categories=sorter)

gender_count2 = gender_count2.sort_values('Education')
gender_count2.to_csv('./tables/education2_counts.csv', index=False)
gender_count2

In [None]:
# Bar chart of the three-level education counts
fig, ax = plt.subplots()
ax.bar(gender_count2['Education'], gender_count2['Count'])

# Axis titles
ax.set_xlabel("Education level", fontsize='large')
ax.set_ylabel('Count', fontsize='large')

# Switch the grid off on both axes
ax.grid(False)

# Tick label sizes
plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')

plt.suptitle('Counts for education level', fontsize='xx-large')
plt.tight_layout()

# Export as vector and raster images
for _ext in ('svg', 'png'):
    fig.savefig('./plots/education2_counts.' + _ext, format=_ext, bbox_inches='tight')

plt.show()

### Age 

In [None]:
# Number of subjects per age bracket (the variable keeps the name gender_count
# because the plotting cell below reads it under that name).
# rename_axis + reset_index(name=...) yields the columns ['Age', 'Count']
# regardless of how the installed pandas names the value_counts() output.
gender_count = (df_subject['Age']
                .value_counts()
                .rename_axis('Age')
                .reset_index(name='Count'))

# Display order of the brackets. Building the Categorical directly replaces
# cat.set_categories(..., inplace=True), which current pandas no longer accepts.
# Values missing from `sorter` become NaN, as they did with set_categories.
sorter = ["<=18-34", "35-54", ">55"]
gender_count['Age'] = pd.Categorical(gender_count['Age'], categories=sorter)

gender_count = gender_count.sort_values('Age')
gender_count.to_csv('./tables/age_counts.csv', index=False)
gender_count

In [None]:
# Bar chart of the age-bracket counts
fig, ax = plt.subplots()
ax.bar(gender_count['Age'], gender_count['Count'])

# Axis titles
ax.set_xlabel("Age", fontsize='large')
ax.set_ylabel('Count', fontsize='large')

# Switch the grid off on both axes
ax.grid(False)

# Tick label sizes
plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')

plt.suptitle('Counts for age', fontsize='xx-large')
plt.tight_layout()

# Export as vector and raster images
for _ext in ('svg', 'png'):
    fig.savefig('./plots/age_counts.' + _ext, format=_ext, bbox_inches='tight')

plt.show()

### Technological knowledge


In [None]:
# Number of subjects per level of technological knowledge (the variable keeps
# the name gender_count because the plotting cell below reads it under that name).
# rename_axis + reset_index(name=...) yields the columns
# ['Technological knowledge', 'Count'] regardless of how the installed pandas
# names the value_counts() output.
gender_count = (df_subject['Technological']
                .value_counts()
                .rename_axis('Technological knowledge')
                .reset_index(name='Count'))
#gender_count = gender_count.replace({"Avanzada": "Advanced", 'Media': "Intermediary", "Básica": "Basic"})

# Display order of the levels. Building the Categorical directly replaces
# cat.set_categories(..., inplace=True), which current pandas no longer accepts.
# Values missing from `sorter` become NaN, as they did with set_categories.
sorter = ["Basic", "Intermediate", "Advanced"]
gender_count["Technological knowledge"] = pd.Categorical(gender_count["Technological knowledge"], categories=sorter)

gender_count = gender_count.sort_values("Technological knowledge")

gender_count.to_csv('./tables/tech_level_counts.csv', index=False)
gender_count

In [None]:
# Bar chart of the technological-knowledge counts
fig, ax = plt.subplots()
ax.bar(gender_count['Technological knowledge'], gender_count['Count'])

# Axis titles
ax.set_xlabel("Technological knowledge", fontsize='large')
ax.set_ylabel('Count', fontsize='large')

# Switch the grid off on both axes
ax.grid(False)

# Tick label sizes
plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')

plt.suptitle('Counts for Technological knowledge', fontsize='xx-large')
plt.tight_layout()

# Export as vector and raster images
for _ext in ('svg', 'png'):
    fig.savefig('./plots/tech_level_counts.' + _ext, format=_ext, bbox_inches='tight')

plt.show()

### Religion

In [None]:
# Number of subjects per religion category (the variable keeps the name
# gender_count because the plotting cell below reads it under that name).
# rename_axis + reset_index(name=...) yields the columns ['Religion', 'Count']
# regardless of how the installed pandas names the value_counts() output.
gender_count = (df_subject['Religion']
                .value_counts()
                .rename_axis('Religion')
                .reset_index(name='Count'))
gender_count.to_csv('./tables/religion_counts.csv', index=False)
gender_count

In [None]:
# Bar chart of the religion counts
fig, ax = plt.subplots()

ax.bar(gender_count['Religion'], gender_count['Count'])

ax.set_xlabel("Religion", fontsize='large')
ax.set_ylabel('Count', fontsize='large')

# No grid lines on either axis
ax.yaxis.grid(False)
ax.xaxis.grid(False)

plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')

# Title spelling corrected ("Religous" -> "Religious"); it ends up in the exported figures
plt.suptitle('Counts for Religious vs No Religious', fontsize='xx-large')

plt.tight_layout()
fig.savefig('./plots/religion_counts.svg', format='svg', bbox_inches='tight')
fig.savefig('./plots/religion_counts.png', format='png', bbox_inches='tight')

plt.show()

### Politics

In [None]:
# Number of subjects per political alignment (the variable keeps the name
# gender_count because the plotting cell below reads it under that name).
# rename_axis + reset_index(name=...) yields the columns
# ['Political alignment', 'Count'] regardless of how the installed pandas
# names the value_counts() output.
gender_count = (df_subject['Political']
                .value_counts()
                .rename_axis('Political alignment')
                .reset_index(name='Count'))
#gender_count = gender_count.replace({"Izquierda": "Left", 'Derecha': "Right", "Centro": "Centre"})

# Display order of the alignments. Building the Categorical directly replaces
# cat.set_categories(..., inplace=True), which current pandas no longer accepts.
# Values missing from `sorter` become NaN, as they did with set_categories.
sorter = ["Left", "Centre", "Right"]
gender_count["Political alignment"] = pd.Categorical(gender_count["Political alignment"], categories=sorter)

gender_count = gender_count.sort_values("Political alignment")

gender_count.to_csv('./tables/politic_counts.csv', index=False)
gender_count

In [None]:
# Bar chart of the political-alignment counts
fig, ax = plt.subplots()
ax.bar(gender_count["Political alignment"], gender_count['Count'])

# Axis titles
ax.set_xlabel("Political alignment", fontsize='large')
ax.set_ylabel('Count', fontsize='large')

# Switch the grid off on both axes
ax.grid(False)

# Tick label sizes
plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')

plt.suptitle('Counts for "Political alignment"', fontsize='xx-large')
plt.tight_layout()

# Export as vector and raster images
for _ext in ('svg', 'png'):
    fig.savefig('./plots/politic_counts.' + _ext, format=_ext, bbox_inches='tight')

plt.show()

## Justifications

In [None]:
# Justification columns

# Columns passed as value_vars when melting the "misinformation" justifications
var_just_fake = [
    "Previously_read_debunked",
    "Source_unknown",
    "Media_unreliable",
    "Cited_sources_unknown",
    "Cited_sources_unreliable",
    "Without_sources",
    "Unprofessional_style",
    "No_coherent",
    "Headline_sensationalist",
    "Image_sensationalist",
    "Different_belief",
    "Different_ideology",
    "Other",
]

# Columns passed as value_vars when melting the "legitimate" justifications
var_just_true = [
    "Previously_read_the_information",
    "Known_media",
    "Reliable_media",
    "Source_known",
    "Source_Reliable",
    "Professional_style",
    "Coherent",
    "Same_belief",
    "Same_ideology",
    "Other",
]

## Creating mask to sample only when Participants were presented the Justification right and Justification wrong


In [None]:
# Long format: one row per (answered news item, "legitimate" justification column)
df_new_to_plot_right = pd.melt(df_news, id_vars=['answer', 'type_news', 'id_sondea', 'news_title'], value_vars=list(var_just_true))

# Keep the rows where a true news item was answered Right or a fake news item
# was answered Wrong
mask_justification_right = (((df_new_to_plot_right['type_news'] == 'true_news') & (df_new_to_plot_right['answer'] == 'Right')) | ((df_new_to_plot_right['type_news'] == 'fake_news') & (df_new_to_plot_right['answer'] == 'Wrong')))
df_new_to_plot_right = df_new_to_plot_right[mask_justification_right].copy()

# Any value that is not True/False/NaN (e.g. the free text of 'otro') counts as True.
# np.nan is the canonical spelling; the np.NaN alias is gone in NumPy 2.
special_answers = ~df_new_to_plot_right['value'].isin([True, False, np.nan])
df_new_to_plot_right.loc[special_answers, 'value'] = True

In [None]:
# Sum of the selected justifications per (justification, answer, type of news)
to_plot = (df_new_to_plot_right
           .groupby(['variable', 'answer', 'type_news'])['value']
           .sum()
           .reset_index())
# Keep true news answered Right and fake news answered Wrong
mask_justification_right = (
    (to_plot['type_news'].eq('true_news') & to_plot['answer'].eq('Right'))
    | (to_plot['type_news'].eq('fake_news') & to_plot['answer'].eq('Wrong'))
)
to_plot = to_plot[mask_justification_right]
# Order by answer, then by count
to_plot = to_plot.sort_values(by=['answer', 'value'])
to_plot

In [None]:
# Long format: one row per (answered news item, "misinformation" justification column)
df_new_to_plot_wrong = pd.melt(df_news, id_vars=['answer', 'type_news', 'id_sondea', 'news_title'], value_vars=list(var_just_fake))

# Any value that is not True/False/NaN (e.g. the free text of 'otro') counts as True.
# np.nan is the canonical spelling; the np.NaN alias is gone in NumPy 2.
special_answers = ~df_new_to_plot_wrong['value'].isin([True, False, np.nan])
df_new_to_plot_wrong.loc[special_answers, 'value'] = True

### Justifications when the subject thinks it is misinformation

In [None]:
# Sum of the selected justifications per (justification, answer, type of news)
to_plot = (df_new_to_plot_wrong
           .groupby(['variable', 'answer', 'type_news'])['value']
           .sum()
           .reset_index())
# Keep true news answered Wrong and fake news answered Right
mask_justification_wrong = (
    (to_plot['type_news'].eq('true_news') & to_plot['answer'].eq('Wrong'))
    | (to_plot['type_news'].eq('fake_news') & to_plot['answer'].eq('Right'))
)
to_plot = to_plot[mask_justification_wrong]
# Order by answer, then by count
to_plot = to_plot.sort_values(by=['answer', 'value'])
to_plot

In [None]:
# Stacked horizontal bars: per justification, the count from fake news
# (answer Right) followed by the count from true news (answer Wrong).
# Pivoting puts both counts of a justification on the same row, so each bar
# segment matches its label. Selecting the two series separately would not,
# because each is ordered by its own values.
justification_counts = (to_plot
                        .pivot(index='variable', columns='type_news', values='value')
                        .reindex(columns=['fake_news', 'true_news'])
                        .fillna(0)
                        .astype(float)
                        .sort_values(by='fake_news'))
labels = justification_counts.index.to_numpy()
count_right = justification_counts['fake_news'].to_numpy()
count_wrong = justification_counts['true_news'].to_numpy()
width = 0.5
fig, ax = plt.subplots()

ax.barh(labels, count_right, width, label='Right')
ax.barh(labels, count_wrong, width, left=count_right,
      label='Wrong')
ax.set_xlabel("Count of answers", fontsize='large')
ax.set_ylabel('Justification', fontsize='large')

# No grid lines on either axis
ax.yaxis.grid(False)
ax.xaxis.grid(False)

plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')

plt.tight_layout()
plt.legend()

plt.suptitle('Justification when Subject thinks it is misinformation', fontsize=16)
fig.savefig('./plots/justification_fake_news.svg', format='svg', bbox_inches='tight')
fig.savefig('./plots/justification_fake_news.png', format='png', bbox_inches='tight')

plt.show()

### Justifications when the subject thinks it is a True news

In [None]:
# Sum of the selected justifications per (justification, answer, type of news)
to_plot = (df_new_to_plot_right
           .groupby(['variable', 'answer', 'type_news'])['value']
           .sum()
           .reset_index())
# Keep true news answered Right and fake news answered Wrong
mask_justification_wrong = (
    (to_plot['type_news'].eq('true_news') & to_plot['answer'].eq('Right'))
    | (to_plot['type_news'].eq('fake_news') & to_plot['answer'].eq('Wrong'))
)
to_plot = to_plot[mask_justification_wrong]
# Order by answer, then by count
to_plot = to_plot.sort_values(by=['answer', 'value'])
to_plot

In [None]:
# Stacked horizontal bars: per justification, the count from fake news
# (answer Wrong) followed by the count from true news (answer Right).
# Pivoting puts both counts of a justification on the same row, so each bar
# segment matches its label. Selecting the two series separately would not,
# because each is ordered by its own values.
justification_counts = (to_plot
                        .pivot(index='variable', columns='type_news', values='value')
                        .reindex(columns=['fake_news', 'true_news'])
                        .fillna(0)
                        .astype(float)
                        .sort_values(by='true_news'))
labels = justification_counts.index.to_numpy()
count_right = justification_counts['fake_news'].to_numpy()
count_wrong = justification_counts['true_news'].to_numpy()
width = 0.5
fig, ax = plt.subplots()

ax.barh(labels, count_right, width, label='Wrong')
ax.barh(labels, count_wrong, width, left=count_right,
      label='Right')
ax.set_xlabel("Count of answers", fontsize='large')
ax.set_ylabel('Justification', fontsize='large')

# No grid lines on either axis
ax.yaxis.grid(False)
ax.xaxis.grid(False)

plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')
plt.tight_layout()

plt.legend()

plt.suptitle('Justification when Subject thinks it is legitimate information', fontsize=16)
fig.savefig('./plots/justification_right_news.svg', format='svg', bbox_inches='tight')
fig.savefig('./plots/justification_right_news.png', format='png', bbox_inches='tight')

plt.show()

### Justification, people getting wrong on Misinformation

In [None]:
# Rows of the "legitimate" justifications for fake news answered Wrong
mask_justification_fake_wrong = (
    df_new_to_plot_right['type_news'].eq('fake_news')
    & df_new_to_plot_right['answer'].eq('Wrong')
)
to_plot = df_new_to_plot_right[mask_justification_fake_wrong]

# Sum of the selected justifications per (justification, answer, type of news)
to_plot = (to_plot
           .groupby(['variable', 'answer', 'type_news'])['value']
           .sum()
           .reset_index())

# Order by count; the two constant grouping columns are no longer needed
to_plot = to_plot.sort_values(by=['answer', 'value']).drop(columns=['answer', 'type_news'])
to_plot

In [None]:
# Horizontal bar chart of the justification counts held in to_plot
fig, ax = plt.subplots()
width = 0.5
labels, counts = to_plot['variable'], to_plot['value']
ax.barh(labels, counts, width, label='Wrong')

# Axis titles
ax.set_xlabel("Count of answers", fontsize='large')
ax.set_ylabel('Justification', fontsize='large')

# Only the x-axis grid is switched off
ax.grid(False, axis='x')

# Tick label sizes
plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')

plt.suptitle('Justification when subject consider a Misinformation as Legitimate', fontsize=16)

# Export as vector and raster images (format inferred from the extension)
for _ext in ('svg', 'png'):
    fig.savefig('./plots/justification_fake_wrong.' + _ext, bbox_inches='tight')

plt.show()

In [None]:
# Number of distinct id_sondea values among the fake-news / Wrong rows
len(df_new_to_plot_right.loc[mask_justification_fake_wrong, 'id_sondea'].unique())

 ### Justification, people getting right on Legitimate information

In [None]:
# Rows of the "legitimate" justifications for true news answered Right
mask_justification_true_right = (
    df_new_to_plot_right['type_news'].eq('true_news')
    & df_new_to_plot_right['answer'].eq('Right')
)
to_plot = df_new_to_plot_right[mask_justification_true_right]

# Sum of the selected justifications per (justification, answer, type of news)
to_plot = (to_plot
           .groupby(['variable', 'answer', 'type_news'])['value']
           .sum()
           .reset_index())

# Order by count; the two constant grouping columns are no longer needed
to_plot = to_plot.sort_values(by=['answer', 'value']).drop(columns=['answer', 'type_news'])
to_plot

In [None]:
# Horizontal bar chart of the justification counts held in to_plot
fig, ax = plt.subplots()
width = 0.5
labels, counts = to_plot['variable'], to_plot['value']
ax.barh(labels, counts, width, label='Wrong')

# Axis titles
ax.set_xlabel("Count of answers", fontsize='large')
ax.set_ylabel('Justification', fontsize='large')

# Only the x-axis grid is switched off
ax.grid(False, axis='x')

# Tick label sizes
plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')

plt.suptitle('Justification when subject consider a Legitimate as Legitimate', fontsize=16)

# Export as vector and raster images (format inferred from the extension)
for _ext in ('svg', 'png'):
    fig.savefig('./plots/justification_true_right.' + _ext, bbox_inches='tight')

plt.show()

In [None]:
# Number of distinct id_sondea values among the true-news / Right rows
len(df_new_to_plot_right.loc[mask_justification_true_right, 'id_sondea'].unique())

### Justification, people getting Right on Misinformation

In [None]:
# Rows of the "misinformation" justifications for fake news answered Right
mask_justification_fake_right = (
    df_new_to_plot_wrong['type_news'].eq('fake_news')
    & df_new_to_plot_wrong['answer'].eq('Right')
)
to_plot = df_new_to_plot_wrong[mask_justification_fake_right]

# Sum of the selected justifications per (justification, answer, type of news)
to_plot = (to_plot
           .groupby(['variable', 'answer', 'type_news'])['value']
           .sum()
           .reset_index())

# Order by count; the two constant grouping columns are no longer needed
to_plot = to_plot.sort_values(by=['answer', 'value']).drop(columns=['answer', 'type_news'])
to_plot

In [None]:
# Horizontal bar chart of the justification counts held in to_plot
fig, ax = plt.subplots()
width = 0.5
labels, counts = to_plot['variable'], to_plot['value']
ax.barh(labels, counts, width, label='Wrong')

# Axis titles
ax.set_xlabel("Count of answers", fontsize='large')
ax.set_ylabel('Justification', fontsize='large')

# Only the x-axis grid is switched off
ax.grid(False, axis='x')

# Tick label sizes
plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')

plt.suptitle('Justification when subject consider a Misinformation as Misinformation', fontsize=16)

# Export as vector and raster images
for _ext in ('svg', 'png'):
    fig.savefig('./plots/justification_fake_right.' + _ext, format=_ext, bbox_inches='tight')

plt.show()

### Justification, people getting Wrong on Legitimate information

In [None]:
# Rows of the "misinformation" justifications for true news answered Wrong
mask_justification_true_wrong = (
    df_new_to_plot_wrong['type_news'].eq('true_news')
    & df_new_to_plot_wrong['answer'].eq('Wrong')
)
to_plot = df_new_to_plot_wrong[mask_justification_true_wrong]

# Sum of the selected justifications per (justification, answer, type of news)
to_plot = (to_plot
           .groupby(['variable', 'answer', 'type_news'])['value']
           .sum()
           .reset_index())

# Order by count; the two constant grouping columns are no longer needed
to_plot = to_plot.sort_values(by=['answer', 'value']).drop(columns=['answer', 'type_news'])
to_plot

In [None]:
# Horizontal bar chart of the justification counts held in to_plot
fig, ax = plt.subplots()
width = 0.5
labels, counts = to_plot['variable'], to_plot['value']
ax.barh(labels, counts, width, label='Wrong')

# Axis titles
ax.set_xlabel("Count of answers", fontsize='large')
ax.set_ylabel('Justification', fontsize='large')

# Only the x-axis grid is switched off
ax.grid(False, axis='x')

# Tick label sizes
plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')

plt.suptitle('Justification when subject consider a Legitimate information as Misinformation', fontsize=16)

# Export as vector and raster images
for _ext in ('svg', 'png'):
    fig.savefig('./plots/justification_true_wrong.' + _ext, format=_ext, bbox_inches='tight')

plt.show()

## Actions

In [None]:
# Columns passed as value_vars when melting the actions
var_actions = [
    'share_friends_and_family',
    'share_online',
    'verify_source',
    "apply_learning",
    'no_action',
]

In [None]:
# Long format: one row per (answered news item, action column)
df_new_to_plot_action = df_news.melt(
    id_vars=['answer', 'type_news', 'id_sondea', 'news_title'],
    value_vars=list(var_actions),
)
# Keep true news answered Right and fake news answered Wrong
mask_action = (
    (df_new_to_plot_action['type_news'].eq('true_news') & df_new_to_plot_action['answer'].eq('Right'))
    | (df_new_to_plot_action['type_news'].eq('fake_news') & df_new_to_plot_action['answer'].eq('Wrong'))
)
df_new_to_plot_action_filtered = df_new_to_plot_action[mask_action].copy()
df_new_to_plot_action_filtered

In [None]:
# Sum of the selected actions per (action, answer, type of news)
to_plot = (df_new_to_plot_action_filtered
           .groupby(['variable', 'answer', 'type_news'])['value']
           .sum()
           .reset_index())
# Keep true news answered Right (TR) and fake news answered Wrong (FW)
mask_action = (
    (to_plot['type_news'].eq('true_news') & to_plot['answer'].eq('Right'))
    | (to_plot['type_news'].eq('fake_news') & to_plot['answer'].eq('Wrong'))
)

to_plot = to_plot[mask_action].copy()
to_plot

In [None]:
# Action names and the per-type counts read from to_plot
labels = to_plot['variable'].unique()

count_right = to_plot.loc[to_plot['type_news'] == 'true_news', 'value']
count_wrong = to_plot.loc[to_plot['type_news'] == 'fake_news', 'value']

In [None]:
# Stacked horizontal bars: per action, the count from fake news (answer Wrong)
# followed by the count from true news (answer Right).
to_plot = to_plot.sort_values(by=['answer', 'value'])
# Pivoting puts both counts of an action on the same row, so each bar segment
# matches its label. Selecting the two series separately would not, because
# each is ordered by its own values.
action_counts = (to_plot
                 .pivot(index='variable', columns='type_news', values='value')
                 .reindex(columns=['fake_news', 'true_news'])
                 .fillna(0)
                 .astype(float)
                 .sort_values(by='true_news'))
labels = action_counts.index.to_numpy()
# In this selection the true-news rows are the Right answers and the
# fake-news rows are the Wrong answers.
count_right = action_counts['true_news'].to_numpy()
count_wrong = action_counts['fake_news'].to_numpy()
width = 0.5
fig, ax = plt.subplots()

# Fake-news segment first, as before, but now with the legend label that
# matches its answers (the two labels used to be swapped).
ax.barh(labels, count_wrong, width, label='Wrong')
ax.barh(labels, count_right, width, left=count_wrong,
      label='Right')
ax.set_xlabel("Count of answers", fontsize='large')
ax.set_ylabel('Actions', fontsize='large')

# No grid lines on either axis
ax.yaxis.grid(False)
ax.xaxis.grid(False)

plt.xticks(fontsize='large')
plt.yticks(fontsize='x-large')

plt.legend()
plt.suptitle('Actions when subject thinks it is legitimate information', fontsize=18)
fig.savefig('./plots/action_true_news.svg', format='svg')
fig.savefig('./plots/action_true_news.png', format='png')

plt.show()