### Latent ideology of users

The latent ideology (LI) is based on the media outlets shared by each user; therefore, a user's ideology reflects the set of media outlets that user shares.

In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
import matplotlib 

# Seed NumPy's global random generator so that the random draws made later
# in the notebook (the initial user scores) are repeatable from run to run.
np.random.seed(123459)

In [25]:
# Read only the columns this notebook works with
data = pd.read_csv(
    'Data/dataEchoChambers_with_article_info.csv',
    usecols=['tw_id', 'fecha', 'usr_id', 'opinion_dinamic', 'url_expanded', 'medio'],
)
# Parse 'fecha' into pandas datetimes
data = data.assign(fecha=pd.to_datetime(data['fecha']))
data.head(5)

### Encoding matrix user - media share
User–media shares dataframe, built from a one-hot encoding of the outlet of each share.

In [26]:
# One-hot encode the outlet ('medio') of every row of `data`:
# one column per distinct outlet, a single 1 per row
encoder = OneHotEncoder(handle_unknown='ignore')
onehot_matrix = encoder.fit_transform(data[['medio']])
encoder_df = pd.DataFrame(onehot_matrix.toarray(), columns=list(encoder.categories_[0]))

# Place the user id next to its one-hot row; the join aligns the two frames on the row index
user_media_df = data[['usr_id']].join(encoder_df)

In [27]:
# Collapse to one row per user: adding up the one-hot rows gives, for every user,
# how many times each outlet was shared
shares_by_user = user_media_df.groupby(by='usr_id')
user_media_df = shares_by_user.sum()
user_media_df.head(5)

In [28]:
# Outlets kept for the analysis
ppal_media = [
    'Clarin', 'La Nacion', 'Pagina 12', 'Todo Noticias', 'Infobae', 'El Destape',
    'Ambito Financiero', 'Minuto Uno', 'Perfil', 'El Cronista', 'Radio Mitre', 'El Dia',
]

# Restrict the share counts to those outlets, then drop the users who shared none of them.
# The mask is computed column-wise in one vectorised pass instead of calling
# np.count_nonzero once per row; the selected rows are the same.
user_media_df_filtrado = user_media_df[ppal_media].copy()
shared_any_outlet = (user_media_df_filtrado != 0).any(axis=1)
user_media_df_filtrado = user_media_df_filtrado[shared_any_outlet].copy()

In [29]:
# Matrix of user - media shares: one row per user kept by the filter above,
# one column per outlet in ppal_media; each entry is that user's share count
# for that outlet.
A = user_media_df_filtrado.to_numpy()

In [30]:
# Iterative scoring of users and outlets.
#   1. start from random, standardised user scores;
#   2. outlet score = share-weighted average of the scores of the users who shared it;
#   3. user score   = share-weighted average of the scores of the outlets they shared;
#   4. re-standardise the user scores (zero mean, unit standard deviation) and repeat.
# The weighted averages are written as matrix products, which is the same arithmetic as
# calling np.average(..., weights=...) per column / per row but avoids one Python-level
# call per user on every iteration.
N_ITERATIONS = 50  # number of update rounds

score_users = np.random.normal(loc=0, scale=1.00, size=A.shape[0])
score_users = (score_users - np.mean(score_users)) / np.std(score_users)

# Weight totals do not change between iterations, so compute them once
shares_per_media = A.sum(axis=0)
shares_per_user = A.sum(axis=1)

# np.average raises when the weights sum to zero; keep that failure explicit
# instead of letting a 0/0 silently propagate NaN through every score
if (shares_per_media == 0).any() or (shares_per_user == 0).any():
    raise ZeroDivisionError("Weights sum to zero, can't be normalized")

for iteration in range(N_ITERATIONS):
    score_media = (A.T @ score_users) / shares_per_media
    score_users = (A @ score_media) / shares_per_user

    score_users = (score_users - np.mean(score_users)) / np.std(score_users)

In [31]:
# Outlet scores implied by the final (standardised) user scores:
# for every outlet, the share-weighted average of the user scores
score_media = np.array([np.average(score_users, weights=shares) for shares in A.T])
# Pair each outlet name with its score as [name, score]
media_score = [[outlet, score] for outlet, score in zip(user_media_df_filtrado.columns, score_media)]
print(media_score)

In [33]:
# Histogram of the user scores, with each outlet's score drawn as a vertical line
fig, ax = plt.subplots(dpi=300, figsize=(5, 3))

# NOTE(review): np.arange leaves out the stop value, so the last bin edge is 1.75 and
# scores above it are not drawn - confirm this is intended.
score_bins = np.arange(-2, 2, 0.25)
ax.hist(score_users, bins=score_bins, alpha=0.50, color='g', density=True)
ax.set_xlabel(r'MSI', size=14)

# Outlet markers go on a twin y-axis whose ticks are hidden
ax2 = ax.twinx()
for outlet, outlet_score in media_score:
    ax2.axvline(outlet_score, color='grey', alpha=0.65)
    # 'Clarin' is written lower than the other outlet names
    label_height = 0.25 if outlet == 'Clarin' else 0.5
    ax2.text(x=outlet_score - 0.02, y=label_height, s=outlet, rotation=90, size=6)
ax2.set_yticks([])

# x-range: the 0.5%-99.5% quantile span of the scores, widened by 0.65 on each side
ax.set_xlim(np.quantile(score_users, q=[0.005, 0.995]) + np.array([-0.65, 0.65]))
print(np.quantile(score_users, q=[0.005, 0.995]))

fig.tight_layout()
for target in ('Results/MSI.pdf', 'Results/MSI.png'):
    plt.savefig(target)
plt.show()

In [34]:
# 0.5% and 99.5% quantiles of the user scores (the same pair of values that sets
# the x-limits of the histogram above)
print(np.quantile(score_users, q = [0.005, 0.995]))


In [10]:
# Attach each user's final score as a new column. score_users has one entry per row
# of A, and A was built from this frame, so the values line up with the rows.
# NOTE: from here on the frame has an extra column; re-running the cell that builds
# A without first rebuilding the frame would include 'score' in the matrix.
user_media_df_filtrado['score'] = score_users

In [11]:
# Write the per-user scores to CSV (the frame's index, usr_id, plus the 'score' column)
user_media_df_filtrado['score'].to_csv('Results/MSI_users_score_with_1outletshared.csv')

### Relation with ideology

In [12]:
from scipy.stats import mode 

# Most frequent 'opinion_dinamic' label of each user.
# pandas' Series.mode is used instead of scipy.stats.mode(...)[0][0]: recent SciPy
# releases reject non-numeric input and no longer return a result that can be
# indexed twice. Series.mode returns the modes sorted, so on a tie the smallest
# label is taken, as scipy did. Missing labels are ignored; a user with no label
# at all gets NaN.
user_ideology = data.groupby('usr_id')['opinion_dinamic'].agg(
    lambda labels: labels.mode().iloc[0] if labels.notna().any() else np.nan
)

# Look every scored user up by usr_id, so the labels follow the rows of
# user_media_df_filtrado by key rather than by position
ideology_of_msi_users = user_ideology.reindex(user_media_df_filtrado.index).to_list()

user_media_df_filtrado['ideology'] = ideology_of_msi_users

In [13]:
# User-score histograms split by ideology label, with outlet scores as vertical lines
fig, ax = plt.subplots(dpi=300, figsize=(5, 3))

# Colour per ideology label; 'FF' is shown in the legend as 'CL', 'MP' as 'CR'
ideocolor = {'FF': 'b', 'MP': 'r'}
for ideology, colour in ideocolor.items():
    group_scores = user_media_df_filtrado.loc[user_media_df_filtrado['ideology'] == ideology, 'score']
    legend_label = ['CL'] if ideology == 'FF' else ['CR']
    # NOTE(review): np.arange leaves out the stop value, so the last bin edge is 1.75 and
    # scores above it are not drawn - confirm this is intended.
    ax.hist(group_scores, bins=np.arange(-2, 2, 0.25), alpha=0.50, color=colour, density=True, label=legend_label)
ax.set_xlabel(r'MSI', size=16)
ax.legend(loc='upper left', fontsize=8)

# Outlet markers go on a twin y-axis whose ticks are hidden
ax2 = ax.twinx()
for outlet, outlet_score in media_score:
    ax2.axvline(outlet_score, color='grey', alpha=0.65)
    # 'Clarin' is written lower than the other outlet names
    label_height = 0.25 if outlet == 'Clarin' else 0.5
    ax2.text(x=outlet_score - 0.02, y=label_height, s=outlet, rotation=90, size=6)
ax2.set_yticks([])

# x-range: the 0.5%-99.5% quantile span of all user scores, widened by 0.65 on each side
ax.set_xlim(np.quantile(score_users, q=[0.005, 0.995]) + [-0.65, 0.65])

fig.tight_layout()
for target in ('Results/MSI_ideology.pdf', 'Results/MSI_ideology.png'):
    plt.savefig(target)
plt.show()

### Relation with sentiment bias 

In [14]:
# Display the [outlet, score] pairs computed earlier
media_score

In [15]:
# Reload the CSV with all of its columns and keep a single row per distinct
# 'url_expanded' (drop_duplicates keeps the first occurrence by default)
df_aux = pd.read_csv('Data/dataEchoChambers_with_article_info.csv').drop_duplicates(subset=['url_expanded'])

In [16]:
def SB_Albanese2020(x):
    """Sentiment bias of a single record.

    Computes ((pos_mp - neg_mp) - (pos_ff - neg_ff)) / (N_ff + N_mp) from the
    fields of ``x``.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row or a dict)
        Must provide the keys 'pos_mp', 'neg_mp', 'pos_ff', 'neg_ff', 'N_ff' and 'N_mp'.

    Returns
    -------
    float
        The normalised difference, or ``np.nan`` when ``N_ff + N_mp`` is zero.
    """
    balance_mp = x['pos_mp'] - x['neg_mp']
    balance_ff = x['pos_ff'] - x['neg_ff']
    total = x['N_ff'] + x['N_mp']
    # Nothing to normalise by: the bias is undefined
    if total == 0:
        return np.nan
    return (balance_mp - balance_ff) / total

In [17]:
# Sentiment bias of every row of df_aux (the function is applied row by row)
df_aux['SB'] = df_aux.apply(SB_Albanese2020, axis=1)

In [18]:
# Mean SB per outlet, sorted from lowest to highest mean; reset_index turns
# 'medio' back into an ordinary column
SB_medio = df_aux.groupby('medio')['SB'].mean().sort_values().reset_index()

In [19]:
# Keep only the outlets listed in ppal_media
SB_medio = SB_medio[SB_medio.medio.isin(ppal_media)].copy()

In [20]:
# Order the outlets alphabetically by name
SB_medio = SB_medio.sort_values(by='medio')

In [21]:
# Outlet scores listed in alphabetical order of outlet name
msi = [outlet_score for _, outlet_score in sorted(media_score, key=lambda pair: pair[0])]

In [22]:
# Attach each outlet's MSI score by outlet name. Looking the score up by name does
# not depend on SB_medio and media_score having the same length and the same
# ordering, which a positional list assignment silently requires.
SB_medio['MSI'] = SB_medio['medio'].map(dict(media_score))

In [23]:
# Outlet-level scatter: MSI score (x) against mean sentiment bias (y)
fig, ax = plt.subplots(dpi=300, figsize=(7, 7))

ax.scatter(SB_medio['MSI'], SB_medio['SB'], alpha=0.50, color='g', s=500)
ax.set_xlabel(r'MSI', size=30)
ax.set_ylabel(r'$\langle SB \rangle$', size=30)

# Write every outlet's name at its point
for outlet_row in SB_medio.itertuples(index=False):
    ax.text(outlet_row.MSI, outlet_row.SB, outlet_row.medio, size=10)

ax.set_xlim([-2, 2])
ax.set_ylim(-0.20, 0.05)
#ax.set_title('Spearman r: {:.3f}'.format(SB_medio[['SB', 'MSI']].corr('spearman').to_numpy()[1,0]), size = 16)
ax.tick_params(axis='both', labelsize=16)

fig.tight_layout()
for target in ('Results/MSI_SB.pdf', 'Results/MSI_SB.png'):
    plt.savefig(target)
plt.show()