# Journalist Discussions

_Do journalists talk to others, or mostly amongst themselves?_

---

Using the data collected, we hope to begin answering this question. By analysing the tweets collected during our monitoring period, and the descriptions of each user profile contained within those tweets, we are able to classify users into two distinct groups: journalists and non-journalists.

By then reading the content of all tweets authored by users classified as journalists that are either replies or contain a mention of another Twitter user, we can begin to establish whether journalists talk mostly amongst themselves or whether they interact with those outside of their community.


In [None]:
import json
import sys
import os.path
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

# add penemue to path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir)))
from utils import user_has_keyword
from utils import twiterate

In [None]:
def get_reply(t):
    """Describe a reply tweet as a sender/receiver pair.

    :param t: A tweet object
    :return:  A python dict
              'source' holds the id_str of the tweet's author,
              'target' holds the tweet's in_reply_to_user_id_str
              value, passed through unchanged
    """

    sender = t['user']['id_str']
    receiver = t['in_reply_to_user_id_str']

    return dict(source=sender, target=receiver)

In [None]:
def get_mention(t):
    """Pair the author of a tweet with every user the tweet mentions.

    :param t: A tweet object
    :return:  A python dict
              'user_id_str' holds the id_str of the tweet's author,
              'mentions' holds a list with the id_str of each entry
              in the tweet's entities.user_mentions, in order
    """

    mentioned = []
    for entry in t['entities']['user_mentions']:
        mentioned.append(entry['id_str'])

    return dict(user_id_str=t['user']['id_str'], mentions=mentioned)

In [None]:
# Path of the tweets dataset handed to the twiterate() calls that build
# the `replies` and `mentions` lists further down.
tweets_file = '../data/output/journalist_discussions_within_dataset.json'

In [None]:
# joi: id_str of every user for which user_has_keyword() is truthy
# oth: id_str of every remaining user
joi, oth = [], []

In [None]:
# Read the full list of user profiles from disk.
with open('../data/output/users.json', 'r') as usersfile:
    users = json.load(usersfile)

# Split the users into the two groups: those matched by user_has_keyword()
# are recorded in `joi`, everyone else in `oth`.
for user in users:
    bucket = joi if user_has_keyword(user) else oth
    bucket.append(user['id_str'])

In [None]:
# Look up authors in a set built once; testing membership against the
# `joi` list would rescan the whole list for every tweet.
joi_ids = set(joi)

# Keep only genuine replies (a target is present) written by a user in `joi`.
replies = [reply
           for reply in twiterate(get_reply, tweets_file=tweets_file)
           if reply['target'] is not None
           and reply['source'] in joi_ids]

In [None]:
# Look up authors in a set built once; testing membership against the
# `joi` list would rescan the whole list for every tweet.
joi_ids = set(joi)

# Keep only tweets that mention at least one user and were written by a
# user in `joi`.
mentions = [mention
            for mention in twiterate(get_mention, tweets_file=tweets_file)
            if mention['mentions']
            and mention['user_id_str'] in joi_ids]

In [None]:
# One-column table with the number of users in each group.
group_sizes = [len(joi), len(oth)]
pd.DataFrame(data=group_sizes,
             index=['Journalists', 'Non Journalists'],
             columns=['Users'])

In [None]:
# Build lookup sets once so each membership test below is a hash lookup
# instead of a scan over the whole `joi` / `oth` list.
joi_ids = set(joi)
oth_ids = set(oth)

# Flatten the per-tweet mention lists a single time; every mentioned id
# is counted once per occurrence.
mentioned_ids = [id_str for m in mentions for id_str in m['mentions']]

len_mentions = len(mentioned_ids)
joi_mentions_joi = sum(1 for id_str in mentioned_ids if id_str in joi_ids)
joi_mentions_oth = sum(1 for id_str in mentioned_ids if id_str in oth_ids)

In [None]:
def _mention_share(count):
    """Format `count` as a percentage of all mentions, to one decimal place.

    :param count: Number of mentions in one category
    :return:      A string such as '42.5'; '0.0' when there are no
                  mentions at all, instead of raising ZeroDivisionError
    """
    if not len_mentions:
        return "0.0"
    return "%.1f" % ((count / len_mentions) * 100)


# Mentions made by journalists, split by the group of the mentioned user.
pd.DataFrame([[joi_mentions_joi, _mention_share(joi_mentions_joi)],
              [joi_mentions_oth, _mention_share(joi_mentions_oth)]],
             ['To Journalists', 'To Non Journalists'],
             ['Mentions by Journalists', '%'])

In [None]:
# Build lookup sets once so each membership test below is a hash lookup
# instead of a scan over the whole `joi` / `oth` list.
joi_ids = set(joi)
oth_ids = set(oth)

len_replies = len(replies)
# Count replies by the group that the replied-to user belongs to.
joi_to_joi = sum(1 for reply in replies if reply['target'] in joi_ids)
joi_to_oth = sum(1 for reply in replies if reply['target'] in oth_ids)

In [None]:
def _reply_share(count):
    """Format `count` as a percentage of all replies, to one decimal place.

    :param count: Number of replies in one category
    :return:      A string such as '42.5'; '0.0' when there are no
                  replies at all, instead of raising ZeroDivisionError
    """
    if not len_replies:
        return "0.0"
    return "%.1f" % ((count / len_replies) * 100)


# Replies written by journalists, split by the group of the replied-to user.
pd.DataFrame([[joi_to_joi, _reply_share(joi_to_joi)],
              [joi_to_oth, _reply_share(joi_to_oth)]],
             ['To Journalists', 'To Non Journalists'],
             ['Replies by Journalists', '%'])