#Network analysis for the repeal the 19th tweet network

In [1]:
import gzip
import json

import networkx as nx

In [2]:
# This takes about 10 sec to run. Be a little bit patient.
tweets = []
for line in gzip.open('../data/repealthe19th.jsonl.gz', 'rt'):
    if line:
        tweet = json.loads(line)
        tweets.append(tweet)

In [3]:
# What proportion of tweets in this list are retweets? Round to three decimal places.
retweets = [tweet for tweet in tweets if 'retweeted_status' in tweet.keys()]
len(retweets), len(retweets)/len(tweets) 

(14858, 0.6365077325108169)

In [4]:
# let's build a graph
G = nx.DiGraph()
for retweet in retweets:
    retweeting_user = retweet['user']
    
    retweeted_status = retweet['retweeted_status']
    retweeted_user = retweeted_status['user']
    
    G.add_edge(retweeted_user['screen_name'], retweeting_user['screen_name'])

In [5]:
preEdges = len(G.edges())
# Remove self loops
G.remove_edges_from(G.selfloop_edges())
print('from', preEdges, 'to', len(G.edges()))

from 14818 to 14795


In [49]:
# general stats
print('Nodes:', len(G.nodes()))
print('Edges:', len(G.edges()))
print('Density:', nx.density(G))
print(nx.is_strongly_connected(G))
print(len(list(nx.isolates(G))))

Nodes: 14874
Edges: 14795
Density: 6.687882193029756e-05
False
12


In [29]:
# find max out and in degrees for the network
maxOd = max(dict(G.out_degree(G.nodes())).values())
print('max out degree:', maxOd)
maxId = max(dict(G.in_degree(G.nodes())).values())
print('max in degree:', maxId)

max_outdeg_name = ' '
for name, Odeg in dict(G.out_degree(G.nodes())).items():
    if Odeg == maxOd:
        max_outdeg_name = name
print('max out degree:', max_outdeg_name)

# what is the second highest degree node?
twoMacOD = sorted(dict(G.out_degree(G.nodes())).values(),reverse=True)[1]

max_outdeg_name = ' '
for name, Odeg in dict(G.out_degree(G.nodes())).items():
    if Odeg == twoMacOD:
        max_outdeg_name = name
print('2nd max out degree:', max_outdeg_name)

max out degree: 900
max in degree: 48
max out degree: Samandjunk
2nd max out degree: m_kaish_esq


In [33]:
outDegs = dict(G.out_degree(G.nodes())).values()
zeroDegs = len([d for d in outDegs if d == 0])
print(zeroDegs/len(G.nodes()))

0.8691676751378244


In [37]:
# grab the largest connected component
Gcc = max(nx.weakly_connected_components(G), key=len)

print('porportion of nodes:',len(Gcc)/len(G.nodes()))

porportion of nodes: 0.800658867823047


In [50]:
Gcc

{'ScottPresler',
 'GwynTAlexander',
 'JGWTpaFl',
 'Lit_Whane',
 'Heinzilla',
 'kinglanikaa',
 'JakePlatinum',
 'JennIves',
 'CarrieEJohnston',
 'Jessiccanation',
 'KarsinXXX',
 'soIndi',
 'susqhb',
 'pauljustinc',
 'terigiles40',
 'jeanette27g',
 'Ardarakaminskya',
 'MCA420',
 'jachlinwilliams',
 'Salg26',
 'CantStumpTrump1',
 'cyclebygrace',
 'christinelotz',
 'Lee_Fairikson',
 'Fatifratt',
 'hannahthewho',
 'AcaciaIves',
 'xoSweetTweetxo',
 'MaryNesham',
 'AgathaGikunda',
 'isabelleshav',
 'ladygabes',
 'Shedd7Mike',
 'daley_maddy',
 'YeuxDeBuse',
 'hannahlmaurice',
 'PatriciaBWriter',
 'sarahfeatonby',
 'cdubbs913',
 'joshgreenman',
 'JvJProductions',
 't_mihaljevic',
 'LisaIronTongue',
 'BrettOrlob',
 'SOLOMONOFZION',
 'briannewendol',
 'StphanieBreton2',
 'natalieinezp',
 'vcbbpw_Carr',
 'pmswolfy',
 'hannahmariet27',
 'sydneett',
 'Big__AL_1',
 'peggystars',
 'Tay_HarBu',
 'dayy_jay',
 'ChristianDeW99',
 'jamesmurphypdx',
 'stnkpot',
 'lauren_wald',
 'Jen_Robinette',
 'BLang71',


In [42]:
foundTweet = {}
for tweet in tweets:
    if tweet['user']['screen_name'] == 'Samandjunk':
        foundTweet = tweet
foundTweet


{'created_at': 'Wed Oct 12 18:37:38 -0400 2016',
 'entities': {'hashtags': [{'indices': [21, 35], 'text': 'repealthe19th'}],
  'symbols': [],
  'urls': [],
  'user_mentions': []},
 'favorited': False,
 'id_str': '786335044981907456',
 'otherfields': {'favorite_count': '20890',
  'filter_level': 'low',
  'qd': '\x00',
  'rm': '\x00\x00\x00\x00'},
 'retweet_count': 10350,
 'retweeted': False,
 'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>',
 'text': "How about instead of #repealthe19th we only let women vote for the next 131 years so we're all even",
 'truncated': False,
 'user': {'contributors_enabled': False,
  'created_at': 'Thu Feb 28 14:25:50 -0500 2013',
  'default_profile': True,
  'default_profile_image': False,
  'description': 'USF| careful SpongeBob',
  'favourites_count': 9179,
  'followers_count': 360,
  'friends_count': 154,
  'geo_enabled': True,
  'id_str': '1228206432',
  'is_protected': False,
  'is_translator': False,
  '