In [1]:
import os
import re
import datetime as dt
import time
import random
# import sqlite3
import json

# data science
import math
import numpy as np
import pandas as pd

In [2]:
# configurations

# Allow multiple outputs for each cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# Suppress warnings 
import warnings
warnings.filterwarnings('ignore')

# display all columns
pd.set_option('display.max_columns', None)

# suppress auto-conversion to scientific notation
pd.set_option('display.precision', 6)

## network
- design
 - edge = from, srce, weight = sentiment
 - edge weight = sentiment of tweet
 - node size = # of retweets or mentions? # of followers?
 - node color = gender?
 - node content = user info, maybe URL
 - tweet text - in edge or node?
 - url to profile or tweet - in edge or node?

# Engagements & Interactions

In [3]:
associations = pd.read_csv('Data/associations.csv')
tweets = pd.read_csv('Data/tweets3.csv')

In [4]:
associations['associationType'].value_counts()
# focus: 'retweet'

retweet    224702
self        93305
reply       47014
mention     33123
Name: associationType, dtype: int64

In [5]:
retweets = associations[associations['associationType'] == 'retweet']
retweets

Unnamed: 0,associationId,tweetId,screenName,associationType,keyword
5118,5119,4998,wordsrox,retweet,NancyPelosi
5119,5120,4999,Objective_ish,retweet,NancyPelosi
5121,5122,5001,criynwa293,retweet,jonfavs
5122,5123,5002,krissymom2,retweet,NancyPelosi
5124,5125,5004,jestor810,retweet,NancyPelosi
5125,5126,5005,LisaD787,retweet,earthma23
5127,5128,5007,GBan14,retweet,NancyPelosi
5128,5129,5008,MsHasNoPatience,retweet,SarahSpain
5130,5131,5010,Perspectvz,retweet,NancyPelosi
5131,5132,5011,CarnivoraUrsida,retweet,wesley_jordan


In [6]:
retweets = retweets[retweets['screenName'] != retweets['keyword']]
retweets

Unnamed: 0,associationId,tweetId,screenName,associationType,keyword
5118,5119,4998,wordsrox,retweet,NancyPelosi
5119,5120,4999,Objective_ish,retweet,NancyPelosi
5121,5122,5001,criynwa293,retweet,jonfavs
5122,5123,5002,krissymom2,retweet,NancyPelosi
5124,5125,5004,jestor810,retweet,NancyPelosi
5125,5126,5005,LisaD787,retweet,earthma23
5127,5128,5007,GBan14,retweet,NancyPelosi
5128,5129,5008,MsHasNoPatience,retweet,SarahSpain
5130,5131,5010,Perspectvz,retweet,NancyPelosi
5131,5132,5011,CarnivoraUrsida,retweet,wesley_jordan


In [7]:
from pyvis.network import Network
def renderNetwork(df):
    net = Network(width='800px', height='600px', bgcolor='#222222', font_color='#EEEEEE', notebook=True)
    net.barnes_hut(overlap=1)
    
    nodes = pd.concat([df['screenName'], df['keyword']], 0)
    nodes = pd.DataFrame(nodes, columns=['screenName'])
    nodes.drop_duplicates(inplace=True)
    nodes.reset_index(drop=True, inplace=True)
    
    net.add_nodes(nodes['screenName'].tolist())
    net.add_edges(list(zip(df['screenName'], df['keyword'])))
    
    net.prep_notebook()
    return net.show(f"output/network_{time.strftime('%H%M')}.html")

def analyzeNetwork(net, df):
    nodes = pd.concat([df['screenName'], df['keyword']], 0)
    nodes = pd.DataFrame(nodes, columns=['screenName'])
    nodes.drop_duplicates(inplace=True)
    nodes.reset_index(drop=True, inplace=True)
    
    net.add_nodes(nodes['screenName'].tolist())
    net.add_edges(list(zip(df['screenName'], df['keyword'])))
    return net

In [8]:
def constructNetwork(retweets, currentNodes, leaves, branches, degrees, currentDegree):
    print(f'retweets:{len(retweets)}')
    print(f'current degree:{currentDegree}')
    print(f'current nodes:{currentNodes}')
    nextNodes = []
    edges = pd.DataFrame()
    for node in currentNodes:
        # unique users that retweeted initialNodes
        shortList = retweets[retweets['keyword'] == node]['screenName'].unique()

        # selected top n users from shortList that received the most retweets
        currentLeaves = retweets[retweets['keyword'].isin(shortList)]['keyword'].value_counts()[:leaves].index.tolist()

        # append branches to next nodes
        nextNodes = nextNodes + retweets[retweets['keyword'].isin(shortList)]['keyword'].value_counts()[:branches].index.tolist()

        # copy n edges from retweets stemming from current node
        newEdges = retweets[retweets['screenName'].isin(currentLeaves)&(retweets['keyword'] == node)][:leaves]

        if len(edges) == 0:
            edges = newEdges.copy()
        else:
            edges = pd.concat([edges,newEdges])

    edges['degree'] = currentDegree
    if degrees > 0:
        return pd.concat([edges,
                          constructNetwork(retweets[retweets['keyword'].isin(currentNodes) == False], 
                                           nextNodes,
                                           leaves,
                                           branches + 2, 
                                           degrees - 1,
                                           currentDegree + 1
                                          )],0)
    else:
        return edges

In [9]:
# initial nodes: 'TaranaBurke', 'Alyssa_Milano', 'LaurenJauregui', 'SenGillibrand'
currentNodes = ['TaranaBurke', 'Alyssa_Milano', 'LaurenJauregui', 'SenGillibrand']

leaves = 30
branches = 4
degrees = 5
currentDegree = 0

In [13]:
df = constructNetwork(df, currentNodes, leaves, branches, degrees, currentDegree)
df

retweets:222763
current degree:0
current nodes:['TaranaBurke', 'Alyssa_Milano', 'LaurenJauregui', 'SenGillibrand']
retweets:199002
current degree:1
current nodes:['Blackamazon', 'womensmarch', 'AngryBlackLady', 'ShaunKing', 'Blackamazon', 'LadyThriller69', 'MaureenShaw', 'evry2min', 'infLAURENZA', 'DREWSINGS', 'DulleyTopBooks', 'PaganMediaBites', 'WomenOccupyHwd', 'chillibeanboy', 'ReignOfApril', 'SophiaBush']
retweets:197094
current degree:2
current nodes:['ReaganGomez', 'shanikharris', 'IWriteAllDay_', 'terriah01', 'docfreeride', 'WebOften', 'VintageKnits', 'cdnwomenfdn', 'iSenseChange', 'htownfeminista', 'marygauthier_', 'Time4Rise', 'WentRogue', 'docfreeride', 'BrownGirlBegins', 'joylladiebnx', 'MadDiva007', 'FoxyJazzabelle', 'Ez4u2say_Janis', 'KelciRee', 'TinaBit', 'dairyblondeLN66', '_Aesthetixx', 'ReaganGomez', 'shanikharris', 'IWriteAllDay_', 'terriah01', 'docfreeride', 'WebOften', 'AynRandPaulRyan', 'kharyp', '_ShaunDougherty', 'gettinnoticedmo', 'Genosworld', 'BKnight1995', '

Unnamed: 0,associationId,tweetId,screenName,associationType,keyword,degree
96455,96456,95009,CeciliaNano,retweet,TaranaBurke,0
100882,100883,99369,AngryBlackLady,retweet,TaranaBurke,0
111701,111702,110008,BettyBuckley,retweet,TaranaBurke,0
137204,137205,135259,IWriteAllDay_,retweet,TaranaBurke,0
137813,137814,135863,IBJIYONGI,retweet,TaranaBurke,0
138652,138653,136696,JeanHyams,retweet,TaranaBurke,0
142687,142688,140661,shanikharris,retweet,TaranaBurke,0
143281,143282,141246,Blackamazon,retweet,TaranaBurke,0
144995,144996,142930,Compassioning,retweet,TaranaBurke,0
145287,145288,143221,leanne4pa,retweet,TaranaBurke,0


In [None]:
_ = df[(df['keyword'].isin(df['screenName']) == False) & (df['degree'] != 0)].index
df = retweets#.drop(_)

In [14]:
renderNetwork(df)

# Network Analysis

In [40]:
net = Network(width='1280px', height='700px', bgcolor='#222222', font_color='#EEEEEE', notebook=True)
net.barnes_hut(overlap=1)
net = analyzeNetwork(net, df)

In [45]:
net.get_adj_list()

{'CeciliaNano': {'TaranaBurke'},
 'AngryBlackLady': {'BrownGirlBegins',
  'FoxyJazzabelle',
  'MadDiva007',
  'PrettyFootWoman',
  'TaranaBurke',
  'WentRogue',
  'docfreeride',
  'joylladiebnx',
  'kyliesparks',
  'lydiaelise'},
 'BettyBuckley': {'TaranaBurke'},
 'IWriteAllDay_': {'Blackamazon', 'TaranaBurke'},
 'IBJIYONGI': {'TaranaBurke'},
 'JeanHyams': {'TaranaBurke'},
 'shanikharris': {'Blackamazon', 'SenGillibrand', 'TaranaBurke'},
 'Blackamazon': {'IWriteAllDay_',
  'Megan_Sass',
  'ReaganGomez',
  'StlGal_36',
  'TaranaBurke',
  'WebOften',
  'dana_flo',
  'docfreeride',
  'everyhumanknows',
  'exfkaty',
  'normandiewilson',
  'shanikharris',
  'terriah01'},
 'Compassioning': {'TaranaBurke'},
 'leanne4pa': {'TaranaBurke'},
 'windmillcharger': {'MooseLambResist', 'ProudResister', 'TaranaBurke'},
 'shreec': {'TaranaBurke'},
 'nwseal11': {'TaranaBurke'},
 'JW4Resistance': {'TaranaBurke'},
 'kharyp': {'BlaisBlais5', 'MaureenShaw', 'TaranaBurke', 'lkjtexas'},
 'ShaunKing': {'Ez4u2sa

In [460]:
net.neighbors('TaranaBurke')

{'AngryBlackLady',
 'BettyBuckley',
 'Blackamazon',
 'BreaktheCycleDV',
 'EqualRightsAdv',
 'IBJIYONGI',
 'JW4Resistance',
 'PattyArquette',
 'QueenMab87',
 'ShaunKing',
 'YWCA_Canada',
 'aijenpoo',
 'domesticworkers',
 'farrah_khan',
 'kharyp',
 'knowyourIX',
 'leanne4pa',
 'shanikharris',
 'windmillcharger'}