In [1]:
import warnings
import pandas as pd
import networkx as nx
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from pprint import pprint
from graphrole import RecursiveFeatureExtractor, RoleExtractor

## Data Processing

In [2]:
# load the dataset
data = pd.read_csv('data-triell.csv')

In [3]:
data.head()

Unnamed: 0,username_1,username_2,timestamp_s
0,MetinHakverdi,Nico,1630267154
1,JensZSPD,Nico,1630264243
2,hschmidt,Nico,1630263176
3,MartinRosemann,Nico,1630264131
4,CarstenOvens,Nico,1630263327


In [4]:
data.shape

(16660, 3)

## Creating Graph 

In [5]:
def create_directed_graph(df):
    g = nx.from_pandas_edgelist(df, source='username_1',
                                target='username_2',
                                create_using=nx.DiGraph())
    return g

In [6]:
g = create_directed_graph(data)

In [7]:
g

<networkx.classes.digraph.DiGraph at 0x1e9dde1c310>

## Role Extraction 

In [8]:
# extract features
feature_extractor = RecursiveFeatureExtractor(g)
features = feature_extractor.extract_features()

In [17]:
# assign node roles
role_extractor = RoleExtractor(n_roles=None)
role_extractor.extract_role_factors(features)
node_roles = role_extractor.roles

print('\nnode_roles:')
pprint(node_roles)

print('\nNode role membership by percentage:')
print(role_extractor.role_percentage.round(2))


node_roles:
{'00schneemann': 'role_0',
 '0815_goes_4711': 'role_0',
 '0Murat0': 'role_6',
 '0__Punkt': 'role_0',
 '0x127001': 'role_6',
 '100erneuerbar': 'role_0',
 '10DMark': 'role_0',
 '11achim71': 'role_3',
 '123Lenchen': 'role_0',
 '1893ituka': 'role_0',
 '1900HO': 'role_7',
 '1951Mann': 'role_0',
 '1964Andreasda': 'role_0',
 '1Klaudius62': 'role_6',
 '1arealtalk': 'role_0',
 '1teufelskreis': 'role_0',
 '2008_jens': 'role_0',
 '31O6926': 'role_5',
 '3697elfriedel': 'role_6',
 '4g0nm': 'role_5',
 '558d86617a41449': 'role_4',
 '6cks_': 'role_0',
 '73erStrikesBack': 'role_6',
 '7654321o': 'role_6',
 '90sChris': 'role_7',
 '98mercedes98': 'role_7',
 '99Michel99': 'role_3',
 'A1207Sa': 'role_6',
 'ABCWorldNews': 'role_0',
 'ABaerbock': 'role_0',
 'ABalsliemke': 'role_3',
 'ADressel': 'role_2',
 'AGBuelow': 'role_0',
 'AGH_Berlin': 'role_0',
 'AGStadtkerne': 'role_0',
 'AK55828631': 'role_5',
 'AK_Meier': 'role_0',
 'AK__Social': 'role_6',
 'AKaiser94789344': 'role_0',
 'ALevermann': 'r

 'Katinka1368': 'role_7',
 'Katja84174609': 'role_7',
 'KatjaMast': 'role_4',
 'KatjaMitteldorf': 'role_7',
 'KatjesHansen': 'role_0',
 'Katrin73422154': 'role_0',
 'KatrinD80003849': 'role_0',
 'Katsanchebli': 'role_7',
 'KawiKla': 'role_0',
 'KayaKinkel': 'role_4',
 'KaymanMurat': 'role_0',
 'KazunguHass': 'role_0',
 'KeeyulDendris': 'role_6',
 'Keilerheute': 'role_6',
 'Keksy19': 'role_5',
 'Kellerpass': 'role_0',
 'KerstinCelina': 'role_0',
 'KerstinGriese': 'role_0',
 'KerstinKopp7': 'role_7',
 'KerstinLieder3': 'role_5',
 'KerstinPress': 'role_5',
 'Kerstin_Mueller': 'role_0',
 'KflexK': 'role_7',
 'Kh0815oderso': 'role_6',
 'KhSchenk': 'role_3',
 'KimSieber4': 'role_7',
 'KinderdocNina': 'role_0',
 'Kinderdoktorin': 'role_0',
 'Kingdomcometous': 'role_0',
 'KippelsNicole': 'role_3',
 'KirstenGerhards': 'role_6',
 'KirstenKappert': 'role_4',
 'KirstenTackmann': 'role_7',
 'Kitsuagi': 'role_0',
 'Kla138': 'role_3',
 'KlassikStiftung': 'role_0',
 'KlausAdelt': 'role_4',
 'KlausBeck

 '_Isabellalina_': 'role_6',
 '_Iwan_Sirko_': 'role_1',
 '_Kinderrechte_': 'role_6',
 '_LilliFischer': 'role_4',
 '_MaleficentGirl': 'role_1',
 '_MartinHagen': 'role_0',
 '_MettIgel_': 'role_3',
 '_Paperlapapp': 'role_3',
 '_Slifer_': 'role_6',
 '_TheMack_1': 'role_2',
 '_TravellingJack': 'role_5',
 '____Jeannette': 'role_3',
 '_ansgar': 'role_0',
 '_donalphonso': 'role_0',
 '_jproe': 'role_1',
 '_juliaschramm': 'role_0',
 '_jvl': 'role_6',
 '_klimanotstand_': 'role_0',
 '_programmingDad': 'role_0',
 '_richtig_falsch': 'role_0',
 '_stillCraZy': 'role_0',
 '_tillwe_': 'role_0',
 '_tswolf': 'role_3',
 'aCooore': 'role_0',
 'a_braxes': 'role_4',
 'a_nnaschneider': 'role_0',
 'a_watch': 'role_7',
 'aachen_besetzen': 'role_0',
 'abdi86802726': 'role_6',
 'abnonhar': 'role_7',
 'abolladura1': 'role_5',
 'abususu': 'role_0',
 'achimdittler': 'role_0',
 'addi_ad_portas': 'role_0',
 'addiks': 'role_3',
 'aedwardslevy': 'role_0',
 'af_oke': 'role_3',
 'afd_wallduern': 'role_0',
 'affeu2': 'role_

 'sylviageiger70': 'role_6',
 'systemanalysen': 'role_5',
 'syt_tkmk': 'role_3',
 't0mez': 'role_4',
 't0nky1': 'role_1',
 't_e_e_k': 'role_3',
 't_kurz': 'role_0',
 't_woelfer': 'role_0',
 'tagesschau': 'role_0',
 'tagesthemen': 'role_0',
 'tanit': 'role_0',
 'tanzmaus1407': 'role_1',
 'tapir_mit_maske': 'role_7',
 'tazKlima': 'role_0',
 'tazgezwitscher': 'role_0',
 'tcomster': 'role_0',
 'teambluelife': 'role_0',
 'teamdeu': 'role_0',
 'teamdpara': 'role_0',
 'teleprism': 'role_0',
 'teletabbe': 'role_6',
 'teresabuecker': 'role_0',
 'teskariel': 'role_0',
 'testoiron': 'role_6',
 'textlastig': 'role_0',
 'textmagd': 'role_0',
 'thPichler': 'role_0',
 'th_sattelberger': 'role_7',
 'the4thpip': 'role_5',
 'theNeo42': 'role_4',
 'theWellRedMage': 'role_0',
 'the_real_urbsi': 'role_0',
 'thekla_walker': 'role_0',
 'themroc': 'role_0',
 'theresahein_': 'role_3',
 'thesismum': 'role_0',
 'thewofb': 'role_5',
 'thinkBTO': 'role_0',
 'thomas2theleft': 'role_0',
 'thomas_grieme': 'role_0',
 

In [24]:
#print list of usernames with specific role and add to a list

username_list = []
for username, role in node_roles.items():
    if role == 'role_7':
        username_list.append(username)

print(username_list)

['1900HO', '90sChris', '98mercedes98', 'ARasilop', 'AWidmannMauz', 'AldousLeary', 'Alece92912603', 'Alexand36061482', 'AlexandraCarl2', 'AlpixTM', 'Amira_M_Ali', 'AnLa18294437', 'Andreas_Schwab', 'Anemalon19', 'Anja_aaaaa', 'ArneBab', 'AusBine', 'AxelHohl', 'BLN_CoMeCo', 'B_Gunners21', 'Ba5el', 'Bangeline6', 'Bennix1308', 'Bettina58130936', 'BobKelzow', 'Bode_Anne1', 'BoehningB', 'BreuerLeopold', 'BrunettiGi', 'CKross36', 'CMller3', 'Cardiesel2', 'Chris32770312', 'Chris__Tus', 'Christi43738671', 'ChristianDeut17', 'ChrizKu', 'Claudia2795', 'ClemensLeathley', 'Cochiban', 'Conni_Moehring', 'ConnyR0mer', 'Const99102', 'DanielaHuetter', 'Daniele95206428', 'DarioPower', 'Deliandelicious', 'Demetrio_Sca', 'DennisGladiator', 'DerKinderarzt', 'DerPrivatlehrer', 'Der_Bjoerni', 'Der_ToasterGmbh', 'Dericik76', 'DieterPrick', 'DoeSpam', 'Dorfblume1', 'DrCatlicious', 'DrHanisch', 'ElisabethKaleko', 'EnkelErich', 'Eric_mit_C', 'Erwin24946838', 'Estee77680659', 'EvAna3388', 'Eva0112', 'FKuschel', 'FR

In [25]:
k = g.subgraph(username_list) 

In [26]:
k

<networkx.classes.digraph.DiGraph at 0x1e99e5fcee0>

### Role Extraction on Random Network 

In [29]:
len(g)

4114

In [45]:
h = nx.erdos_renyi_graph(100, 0.7)

In [46]:
h

<networkx.classes.graph.Graph at 0x1e99e7bb850>

In [47]:
# extract features
feature_extractor = RecursiveFeatureExtractor(h)
features = feature_extractor.extract_features()

In [48]:
# assign node roles
role_extractor = RoleExtractor(n_roles=None)
role_extractor.extract_role_factors(features)
node_roles = role_extractor.roles

print('\nnode_roles:')
pprint(node_roles)

print('\nNode role membership by percentage:')
print(role_extractor.role_percentage.round(2))


node_roles:
{0: 'role_2',
 1: 'role_4',
 2: 'role_2',
 3: 'role_2',
 4: 'role_6',
 5: 'role_2',
 6: 'role_1',
 7: 'role_2',
 8: 'role_1',
 9: 'role_4',
 10: 'role_2',
 11: 'role_2',
 12: 'role_1',
 13: 'role_2',
 14: 'role_4',
 15: 'role_6',
 16: 'role_2',
 17: 'role_2',
 18: 'role_2',
 19: 'role_1',
 20: 'role_2',
 21: 'role_2',
 22: 'role_1',
 23: 'role_1',
 24: 'role_2',
 25: 'role_2',
 26: 'role_2',
 27: 'role_2',
 28: 'role_2',
 29: 'role_2',
 30: 'role_1',
 31: 'role_2',
 32: 'role_2',
 33: 'role_4',
 34: 'role_1',
 35: 'role_1',
 36: 'role_2',
 37: 'role_2',
 38: 'role_1',
 39: 'role_1',
 40: 'role_2',
 41: 'role_1',
 42: 'role_2',
 43: 'role_4',
 44: 'role_2',
 45: 'role_2',
 46: 'role_2',
 47: 'role_1',
 48: 'role_2',
 49: 'role_2',
 50: 'role_6',
 51: 'role_2',
 52: 'role_2',
 53: 'role_2',
 54: 'role_2',
 55: 'role_1',
 56: 'role_3',
 57: 'role_2',
 58: 'role_2',
 59: 'role_2',
 60: 'role_1',
 61: 'role_1',
 62: 'role_1',
 63: 'role_1',
 64: 'role_2',
 65: 'role_2',
 66: 'r