In [27]:
import pandas as pd
import networkx as nx
from itertools import combinations,permutations
from networkx.algorithms.community import greedy_modularity_communities

cleaning and loading data

In [28]:


def safecast(val, to_type=int, default=None):
    try:
        return to_type(val)
    except (ValueError, TypeError):
        return default

friendshp_df = pd.read_csv('Friendship-network_data_2013_CLEANED.csv', sep='\t')
G_fren = nx.DiGraph()
for row in friendshp_df.itertuples(index=False):
    u, v = int(row.i), int(row.j)
    G_fren.add_edge(u, v)


physa_contacts_df = pd.read_csv('High-School_data_2013_CLEANED.csv', sep='\t')
G_physa = nx.Graph()
physa_edges = physa_contacts_df[['i', 'j']].astype(int).drop_duplicates().values
G_physa.add_edges_from(physa_edges)

contactdiary_df = pd.read_csv('Contact-diaries-network_data_2013_CLEANED.csv', sep='\t')
G_cdiary = nx.DiGraph()
for row in contactdiary_df.itertuples(index=False):
    G_cdiary.add_edge(int(row.i), int(row.j), weight=int(row.w))


fb_df = pd.read_csv('Facebook-known-pairs_data_2013_CLEANED.csv', sep='\t')
G_fbzucc = nx.Graph()
for row in fb_df.itertuples(index=False):
    u, v, w = int(row.i), int(row.j), int(row.w)
    G_fbzucc.add_edge(u, v, weight=w)


def create_metadata_dict(df):
    return df.set_index('ID').to_dict(orient='index')

metadata_df = pd.read_csv('mt actual.txt', sep='\t', header=None, names=['ID', 'Class', 'Gender'])
metadata_df_copy = metadata_df.copy()
metadata_dict = create_metadata_dict(metadata_df_copy)


metadata_count = 0
for x in metadata_dict:
    metadata_count += 1

print("Graphs loaded:")
print(f"Friendship: {G_fren.number_of_nodes()} nodes, {G_fren.number_of_edges()} edges")
print(f"Physical: {G_physa.number_of_nodes()} nodes, {G_physa.number_of_edges()} edges")
print(f"Diary: {G_cdiary.number_of_nodes()} nodes, {G_cdiary.number_of_edges()} edges")
print(f"Facebook: {G_fbzucc.number_of_nodes()} nodes, {G_fbzucc.number_of_edges()} edges")
print(f"Metadata loaded for {metadata_count} students.")


Graphs loaded:
Friendship: 134 nodes, 668 edges
Physical: 327 nodes, 5818 edges
Diary: 120 nodes, 502 edges
Facebook: 156 nodes, 4515 edges
Metadata loaded for 329 students.


P1: Triads and friendship vs real life contact

Physical triads

In [37]:
triAds = set()

for n in G_physa.nodes():
    neighbors = list(G_physa.neighbors(n))
    for i in range(len(neighbors)):
        for j in range(i + 1, len(neighbors)):
            n1, n2 = neighbors[i], neighbors[j]
            if G_physa.has_edge(n1, n2):
                triAds.add(tuple(sorted((n, n1, n2))))

triAds = list(triAds)
print(f"Number of physical triads found: {len(triAds)}")


Number of physical triads found: 34220


fully and partially reciprocal triads in contact diaries

In [30]:


f_rec_cd_triads = set()
p_rec_cd_triads = set()

ndiary = list(G_cdiary.nodes())

# checking all perm in all combs to make sure nothing is missed
for trio in combinations(ndiary, 3):
    found = False  
    
    for perm in permutations(trio, 3):
        a, b, c = perm

        
        cycle = (
            G_cdiary.has_edge(a, b) and
            G_cdiary.has_edge(b, c) and
            G_cdiary.has_edge(c, a)
        )
        
        if cycle:
            ab_rec = G_cdiary.has_edge(b, a)
            bc_rec = G_cdiary.has_edge(c, b)
            ca_rec = G_cdiary.has_edge(a, c)

            tsorted = tuple(sorted((a, b, c)))

            if ab_rec and bc_rec and ca_rec:
                f_rec_cd_triads.add(tsorted)
            else:
                p_rec_cd_triads.add(tsorted)

            found = True
            break  

#printing the results yipeeee
print(f"Contact diary Graph:")
print(f"Fully reciprocal triads: {len(f_rec_cd_triads)}")
print(f"Partly reciprocal triads: {len(p_rec_cd_triads)}")
print("Example (fully reciprocal triads):", list(f_rec_cd_triads)[:5])
print("Example (partly reciprocal triads):", list(p_rec_cd_triads)[:5])


Contact diary Graph:
Fully reciprocal triads: 47
Partly reciprocal triads: 104
Example (fully reciprocal triads): [(15, 21, 826), (177, 400, 945), (400, 428, 945), (245, 502, 691), (3, 147, 407)]
Example (partly reciprocal triads): [(268, 407, 504), (211, 448, 845), (177, 400, 984), (106, 272, 587), (502, 634, 869)]


fully and partially reciprocal triads in friendship

In [31]:



f_rec_fs_triads = set()
p_rec_fs_triads = set()

# checking all perm in all combs to make sure nothing is missed
nfriend = list(G_fren.nodes())
for trio in combinations(nfriend, 3):
    for perm in permutations(trio, 3):
        a, b, c = perm

        
        cycle = (
            G_fren.has_edge(a, b) and
            G_fren.has_edge(b, c) and
            G_fren.has_edge(c, a)
        )

        if cycle:
           
            ab_recip = G_fren.has_edge(b, a)
            bc_recip = G_fren.has_edge(c, b)
            ca_recip = G_fren.has_edge(a, c)

            tsorted = tuple(sorted((a, b, c)))

            if ab_recip and bc_recip and ca_recip:
                f_rec_fs_triads.add(tsorted)
            else:
                p_rec_fs_triads.add(tsorted)

            break  

#printing the results yipeeee
print(f"Friendship Graph:")
print(f"Fully reciprocal triads: {len(f_rec_fs_triads)}")
print(f"Partly reciprocal triads: {len(p_rec_fs_triads)}")
print("Example (fully reciprocal triads):",
      list(f_rec_fs_triads)[:5])
print("Example (partly reciprocal triads):",
      list(p_rec_fs_triads)[:5])


Friendship Graph:
Fully reciprocal triads: 200
Partly reciprocal triads: 146
Example (fully reciprocal triads): [(147, 407, 674), (45, 79, 674), (70, 101, 132), (222, 343, 867), (55, 883, 894)]
Example (partly reciprocal triads): [(72, 147, 674), (45, 388, 496), (28, 327, 353), (325, 622, 624), (170, 265, 883)]


finding overlap

In [40]:
cdt_set = set(frozenset(triad) for triad in f_rec_cd_triads)
fst_set = set(frozenset(triad) for triad in f_rec_fs_triads)
common_fully_reciprocal_triads = cdt_set.intersection(fst_set)


print(f" Overlap of fully reciprocal triads in contact diary and friendship graphs:")
print(f"Total in Diary: {len(cdt_set)}")
print(f"Total in Friendship: {len(fst_set)}")
print(f"Only {len(common_fully_reciprocal_triads)} ({(len(common_fully_reciprocal_triads)/len(fst_set)*100 if fst_set else 0):.2f}%) for friends actually mutually reported meeting each other physically too.")
print("Example overlapping triads:", [tuple(triad) for triad in list(common_fully_reciprocal_triads)[:5]])


 Overlap of fully reciprocal triads in contact diary and friendship graphs:
Total in Diary: 47
Total in Friendship: 200
Only 9 (4.50%) for friends actually mutually reported meeting each other physically too.
Example overlapping triads: [(634, 691, 1332), (3, 147, 407), (425, 101, 119), (520, 576, 605), (240, 425, 101)]




average edge weight fully reciprocal vs whole graph

In [33]:
# Finding if mutual triads spend more time together than the average person-to-person edge

triadAvgWeights = []  

for triad in common_fully_reciprocal_triads:
    nodeA, nodeB, nodeC = triad 

    
    edgeList = [
        (nodeA, nodeB), (nodeB, nodeA),
        (nodeB, nodeC), (nodeC, nodeB),
        (nodeC, nodeA), (nodeA, nodeC)
    ]

    edgeWeights = [] 
   
    for fromNode, toNode in edgeList:
        if G_cdiary.has_edge(fromNode, toNode):
            weightValue = G_cdiary[fromNode][toNode]['weight']
            edgeWeights.append(weightValue)
        else:
           
            edgeWeights.append(0)

   
    sumOfWeights = sum(edgeWeights)  
    avgWeightTriad = sumOfWeights / len(edgeWeights)

    triadAvgWeights.append({
        'triadNodes': tuple(map(int, triad)),
        'avgWeight': avgWeightTriad,
        'allEdgeWeights': edgeWeights
    })


allEdgeWeightsList = [d['weight'] for i, j, d in G_cdiary.edges(data=True)]
totalEdgeCount = len(allEdgeWeightsList)
avgWeightWholeGraph = sum(allEdgeWeightsList) / totalEdgeCount if totalEdgeCount else 0

print(f"Average edge weight of the entire diary graph: {round(avgWeightWholeGraph, 2)}")

if triadAvgWeights:
    totalTriads = len(triadAvgWeights)
    avgOfTriads = sum(item['avgWeight'] for item in triadAvgWeights) / totalTriads
    print(f" Average of triad average weights: {round(avgOfTriads, 2)}")
else:
    print(" 0 triads to compare.")


Average edge weight of the entire diary graph: 2.89
 Average of triad average weights: 3.22


Part 2: Finding echo chambers

In [34]:
def detect_echo_chambers_directed(G_friend, min_group_size=3, min_density=0.8, max_outside_ratio=0.1, min_mutuality=0.5):
    echos = []

    # UD for clusters
    clusters = list(greedy_modularity_communities(G_friend.to_undirected()))

    for clust in clusters:
        members = set(clust)
        num_people = len(members)

        if num_people < min_group_size:
            continue

      
        totinside_edges = 0
        mutual_links = 0
        outlinks_outside = 0
        incoming_outside = 0

        for person in members:
            outlist = set(G_friend.successors(person))
            inlist = set(G_friend.predecessors(person))
            
            
            for pal in outlist:
                if pal in members:
                    totinside_edges += 1
                    if person in G_friend.successors(pal):
                        mutual_links += 0.5  
                else:
                    outlinks_outside += 1

           
            for pal in inlist:
                if pal not in members:
                    incoming_outside += 1

       
        max_possible_inside = num_people * (num_people - 1)

        dens = totinside_edges / max_possible_inside if max_possible_inside > 0 else 0
        recipro = mutual_links / totinside_edges if totinside_edges > 0 else 0
        leak_ratio = outlinks_outside / totinside_edges if totinside_edges > 0 else float('inf')

        
        if (dens >= min_density and 
            leak_ratio <= max_outside_ratio and
            recipro >= min_mutuality):

            echos.append({
                'group': members,
                'size': num_people,
                'dens': round(dens, 3),
                'recipro': round(recipro, 3),
                'leak_ratio': round(leak_ratio, 3),
                'edges_inside': totinside_edges,
                'out_links': outlinks_outside,
                'in_links': incoming_outside,
            })

    return echos


detected_echos = detect_echo_chambers_directed(G_fren)
print(f"Found {len(detected_echos)} possible echo chambers.")
for i, ech in enumerate(detected_echos[:3], 1):
    print(f"\nEcho Chamber {i}:")
    print(f"  Group: {sorted([int(x) for x in ech['group']])}")
    print(f"  Size: {ech['size']}")
    print(f"  Density: {ech['dens']}")
    print(f"  Reciprocity: {ech['recipro']}")
    print(f"  Leak Ratio: {ech['leak_ratio']}")
    print(f"  Edges Inside: {ech['edges_inside']}")
    print(f"  Outgoing Links: {ech['out_links']}")
    print(f"  Incoming Links: {ech['in_links']}")


Found 1 possible echo chambers.

Echo Chamber 1:
  Group: [124, 471, 970]
  Size: 3
  Density: 1.0
  Reciprocity: 0.5
  Leak Ratio: 0.0
  Edges Inside: 6
  Outgoing Links: 0
  Incoming Links: 0


Part 3: connectors nodes and bridge edges

In [None]:
def find_KONNECTorNodes_directed_strict(G_friend):
    comm_list = list(greedy_modularity_communities(G_friend.to_undirected()))

    node2comm = {}
    for i, comm in enumerate(comm_list):
        for n in comm:
            node2comm[n] = i

    KONNECTorNodes = []

    for n in G_friend.nodes():
        out_comms = set()
        in_comms = set()

        for nbr in G_friend.successors(n):
            if nbr in node2comm:
                out_comms.add(node2comm[nbr])

        for nbr in G_friend.predecessors(n):
            if nbr in node2comm:
                in_comms.add(node2comm[nbr])

        own_comm = node2comm.get(n, None)
        out_comms.discard(own_comm)
        in_comms.discard(own_comm)

        
        if len(out_comms) >= 1 and len(in_comms) >= 1 and len(out_comms.union(in_comms)) >= 2:
            KONNECTorNodes.append({
                'node': n,
                'out_comms': out_comms,
                'in_comms': in_comms,
                'num_out': len(out_comms),
                'num_in': len(in_comms),
                'my_comm': own_comm
            })

    return KONNECTorNodes


directed_connectors_strict = find_KONNECTorNodes_directed_strict(G_fren)
print(f" Found {len(directed_connectors_strict)} connector students.")
for k in directed_connectors_strict[:5]:
    print(f"Node {k['node']} connects OUT to {k['out_comms']} and IN from {k['in_comms']}")


 Found 14 connector students.
Node 779 connects OUT to {1} and IN from {6}
Node 147 connects OUT to {1, 6} and IN from {1}
Node 407 connects OUT to {1, 6} and IN from {1, 6}
Node 674 connects OUT to {0, 6} and IN from {0, 6}
Node 327 connects OUT to {4, 5} and IN from {4}


In [36]:
def detectVITALLINKS(graphFriend, topN=10):
   
    edgeBetweenness = nx.edge_betweenness_centrality(graphFriend) 

    sortedEdges = sorted(edgeBetweenness.items(), key=lambda x: x[1], reverse=True)

   
    topBridgeEdges = []
    for edgeInfo in sortedEdges[:topN]:
        edgeTuple = edgeInfo[0]
        betweennessScore = edgeInfo[1]
        bridgeData = {
            'edge': edgeTuple,
            'betweenness': round(betweennessScore, 5)
        }
        topBridgeEdges.append(bridgeData)

    return topBridgeEdges


bridgeResults = detectVITALLINKS(G_fren, topN=10)

print("Top 10 Vital Links by Betweenness \n")
for bridge in bridgeResults:
    nodeU, nodeV = bridge['edge']
    betweennessVal = bridge['betweenness'] 
    print(f"Edge ({int(nodeU)}, {int(nodeV)}) with betweenness {betweennessVal}")


Top 10 Vital Links by Betweenness 

Edge (205, 691) with betweenness 0.05625
Edge (117, 407) with betweenness 0.0524
Edge (691, 883) with betweenness 0.049
Edge (272, 441) with betweenness 0.04649
Edge (605, 201) with betweenness 0.0452
Edge (691, 205) with betweenness 0.04442
Edge (691, 245) with betweenness 0.04349
Edge (327, 27) with betweenness 0.03862
Edge (468, 125) with betweenness 0.03558
Edge (343, 101) with betweenness 0.03504
