In [78]:
import networkx as nx
import networkx.algorithms.community as nx_comm 
import pandas as pd
import matplotlib.pyplot as plt
print(nx.__version__)

3.5


In [79]:
# --- Run configuration -------------------------------------------------------
# Tag used when naming the export directory for this run.
NAME = "ChatGPT"
# Only matrix cells with a weight strictly greater than this are kept as candidate edges.
MIN_WEIGHT = 0
# CSV holding the subreddit-by-subreddit weight matrix.
INPUT_ADJ_MATRIX = "./data/subreddit_graph_ChatGPT.csv"
# Subreddits dropped from the graph before community detection.
FILTERING_SUBREDDITS = ["ChatGPT"]

In [80]:
# Load the subreddit-by-subreddit weight matrix (one column per subreddit).
stack_df = pd.read_csv(INPUT_ADJ_MATRIX)
# The edge-building cell labels the rows with the column names, which only makes
# sense for a square matrix; fail here with a clear message instead of later.
assert stack_df.shape[0] == stack_df.shape[1], (
    f"expected a square adjacency matrix, got {stack_df.shape}"
)
print(stack_df.shape)

(7667, 7667)


Build edges

In [81]:
# Reshape the wide matrix into a long edge table with one row per
# (node1, node2, weight) cell. Rows are labelled with the column names, i.e. the
# matrix rows are taken to be in the same order as its columns.
df_long = (
    stack_df
    .assign(index=stack_df.columns)
    .melt(id_vars='index', var_name='node2', value_name='weight')
    .rename(columns={'index': 'node1'})
)
df_long.head()

Unnamed: 0,node1,node2,weight
0,10s,10s,7
1,10thDentist,10s,0
2,10xPennyStocks,10s,0
3,1200isjerky,10s,0
4,1200isplenty,10s,0


## Weighted networks

In [82]:

# Keep only the matrix cells above the configured weight threshold.
short_df = df_long[df_long["weight"] > MIN_WEIGHT]
stack_edgelist = list(zip(short_df["node1"], short_df["node2"]))
weight_ser = short_df["weight"].to_list()
print(len(weight_ser))

# Candidate edges: weight of at least 1 and no self-loops. Both (a, b) and (b, a)
# cells may be present; the undirected graph stores such a pair as a single edge.
ebunch = [
    (src, dst, w)
    for (src, dst), w in zip(stack_edgelist, weight_ser)
    if w >= 1 and src != dst
]
print(len(ebunch))

wg = nx.Graph(name="Weighted Reply Graph")
wg.add_weighted_edges_from(ebunch)
print(wg)

1475451
1467784
Graph named 'Weighted Reply Graph' with 7667 nodes and 733892 edges


Remove target AI subreddit(s) and all isolated nodes

In [83]:
# Independent copy of the graph without the target subreddit(s) ...
kept_nodes = [n for n in wg.nodes() if n not in FILTERING_SUBREDDITS]
filter_graph = wg.subgraph(kept_nodes).copy()
# ... and without any node left with no neighbours after that removal.
isolated_nodes = list(nx.isolates(filter_graph))
filter_graph.remove_nodes_from(isolated_nodes)
filter_graph.number_of_nodes()

7666

In [84]:
# Materialise the components once: the generator returned by networkx can only be
# consumed a single time, and counting via a leaked loop index (`i + 1`) raises
# NameError on an empty graph or silently reuses a stale `i` from another cell.
connected_components = list(nx.connected_components(filter_graph))
# `len` of a filtered list is used instead of `sum(...)` so this cell keeps working
# even if the name `sum` has been rebound elsewhere in the kernel session.
count = len([comp for comp in connected_components if len(comp) == 1])
print(f"Number of components with a single node: {count}")
# Total number of connected components (cell output).
len(connected_components)

Number of components with a single node: 0


1

In [85]:
def community_layout(G, communities, node2comm, k_within=0.2, k_between=1.5, seed=42):
    """
    Two-level layout:
      - Spring layout within each community
      - Spring layout between communities

    Parameters
    ----------
    G : graph whose nodes are to be positioned.
    communities : iterable of node collections; the i-th entry is community ``i``.
        Any iterable (including a generator) is accepted.
    node2comm : mapping from every node of ``G`` that has an edge to the index of
        its community in ``communities``.
    k_within : ``k`` passed to ``nx.spring_layout`` for each community subgraph.
    k_between : ``k`` passed to ``nx.spring_layout`` for the community-level graph.
    seed : seed passed to every ``nx.spring_layout`` call.

    Returns
    -------
    dict mapping node -> (x, y). Communities that contain no node of ``G`` are
    skipped and contribute no entries.

    Raises
    ------
    KeyError if an endpoint of an edge of ``G`` is missing from ``node2comm``.
    """
    # `communities` is walked twice below; materialise it so generators work too.
    communities = list(communities)

    # Build community graph (supernodes = communities); two supernodes are linked
    # when at least one edge of G crosses between them.
    comm_graph = nx.Graph()
    comm_graph.add_nodes_from(range(len(communities)))
    for u, v in G.edges():
        cu, cv = node2comm[u], node2comm[v]
        if cu != cv:
            comm_graph.add_edge(cu, cv)

    # Layout communities
    pos_comm = nx.spring_layout(comm_graph, k=k_between, seed=seed)

    # Layout nodes within each community
    pos = {}
    for i, members in enumerate(communities):
        pos_sub = nx.spring_layout(G.subgraph(members), k=k_within, seed=seed)
        if not pos_sub:
            # Empty community: nothing to place, and the centroid would divide by zero.
            continue

        # Re-centre the community's own layout on its supernode position.
        cx, cy = pos_comm[i]
        n_members = len(pos_sub)
        mx = sum(x for x, _ in pos_sub.values()) / n_members
        my = sum(y for _, y in pos_sub.values()) / n_members
        for node, (x, y) in pos_sub.items():
            pos[node] = (x - mx + cx, y - my + cy)

    return pos

In [86]:
# 30-colour "turbo" colormap for colouring nodes by community.
# `plt.cm.get_cmap` is deprecated and removed in newer Matplotlib releases;
# `plt.get_cmap` accepts the same (name, lut) arguments.
pltm = plt.get_cmap("turbo", 30)

  pltm =plt.cm.get_cmap("turbo", 30)


### Greedy modularity maximization

In [87]:
# Greedy modularity communities on the filtered graph.
# NOTE(review): no `weight` argument is passed here, whereas the Louvain call
# further down passes weight='weight' — confirm which edge weighting is intended.
greedy_modularity_communities = nx_comm.greedy_modularity_communities(
    filter_graph,
    best_n=20,
)
print(len(greedy_modularity_communities))
for members in greedy_modularity_communities:
    print(members)

6
frozenset({'Trading', 'alienearth', 'AskAnEscort', 'Banking', 'RepladiesDesigner', 'InvestingandTrading', 'golang', 'raspberry_pi', 'BayAreaRealEstate', 'saasbuild', 'SHIVA', 'LawPH', 'reactnative', 'HailuoAiOfficial', 'DataAnnotationTech', 'SomebodyMakeThis', 'bose', 'uklandlords', 'engineeringmemes', 'NotGPT', 'Slack', 'accessibility', 'Astrology_Vedic', 'truespotify', 'COPYRIGHT', 'email', 'MangakaStudio', 'iosgaming', 'hubspot', 'AusSkincare', 'remoteworking', 'Bhopal', 'MicrosoftTeams', 'small_business_ideas', 'LawEthicsandAI', 'SocialMediaMarketing', 'Bitwarden', 'typescript', 'India4all', 'Indiawatchbuffs', 'motivation', 'Skeuomorphism', 'conselhodecarreira', 'PHP', 'QuantumComputing', 'Entrepreneurship', 'alphaandbetausers', 'Chub_AI', 'CallOfDuty', 'OPBR', 'steam_giveaway', 'website', 'XoulAI', 'Gemini', 'ArcBrowser', 'SoftwareEngineerJobs', 'SEO_for_AI', 'battlefield2042', 'dalle', 'IntelligenceScaling', 'ScamCenter', 'KlingAI_Videos', 'aws', 'enlightenment', 'GuitarAmps', 

### Asynchronous fluid communities (asyn_fluidc)

In [88]:
# Run asyn_fluidc separately on every connected component and pool the results.
async_fluidc = []
for component in nx.connected_components(filter_graph):
    _graph = filter_graph.subgraph(component)
    # Ask for at most 15 communities, and never more than the component has nodes.
    n_communities = min(len(component), 15)
    async_fluidc.extend(
        nx_comm.asyn_fluidc(_graph, k=n_communities, max_iter=20000, seed=42)
    )
print(len(async_fluidc))

# Preview the first 11 communities only.
for i, comm in enumerate(async_fluidc[:11]):
    print(i, comm)

# Total number of nodes across all communities. The accumulator is deliberately
# NOT called `sum`: rebinding that name hides the builtin for the rest of the
# kernel session and breaks later `sum(...)` calls such as those in community_layout.
node_total = 0
for comm in async_fluidc:
    node_total += len(comm)
print(node_total)

15
0 {'overwatch2', 'AccurateBattleSim', 'hmm', 'rockhounds', 'VisionPro', 'ZenlessZoneZero', 'scifiwriting', 'lexington', 'TeamfightTactics', 'FORSAKENROBLOX', 'ZZZ_Official', 'MHWilds', 'ShadowFightArena', 'manhwarecommendations', 'rarepuppers', 'powerscales', 'medical', 'BatmanArkham', 'GTA6', 'japanlife', 'spacemarines', 'pcgaming', 'inZOI', 'StarRailStation', 'occult', 'TwistedMetal', 'oblivion', 'PSVR', 'Indiangamers', 'EASportsCFB', 'CharacterRant', 'storyofseasons', 'MyHeroAcadamia', 'Necrontyr', 'thelastofus', 'Metroid', 'persona3reload', 'chimeboost', 'Starfield', 'killteam', 'AMDHelp', 'Witcher3', 'SakamotoDays', 'WorldofTanksConsole', 'Paleontology', 'Sekiro', 'terrariums', 'DeathBattleMatchups', 'Daredevil', 'steak', 'fatestaynight', 'ManyBaggers', 'Stellaris', '8bitdo', 'pokemongo', 'TheRookie', 'OnePieceSpoilers', 'RobinHood', 'ActionFigures', 'PowerScalingHub', 'danganronpa', 'zombies', 'fo4', 'menstrualcups', 'Pathfinder_Kingmaker', 'redstone', 'OnePiecePowerScaling', 

### Louvain

In [89]:
# Louvain partition of the filtered graph, using the 'weight' edge attribute.
louvain_communities = nx_comm.louvain_communities(
    filter_graph, seed=42, weight='weight', max_level=300
)
print(len(louvain_communities))

# Total number of nodes across all communities. The accumulator is deliberately
# NOT called `sum`: rebinding that name hides the builtin for the rest of the
# kernel session and breaks later `sum(...)` calls such as those in community_layout.
node_total = 0
for com in louvain_communities:
    node_total += len(com)
print(node_total)

# Echo every community and persist the same "<index> <members>" lines to disk.
with open('data/louvain_communities_all.txt', 'w') as f:
    for i, com in enumerate(louvain_communities):
        print(i, com)
        f.write(f'{i} {com}\n')


8
7666
0 {'DigitalAudioPlayer', 'ScienceNcoolThings', 'Affiliate', 'Banking', 'RepladiesDesigner', 'InvestingandTrading', 'golang', 'lovable', 'BayAreaRealEstate', 'sportsanalytics', 'LawPH', 'HailuoAiOfficial', 'SomebodyMakeThis', 'turntables', 'IllegallyCuteCats', 'ExpectationVsReality', 'NotGPT', 'Slack', 'accessibility', 'juststart', 'analoghorror', 'Animesuggest', 'COPYRIGHT', 'email', 'MangakaStudio', 'KindroidAI', 'Soft_Launch', 'hubspot', 'remoteworking', 'MicrosoftTeams', 'small_business_ideas', 'LawEthicsandAI', 'SocialMediaMarketing', 'typescript', 'Darts', 'Psychedelics', 'beyondallreason', 'yggTorrents', 'HowToAIAgent', 'Medium', 'AgentsOfAI', 'conselhodecarreira', 'Entrepreneurship', 'PHP', 'OSINT', 'alphaandbetausers', 'modelcontextprotocol', 'programminghumor', 'steam_giveaway', 'Gemini', 'XoulAI', 'website', 'ArcBrowser', 'leaf', 'SoftwareEngineerJobs', 'NameMyCat', 'tech', 'SEO_for_AI', 'ChatGPT_Gemini', 'TelegramBots', 'InstagramDisabledHelp', 'dalle', 'IntelligenceS

In [90]:
# label_map = {
#     0: 'Game/Comics',
#     1: 'LifeStyle',
#     2: 'DigitalMarketing',
#     3: 'India',
#     4: 'News/Politics',
#     5: 'Technology',
#     6: 'Adult-Incel',
#     7: 'Music',
#     8: 'NSFW',
# }

In [91]:
# titles = pd.DataFrame.from_dict(label_map, orient='index', columns=['title']).reset_index()
# titles.columns = ['index','title']
# One row per Louvain community: 'nodes' holds the community's members and
# reset_index() exposes the community number as an 'index' column.
com_df = pd.DataFrame({'nodes': louvain_communities})
com_df = com_df.reset_index()
# df = titles.merge(com_df, left_on='Id', right_on='nodes', how='right')
# df
# df

In [92]:
# Previously exported node table; columns: Id,Topic,Label,AI_MODEL
titles = pd.read_csv('data/subreddits/nodes.csv')
title_map = dict(zip(titles['Id'], titles['Topic']))

# Collect the known topic of every node that appears in a Louvain community.
# NOTE(review): a node without a known topic only gets title_map[node] = "IDK";
# it is not added to subreddit_label, so it is absent from _df — confirm intent.
subreddit_label = {}
for members in com_df['nodes']:
    for node in members:
        if node in title_map:
            subreddit_label[node] = title_map[node]
        else:
            title_map[node] = "IDK"

_df = pd.DataFrame.from_dict(subreddit_label, orient='index', columns=['title'])
_df = _df.reset_index()
_df.columns = ['subreddit', 'label']

In [93]:
# Normalize edge list into DataFrame
# norm = []
# for e in edge_list:
#     if len(e) == 2:
#         s, t = e
#         w = 1
#     elif len(e) == 3:
#         s, t, w = e
#     else:
#         raise ValueError("Edges must be (source, target) or (source, target, weight).")
#     norm.append((s, t, w))
import os

# Output directory for this run's node/edge tables.
basedir = f'data/subreddits-{NAME}'
os.makedirs(basedir, exist_ok=True)


def _ai_model(name):
    """Return the AI_MODEL tag for one node, based on its edges in `wg`.

    'ChatGPT' when the node is linked only to 'ChatGPT', 'ClaudeAI' when linked
    only to 'ClaudeAI', and 'Both' otherwise.
    NOTE(review): 'otherwise' also covers nodes linked to neither — confirm that
    labelling those 'Both' is intended.
    """
    # `wg` is undirected, so a single has_edge call per hub covers both directions.
    linked_chatgpt = wg.has_edge('ChatGPT', name)
    linked_claude = wg.has_edge('ClaudeAI', name)
    if linked_chatgpt == linked_claude:
        return 'Both'
    return 'ChatGPT' if linked_chatgpt else 'ClaudeAI'


# Node table: every node of the full graph, with its topic where one is known.
nodes = list(wg.nodes())
df = (
    pd.DataFrame(nodes, columns=['subreddit'])
    .merge(_df, on='subreddit', how='outer')
    .rename(columns={'subreddit': 'Id', 'label': 'Topic'})
)
df['Label'] = df['Id']
df['AI_MODEL'] = df['Id'].map(_ai_model)

# Nodes without a topic end up with the placeholder 'unknown'.
df = df.fillna('unknown')
df.to_csv(f'{basedir}/nodes.csv', index=False)

# Edge table: one (Source, Target, Weight) row per edge of the full graph.
edges = list(wg.edges(data='weight'))
edges_df = pd.DataFrame(edges, columns=["Source", "Target", "Weight"])
edges_df.to_csv(f'{basedir}/edges.csv', index=False)


In [94]:
# from transformers import pipeline

# labels = ["sports", "technology", "finance", "cooking", "politics", "health"]
# hypothesis_template = "These words are about {}."

# clf = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

In [95]:
# for i,comm in enumerate(async_fluidc):
#     print(f"Community {i}:")
#     text = ", ".join(comm)
#     out = clf(text, candidate_labels=labels, hypothesis_template=hypothesis_template, multi_label=False)
#     print(out["labels"][0], out["scores"][0])  # best label + confidence


In [96]:
# node2comm = {}
# gcom = louvain_communities
# all_nodes = set()
# for i, comm in enumerate(gcom):
#     for node in comm:
#         node2comm[node] = i
#         all_nodes.add(node)
# _graph = filter_graph.subgraph([node for node in filter_graph.nodes() if node in all_nodes])
# pos = community_layout(_graph, gcom, node2comm, k_within=0.2, k_between=5, seed=7)
# colors = [node2comm[n] for n in _graph.nodes]
# nx.draw(_graph, pos, node_color=colors, with_labels=False, cmap=pltm, node_size=10, width=0.1, alpha=0.8)
# plt.show()
