In [None]:
!pip install praw
!pip install networkx

In [None]:
import os

import networkx as nx
import praw

In [13]:
# Reddit API client used for all requests in this notebook.
#
# Credentials are read from the environment so that secrets never live in the
# notebook or its version-control history. Export REDDIT_CLIENT_ID and
# REDDIT_CLIENT_SECRET before starting the kernel; a missing variable raises
# KeyError right here.
# NOTE(review): the key pair that used to be hardcoded in this cell has been
# exposed and should be revoked in the Reddit app settings.
reddit = praw.Reddit(
    client_id=os.environ["REDDIT_CLIENT_ID"],
    client_secret=os.environ["REDDIT_CLIENT_SECRET"],
    user_agent='script:mentalhealth_network_analysis:v1.0 (by u/Accomplished_Elk218)')

In [14]:
# --- Collection parameters ---------------------------------------------------
SUBREDDIT_NAME = "mentalhealth"  # subreddit whose comment threads are crawled
# The crawl stops as soon as EITHER of the two targets below is reached.
TARGET_COMMENTS = 5_000          # number of authored comments to collect
TARGET_USERS = 500               # number of distinct usernames to collect
TOP_POSTS_LIMIT = 50             # how many "top" submissions to request
NEW_POSTS_LIMIT = 50             # how many "new" submissions to request

# --- Mutable crawl state (updated by process_submission) ----------------------
total_comments = 0       # authored comments processed so far
unique_users = set()     # every username seen as post or comment author
# Directed reply graph: an edge commenter -> parent author, whose 'weight'
# attribute counts how many times that reply relation was observed.
g = nx.DiGraph()

def process_submission(submission):
    """Fold one submission's comment tree into the global reply graph.

    Every comment that still has an author adds that author as a node of
    ``g``. When the author of the comment's parent (another comment or the
    submission itself) has already been seen within this submission and is a
    different user, a directed edge ``commenter -> parent author`` is created
    with ``weight=1`` or, if it already exists, its weight is incremented.
    ``unique_users`` and ``total_comments`` are updated as a side effect.

    Args:
        submission: A PRAW submission; its ``comments``, ``author`` and ``id``
            attributes are used.

    Returns:
        True as soon as ``total_comments`` reaches ``TARGET_COMMENTS`` or
        ``unique_users`` reaches ``TARGET_USERS`` (the caller should stop
        crawling); False when the whole submission was consumed without
        hitting either target.
    """
    global total_comments

    # limit=0: do not fetch any of the "load more comments" placeholders.
    submission.comments.replace_more(limit=0)

    # id (submission or comment) -> username of whoever wrote it, so that a
    # reply's parent author can be resolved without further lookups.
    author_of = {}

    # The post author is the reply target of every top-level comment.
    if submission.author:
        poster = str(submission.author)
        author_of[submission.id] = poster
        unique_users.add(poster)
        g.add_node(poster)

    for reply in submission.comments.list():
        if reply.author is None:
            # No author on the comment: nothing to attribute it to.
            continue

        commenter = str(reply.author)
        # parent_id carries a type prefix (e.g. "t1_abc"); keep the bare id.
        reply_id, target_id = reply.id, reply.parent_id.split("_")[1]

        author_of[reply_id] = commenter
        unique_users.add(commenter)
        g.add_node(commenter)

        replied_to = author_of.get(target_id)
        # Self-replies and replies to unknown parents produce no edge.
        if replied_to and replied_to != commenter:
            if not g.has_edge(commenter, replied_to):
                g.add_edge(commenter, replied_to, weight=1)
            else:
                g[commenter][replied_to]['weight'] += 1

        total_comments += 1

        # Either target being reached ends the crawl.
        still_collecting = (
            total_comments < TARGET_COMMENTS and len(unique_users) < TARGET_USERS
        )
        if not still_collecting:
            return True  # Signal to stop

    return False

# Crawl the subreddit in two passes ("top" posts first, then the newest posts
# if neither collection target has been reached yet), report the totals, and
# export the resulting reply graph.
subreddit = reddit.subreddit(SUBREDDIT_NAME)

# Ids of submissions handed to process_submission so far. A post can appear in
# both the "top" and the "new" listing; processing it twice would count its
# comments twice and inflate the reply-edge weights.
processed_submission_ids = set()

print(f"Collecting comments from top {TOP_POSTS_LIMIT} posts...")
for submission in subreddit.top(limit=TOP_POSTS_LIMIT):
    processed_submission_ids.add(submission.id)
    if process_submission(submission):
        break

# Second pass only when BOTH counters are still below their targets.
if total_comments < TARGET_COMMENTS and len(unique_users) < TARGET_USERS:
    print(f"Collecting comments from newest {NEW_POSTS_LIMIT} posts...")
    for submission in subreddit.new(limit=NEW_POSTS_LIMIT):
        if submission.id in processed_submission_ids:
            continue  # already counted during the "top" pass
        processed_submission_ids.add(submission.id)
        if process_submission(submission):
            break

print("\n=== Collection Complete ===")
print(f"Total comments collected: {total_comments}")
print(f"Unique users collected: {len(unique_users)}")
print(f"Total nodes in graph: {len(g.nodes)}")
print(f"Total edges in graph: {len(g.edges)}")

# Export graph for Gephi or further analysis
nx.write_graphml(g, "reddit_mentalhealth_combined.graphml")
print("Graph saved as 'reddit_mentalhealth_combined.graphml'")

Collecting comments from top 50 posts...

=== Collection Complete ===
Total comments collected: 687
Unique users collected: 500
Total nodes in graph: 500
Total edges in graph: 468
Graph saved as 'reddit_mentalhealth_combined.graphml'


In [17]:
# Number of users listed in each "top users" ranking (also used by the
# PageRank and betweenness cells).
top_k = 10
# Degree centrality of every user in the reply graph, as computed by networkx.
centrality_degree = nx.degree_centrality(g)

# The header is derived from top_k so it cannot disagree with the slice below.
print(f"\nTop {top_k} users by degree centrality:")
for user in sorted(centrality_degree, key=centrality_degree.get, reverse=True)[:top_k]:
    # Node keys are the usernames themselves, so they are printed directly.
    print(f"{user}: Degree Centrality = {centrality_degree[user]:.4f}")


Top 10 users by degree centrality:
runawaylovebug: Degree Centrality = 0.2545
WarmlyEccentric: Degree Centrality = 0.2405
Alyndriel: Degree Centrality = 0.1283
Crafty_n_depressed44: Degree Centrality = 0.1002
Emmengard: Degree Centrality = 0.1002
starrfishandcoffee: Degree Centrality = 0.0321
davethemacguy: Degree Centrality = 0.0160
crunchsaffron9: Degree Centrality = 0.0140
Aggressive_Chicken63: Degree Centrality = 0.0140
CanalAnswer: Degree Centrality = 0.0100


In [18]:
# PageRank over the reply graph, using the per-edge 'weight' attribute (the
# number of observed replies) as edge weight.
pagerank = nx.pagerank(g, weight='weight')

# The header is derived from top_k so it cannot disagree with the slice below.
print(f"\nTop {top_k} users by PageRank:")
for user in sorted(pagerank, key=pagerank.get, reverse=True)[:top_k]:
    print(f"{user}: PageRank = {pagerank[user]:.4f}")


Top 10 users by PageRank:
WarmlyEccentric: PageRank = 0.1162
Crafty_n_depressed44: PageRank = 0.0877
runawaylovebug: PageRank = 0.0665
Alyndriel: PageRank = 0.0648
Emmengard: PageRank = 0.0283
crunchsaffron9: PageRank = 0.0236
starrfishandcoffee: PageRank = 0.0222
Aggressive_Chicken63: PageRank = 0.0132
wren75: PageRank = 0.0130
LJ1205E: PageRank = 0.0130


In [20]:
# Approximate betweenness centrality from a random sample of pivot nodes.
BETWEENNESS_PIVOTS = 10  # number of sampled source nodes (networkx's `k`)
BETWEENNESS_SEED = 42    # fixed seed so the sampled estimate is reproducible

btw = nx.betweenness_centrality(
    g,
    # k may not exceed the number of nodes, so clamp it for small graphs.
    k=min(BETWEENNESS_PIVOTS, g.number_of_nodes()),
    seed=BETWEENNESS_SEED,
)

# This header used to read "PageRank" (copy-paste slip); it now names the
# metric that is actually printed and follows top_k.
print(f"\nTop {top_k} users by betweenness centrality:")
for user in sorted(btw, key=btw.get, reverse=True)[:top_k]:
    print(f"{user}: Betweenness = {btw[user]:.4f}")


Top 10 users by PageRank:
Alyndriel: Betweenness = 0.0052
Crafty_n_depressed44: Betweenness = 0.0014
starrfishandcoffee: Betweenness = 0.0012
davethemacguy: Betweenness = 0.0003
NerdyKnits: Betweenness = 0.0002
crunchsaffron9: Betweenness = 0.0002
CrackedOreo666: Betweenness = 0.0001
WarmlyEccentric: Betweenness = 0.0000
Candle1nTh3Dark: Betweenness = 0.0000
sciencelove1994: Betweenness = 0.0000
