# Step 1: instantiate

In [27]:
import praw
import numpy as np
import pandas as pd
from pyvis.network import Network
from decouple import config
from collections import Counter

sub_of_interest = 'FemaleDatingStrategy'
hard_limit = 6

r_client_id = config('REDDIT-CLIENT-ID')
print(r_client_id)
secret = config('REDDIT-SECRET')
print(secret)
r_id = config("USER")
r_pw = config("PW")
 
# ===== Instantiate Reddit instance =====
reddit = praw.Reddit(
    client_id= r_client_id,
    client_secret=secret,
    username=r_id,
    password=r_pw,
    user_agent='1'
)

FE5apSBviVjlEQ
4lOv7xDgIn-XzFBmyFeBB8REpkTQzQ
S4nh4k1mi99e


# Step 2: Define functions

## Step 2.1: define function to obtain recent redditors of a subreddit

## Step 2.2: define fucntion to obtain recent subreddits to which a redditor has posted to

In [32]:
def get_users(sub=sub_of_interest, post_count=8, comment_count=12):
    """
    Finds the most recently active users in a subreddit. It's easier to comment than to post on a subreddit, so more comments are collected.
    :param subreddit: subreddit to crawl
    :param posts_count: number of posts to search for
    :param comments_count: number of comments to search for
    :return: List of usernames who were recently active in a subreddit
    """
    # === Obtain list of posters and commentors ===
    posters = [
        s.author.name                                         #<- Redditor who posted
        for s in reddit.subreddit(sub).new(limit=post_count)  #<- from most recent posts
        if s.author.name != "AutoModerator"                   #<- remove auto moderator
    ]
    
    # --- Same with commentors ---
    commenters = [
        c.author.name
        for c in reddit.subreddit(sub).comments(limit=comment_count)  
        if c.author.name != "AutoModerator"
    ]
    
    # === Combine the posters and commentors ===
    redditors = posters + commenters    
    # --- Remove duplicates
    unique_redditors = list(set(redditors))
    
    return unique_redditors


def activity_list(redditor_list, lim=12, filter_soi=True):
    """
    Get names of subreddits to which users have posted to.
    :param redditor_list: list of redditor usernames
    :param lim: number of posts to limit to; default 12
    """
    
    # === Define lists to store subreddits where redditor has posted and commented to ===
    posts_subreddits = []
    comments_subreddits = []
    
    # === 
    # Iterate over list of redditors and store the names of the subreddits where they posted and store 
    # them in the list to a certain length 
    # ===
    
    # --- While loop to control for length of list ---
    while len(posts_subreddits) < lim:
        # -- Iterate over list of redditors --
        for r in redditor_list:
            
            posts = reddit.redditor(r).submissions.new(limit=lim) #<- new submissions from redditors
            
            # - Iterate over posts/submissions -
            for submission in posts:
                # Skip if they posted in the subreddit that we're scraping, otherwise store to list
                if submission.subreddit.display_name == sub_of_interest:
                    if filter_soi is False:
                        posts_subreddits.append(submission.subreddit.display_name)                    
                else:
                    posts_subreddits.append(submission.subreddit.display_name)
    
    # --- Same with comments ---
    while len(comments_subreddits) < lim:
        for r in redditor_list:
            
            comments = reddit.redditor(r).comments.new(limit=lim)
            
            for comment in comments:
                if comment.subreddit.display_name == sub_of_interest:
                    if filter_soi is False:
                        posts_subreddits.append(submission.subreddit.display_name)
                else:
                    comments_subreddits.append(comment.subreddit.display_name)
            """
            for submission, comment in zip(posts, comments):
                if submission.subreddit.display_name != sub_of_interest and filter_soi:
                    posts_subreddits.append(submission.subreddit.display_name)
                if comment.subreddit.display_name != sub_of_interest and filter_soi:
                    comments_subreddits.append(comment.subreddit.display_name)
                elif filter_soi is False:
                    posts_subreddits.append(submission.subreddit.display_name)
                    comments_subreddits.append(comment.subreddit.display_name)
            """
            
    return posts_subreddits, comments_subreddits


def get_redditors_activity(posts_subreddits_list, comments_subreddits_list):
    posts_weight = {sub: posts_subreddits_list.count(sub)+3 for sub in posts_subreddits_list}
    comments_weight = {com: comments_subreddits_list.count(com) for com in comments_subreddits_list}
    
    def merg_sum(a, b):
        for k in b:
            if k in a:
                b[k] = b[k] + a[k]
        c = {**a, **b}
        return c
    
    activity_weight = merg_sum(posts_weight, comments_weight)
    
    #activity_subreddits = list(set(posts_subreddits_list + comments_subreddits_list))
    
    return activity_weight

# Step 3: get redditors from the sub of interest and their recently active subreddits

In [3]:
redditors = get_users(sub_of_interest, post_count=12, comment_count=20)

print(redditors)

['lietietbie', 'Mysterious_Call_924', 'Aocwannabe', 'Love_Artemis', 'NowTruly', 'Spittens', 'daisy_0720', 'Fun_Sherbet', 'HereForTheFreeFoodOk', 'say10s-exwife', 'Vervibes', 'horeya33', 'LetsGetin_Formation', 'Elitane', 'hiphopradish', 'losersquad711', 'buzzkillyall', 'souredskittles', 'mandoa_sky', '_xyoungbellax_', 'The_Cat_Empress', 'Nonnasaysfuckoff', 'poppy03', 'pickmieshaexorcist', 'extragouda', 'europoor24']


In [24]:
p, c = activity_list(redditors, lim=8, filter_soi=True)

print(p)
print(len(c))

['Anemic', 'FemaleHairLoss', 'bettafish', 'puppy', 'CAKEWIN', 'cake', 'catpics', 'childfree', 'FierceFemaleAmbition', 'fourthwavewomen', 'BPD', 'BPD', 'BPD', 'BPD', 'BPD', 'ShittyVeganFoodPorn', 'MajidJordan', 'lebanon', 'IWantOut', 'lebanon', 'perfectlycutscreams', 'Advice', 'quityourbullshit', 'Advice', 'AskMenAdvice', 'AskMen', 'travel', 'dating_advice', 'travel', 'dating', 'solotravel', 'Conservative', 'usa', 'BreakUps', 'BreakUps', 'FemaleLevelUpStrategy', 'FemaleLevelUpStrategy', 'WitchesVsPatriarchy', 'freebooks', 'u_mandoa_sky', 'ForeverAloneWomen', 'FemaleLevelUpStrategy', 'migraine', 'hackintosh', 'FemaleLevelUpStrategy', 'WeightLossAdvice', 'FemaleLevelUpStrategy', 'intj', 'Infidelity', 'FemaleLevelUpStrategy']
59


In [33]:
activity_weight = get_redditors_activity(p, c)

print(activity_weight)

{'Anemic': 6, 'FemaleHairLoss': 4, 'bettafish': 4, 'puppy': 4, 'CAKEWIN': 4, 'cake': 4, 'catpics': 4, 'childfree': 4, 'FierceFemaleAmbition': 4, 'fourthwavewomen': 4, 'BPD': 9, 'ShittyVeganFoodPorn': 4, 'MajidJordan': 5, 'lebanon': 7, 'IWantOut': 4, 'perfectlycutscreams': 4, 'Advice': 5, 'quityourbullshit': 4, 'AskMenAdvice': 4, 'AskMen': 4, 'travel': 6, 'dating_advice': 4, 'dating': 4, 'solotravel': 4, 'Conservative': 4, 'usa': 4, 'BreakUps': 5, 'FemaleLevelUpStrategy': 14, 'WitchesVsPatriarchy': 5, 'freebooks': 4, 'u_mandoa_sky': 4, 'ForeverAloneWomen': 4, 'migraine': 5, 'hackintosh': 4, 'WeightLossAdvice': 4, 'intj': 4, 'Infidelity': 4, 'SkincareAddiction': 1, 'AskReddit': 1, 'BeAmazed': 1, 'Gastritis': 1, 'worldnews': 1, 'foodies_sydney': 1, 'nextfuckinglevel': 2, 'TooAfraidToAsk': 1, 'analog': 1, 'BPDmemes': 1, 'AbruptChaos': 1, 'unpopularopinion': 1, 'europe': 1, 'EarthPorn': 1, 'relationship_advice': 2, 'relationships': 2, 'aldi': 1, 'Iowa': 1, 'Bumble': 3, 'AskNYC': 1, 'startup

# Step 4: convert the list of primary network into a dataframe

In [37]:
for k, v in activity_weight.items():
    print(f"Key: {k}; value: {v}")

Key: Anemic; value: 6
Key: FemaleHairLoss; value: 4
Key: bettafish; value: 4
Key: puppy; value: 4
Key: CAKEWIN; value: 4
Key: cake; value: 4
Key: catpics; value: 4
Key: childfree; value: 4
Key: FierceFemaleAmbition; value: 4
Key: fourthwavewomen; value: 4
Key: BPD; value: 9
Key: ShittyVeganFoodPorn; value: 4
Key: MajidJordan; value: 5
Key: lebanon; value: 7
Key: IWantOut; value: 4
Key: perfectlycutscreams; value: 4
Key: Advice; value: 5
Key: quityourbullshit; value: 4
Key: AskMenAdvice; value: 4
Key: AskMen; value: 4
Key: travel; value: 6
Key: dating_advice; value: 4
Key: dating; value: 4
Key: solotravel; value: 4
Key: Conservative; value: 4
Key: usa; value: 4
Key: BreakUps; value: 5
Key: FemaleLevelUpStrategy; value: 14
Key: WitchesVsPatriarchy; value: 5
Key: freebooks; value: 4
Key: u_mandoa_sky; value: 4
Key: ForeverAloneWomen; value: 4
Key: migraine; value: 5
Key: hackintosh; value: 4
Key: WeightLossAdvice; value: 4
Key: intj; value: 4
Key: Infidelity; value: 4
Key: SkincareAddicti

In [41]:
primary_network = [k for k, v in activity_weight.items()]
primary_weight = [v for k, v in activity_weight.items()]

print(f"Primary network without removing duplicates: {len(p+c)}")
print(f"Primary network after removing duplicates: {len(primary_network)}")

soi_list = [sub_of_interest for _ in range(len(primary_network))]
data_dict = {
    "source": soi_list, 
    "target": primary_network,
    "weight": primary_weight
}

df = pd.DataFrame(
    data = data_dict
)

display(df.head())
display(df.tail(7))

Primary network without removing duplicates: 109
Primary network after removing duplicates: 70


Unnamed: 0,source,target,weight
0,FemaleDatingStrategy,Anemic,6
1,FemaleDatingStrategy,FemaleHairLoss,4
2,FemaleDatingStrategy,bettafish,4
3,FemaleDatingStrategy,puppy,4
4,FemaleDatingStrategy,CAKEWIN,4


Unnamed: 0,source,target,weight
63,FemaleDatingStrategy,whenthe,1
64,FemaleDatingStrategy,happy,1
65,FemaleDatingStrategy,PetiteFitness,1
66,FemaleDatingStrategy,PlasticSurgery,4
67,FemaleDatingStrategy,TwoXChromosomes,1
68,FemaleDatingStrategy,Menopause,1
69,FemaleDatingStrategy,endometriosis,1


# Step 5: find the secondary list of users and their activity

In [4]:
redditors_secondary = [get_users(subreddit, 1, 2) for subreddit in activity_to]



In [5]:
secondary_network = {activity_to[i]: get_redditors_activity(redditors_secondary[0], 2, False) for i in range(len(redditors_secondary))}

print(secondary_network)

KeyboardInterrupt: 

In [None]:
for i in range(len(redditors_secondary)):
    print(get_redditors_activity(redditors_secondary[i], 2))

# Step 6: create dataframe

In [36]:
def find_edges(subreddit_from, subreddit_to):
    """
    Find the link/edge from a subreddit, to a subreddit.
    :param subreddit_from: Subreddit from
    :param subreddit_to: Subreddit to
    """
    #####################
    # Since edges between subreddits are defined as activities of users across the subs,
    # and the activity is already defined as posts and comments,
    # the lists of users of each subreddit is cross-referenced.
    #####################
    
    # === First, get the list of users from each subreddits ===
    redditors_secondary = [get_users(subreddit, 1, 2) for subreddit in p]
    
    for i in range(len(redditors_secondary)):
        get_redditors_activity(redditors_secondary[i], 2)

In [41]:
# === Cross-reference each subreddit from the "p" list ===
for k, v in enumerate(p):
    print(find_edges(p[k], p[k+1]))

[]
[]
[]
[]
[]


AttributeError: 'NoneType' object has no attribute 'name'

In [9]:
df_counts_p = get_post_activity(sub_of_interest, user_lim=3, limit=8)
df_counts_c = get_comm_activity(sub_of_interest, user_lim=3, limit=15)

# --- Merge the count dataframes together to aid in summing the submissions and comments activity, to be used
# for network mapping ---
df_nx = df_counts_p.merge(
    df_counts_c,
    how='outer',
    on='target',
    suffixes=('_p', '_c')
).fillna(0)

df_nx['weight_total'] = df_nx['weight_p'] + df_nx['weight_c']
df_nx['source'] = [sub_of_interest for i in range(len(df_nx))]

# -- Drop unnecessary columns --
df_nx.drop(['weight_p', 'weight_c'], axis=1, inplace=True)
df_nx.sort_values(by='weight_total', inplace=True, ascending=False)

# Examine the dataframe
print('----- Map data ----- \n', df_nx, '\n')

Vmchik
BasieSkanks
GabbaG0ul
BasieSkanks
~~~~~~~~~~~~~~~~~~~~~~~~~
Eqvvi
~~~~~~~~~~~~~~~~~~~~~~~~~
I_know_right_AS_IF
~~~~~~~~~~~~~~~~~~~~~~~~~


NameError: name 'commentors' is not defined

In [6]:
edge_tuples = [(sub_of_interest, sub, w) for w, sub in zip(df_nx['weight_total'], df_nx['target'])]

print(edge_tuples)

node_list = df_nx['target'].tolist()
node_list = node_list + [sub_of_interest]

print(node_list)

[('FemaleDatingStrategy', 'FemaleLevelUpStrategy', 5.0), ('FemaleDatingStrategy', 'technology', 3.0), ('FemaleDatingStrategy', 'TrollXChromosomes', 2.0), ('FemaleDatingStrategy', 'coolguides', 1.0)]
['FemaleLevelUpStrategy', 'technology', 'TrollXChromosomes', 'coolguides', 'FemaleDatingStrategy']


In [5]:
nx = Network(notebook=True)

nx.add_nodes(node_list, label=node_list)

nx.add_edges(edge_tuples)

nx.show("nodes.html")