## Find out the users that post the most in a set of subreddits

edit the `sub_names` var to add/remove subreddits from the calculation

In [6]:
import praw
import pandas as pd
import os
from dotenv import load_dotenv
load_dotenv()

##### vars #####
sub_names = ['politics', 'all', 'news', 'interestingasfuck', 'whitePeopleTwitter', 'worldnews', 'todayilearned', 'BlackPeopleTwitter', 'PoliticalHumor', 'MadeMeSmile', 'memes', 'pics', 'antiwork', 'PublicFreakout']
################

reddit = praw.Reddit(
    client_id=os.getenv('client_id'),
    client_secret=os.getenv('client_secret'),
    user_agent="submissions comments",
    username=os.getenv('username'),
)

def serialize_reddit(s): 
    return {
        "author": s.author,
        "created_utc": s.created_utc,
    }
def flatten(l):
    return [item for sublist in l for item in sublist]


submissions = list(flatten(reddit.subreddit(s).top(time_filter='month',limit=200) for s in sub_names))
df = pd.DataFrame([serialize_reddit(s) for s in submissions])
df['created_utc'] = pd.to_datetime(df.created_utc.values, unit='s', utc=True)

print(sub_names)

ndf = df.query('author != "None"')
print('Total submissions:', len(ndf))

print('Number of unique authors:', ndf.author.nunique())

csum = ndf.author.value_counts(1).cumsum()

bins = pd.cut(csum, [0., 0.25, 0.5, 0.75, 1.], labels=['25%','50%','75%','100%'])

print('\nQuartiles:')
print(csum.groupby(bins).size())

print('\nTop 5 authors:')
topa = pd.concat([ndf.author.value_counts().head(), ndf.author.value_counts(1).head()*100],axis=1)
topa.index.name = 'Author'
topa.columns = ('Count', 'Percent')
print(topa)
print('\n======\n')

['politics', 'all', 'news', 'interestingasfuck', 'whitePeopleTwitter', 'worldnews', 'todayilearned', 'BlackPeopleTwitter', 'PoliticalHumor', 'MadeMeSmile', 'memes', 'pics', 'antiwork', 'PublicFreakout']
Total submissions: 2800
Number of unique authors: 1866

Quartiles:
author
25%     103
50%     368
75%     697
100%    698
Name: author, dtype: int64

Top 5 authors:
                  Count   Percent
Author                           
DaFunkJunkie         41  1.470061
pietradolce          27  0.968089
CrooklynKnight       19  0.681248
Limitless_yt89       16  0.573682
dilettantedebrah     15  0.537827


