In [102]:
# Hack Sample Code
# This shows examples of logging in and using the subreddit, submission, redditor, and comments objects
# You are not required to collect these specific attributes of submissions or redditors; they are merely examples

# Imports
import praw
import pandas as pd
import numpy as np
import datetime
import os


In [103]:

# Login and create Reddit instance.
# Credentials are read from environment variables so that secrets are never
# stored in (or committed with) the notebook. Set REDDIT_CLIENT_ID and
# REDDIT_CLIENT_SECRET before running this cell; a KeyError here means one of
# them is missing.
# NOTE(review): the client secret that used to be hardcoded in this cell has
# been exposed and should be revoked/regenerated in the Reddit app settings.
myClientIDvar = os.environ['REDDIT_CLIENT_ID']
myClientSecret = os.environ['REDDIT_CLIENT_SECRET']
# The account name is not a secret; it is only used as the user agent string.
myRedditUserName = os.environ.get('REDDIT_USERNAME', 'Legitimate_Thing5210')
reddit = praw.Reddit(client_id=myClientIDvar, client_secret=myClientSecret, user_agent=myRedditUserName)


In [104]:
def getHotPosts(numPosts, subreddit):
  """
  Get the top n hot posts from a subreddit.

  Ex: Get the hottest 30 posts from r/christianity:
    getHotPosts(30, 'christianity')

  Parameters:
    numPosts: value passed as `limit` to the hot listing (maximum number of posts).
    subreddit: subreddit name without the 'r/' prefix.

  Returns:
    Whatever `reddit.subreddit(subreddit).hot(limit=numPosts)` returns; the rest
    of this notebook consumes it one post at a time with next().
  """
  return reddit.subreddit(subreddit).hot(limit=numPosts)


In [105]:
def getHotPostsDf(postings_generator, n_postings):
  """
  Build a DataFrame describing up to n_postings posts pulled from a generator.

  The generator is advanced in place, so successive calls continue where the
  previous call stopped.
  Ex: Page through the 30 hottest posts on r/Christianity, 10 at a time:
    post_generator = getHotPosts(30, 'christianity')
    getHotPostsDf(post_generator, 10)
    getHotPostsDf(post_generator, 10)
    getHotPostsDf(post_generator, 10)

  Parameters:
    postings_generator: iterator yielding submission objects that expose id,
      subreddit, title, permalink, created_utc, author, selftext and is_self.
    n_postings: maximum number of posts to pull from the generator.

  Returns:
    pandas.DataFrame with one row per retrieved post and the columns
    ID, Sub, Title, URL, Time, Author, Body, IsSelfPost. If the generator is
    already exhausted the frame is empty but keeps these columns.

  Raises:
    Any exception other than StopIteration raised while advancing the generator
    (e.g. an API/network failure) is propagated instead of being reported as
    "No more posts".
  """
  columns = ['ID', 'Sub', 'Title', 'URL', 'Time', 'Author', 'Body', 'IsSelfPost']
  post_info_list = []

  for i in range(n_postings):
    try:
      submission = next(postings_generator)
    except StopIteration:
      # Only genuine exhaustion ends the loop quietly.
      print("No more posts to generate")
      break

    post_info_list.append({
      'ID': submission.id,
      # Stored as text (like Author) so the frame holds plain values rather than API objects.
      'Sub': str(submission.subreddit),
      'Title': submission.title,
      'URL': submission.permalink,
      # NOTE(review): fromtimestamp() without tz yields a naive datetime in the
      # machine's local timezone, so this value depends on where the notebook runs.
      'Time': datetime.datetime.fromtimestamp(submission.created_utc),
      # str() turns a missing author (None) into the text 'None'.
      'Author': str(submission.author),
      'Body': submission.selftext,
      'IsSelfPost': bool(submission.is_self)
    })

    print('Retrieved data for ' + str(i + 1) + ' posts')

  return pd.DataFrame(post_info_list, columns=columns)

In [106]:
def _countOrNaN(listing_factory):
    """Count the items yielded by listing_factory(); return np.nan if the call or the iteration fails."""
    try:
        return sum(1 for _ in listing_factory())
    except Exception:
        # Best-effort: these listings are wrapped because they may be refused
        # for accounts other than the logged-in one.
        return np.nan


def getAuthorInfoDF(dfPosts):
    """
    Get Dataframe of author information.

    For every post ID in dfPosts the submission is looked up through the
    module-level `reddit` instance and a row of statistics about its author is
    collected. Posts whose author is missing, or whose author object has no
    `created_utc` attribute (suspended accounts lack most attributes), are skipped.

    Parameters:
        dfPosts: DataFrame with an 'ID' column of submission ids.

    Returns:
        pandas.DataFrame with one row per retrieved author and the columns
        userName, numComments, upsGiven, downsGiven, awardsGiven, accountAge,
        totalKarma, linkKarma, commentKarma, awarderKarma, awardeeKarma, isMod,
        numTrophies. 'isMod' is cast to bool. When no author could be retrieved
        the frame is empty but keeps these columns.

    Notes:
        - numComments counts the items yielded by comments.top(time_filter="month")
          (presumably capped by PRAW's default listing limit - TODO confirm).
        - accountAge holds the raw created_utc value, not an elapsed age.
        - The trophy count used to overwrite numComments; it now lives in
          numTrophies, appended last so existing column positions are unchanged.
    """
    author_columns = [
        'userName', 'numComments', 'upsGiven', 'downsGiven', 'awardsGiven',
        'accountAge', 'totalKarma', 'linkKarma', 'commentKarma',
        'awarderKarma', 'awardeeKarma', 'isMod', 'numTrophies',
    ]
    author_list = []

    for postID in dfPosts['ID']:
        thisAuthor = reddit.submission(id=postID).author

        # Verifies that author exists and has not been suspended
        if thisAuthor is None or not hasattr(thisAuthor, 'created_utc'):
            continue

        author_list.append({
            'userName': thisAuthor.name,
            'numComments': sum(1 for _ in thisAuthor.comments.top(time_filter="month")),
            'upsGiven': _countOrNaN(thisAuthor.upvoted),
            'downsGiven': _countOrNaN(thisAuthor.downvoted),
            'awardsGiven': _countOrNaN(thisAuthor.gildings),
            'accountAge': thisAuthor.created_utc,
            'totalKarma': thisAuthor.total_karma,
            'linkKarma': thisAuthor.link_karma,
            'commentKarma': thisAuthor.comment_karma,
            'awarderKarma': thisAuthor.awarder_karma,
            'awardeeKarma': thisAuthor.awardee_karma,
            'isMod': thisAuthor.is_mod,
            'numTrophies': len(thisAuthor.trophies()),
        })

        # Report the number of authors actually collected (skipped posts are not counted).
        print('Retrieved data for ' + str(len(author_list)) + ' authors')

    # Passing the column list keeps 'isMod' present even when author_list is empty.
    dfAuthors = pd.DataFrame(author_list, columns=author_columns)
    dfAuthors['isMod'] = dfAuthors['isMod'].astype('bool')

    return dfAuthors

In [None]:
def getPostsByAuthors(dfAuthors):
    """
    Get Dataframe of the posts returned by each author's
    submissions.top(time_filter="month") listing.

    Parameters:
        dfAuthors: DataFrame with a 'userName' column. Missing and duplicate
            names are dropped so each author is queried once.

    Returns:
        pandas.DataFrame with one row per post and the columns ID, Author, Sub,
        Title, Body, Upvotes, Upvote Ratio, Awards, URL, Time, IsSelfPost.
        Empty (columns kept) when there are no authors or no posts.
    """
    post_columns = [
        'ID', 'Author', 'Sub', 'Title', 'Body', 'Upvotes', 'Upvote Ratio',
        'Awards', 'URL', 'Time', 'IsSelfPost',
    ]
    # Selecting one column yields a Series; iterate its values directly
    # (a Series has no .iterrows()).
    usernames = dfAuthors["userName"].dropna().drop_duplicates()
    posts_lists = []

    for username in usernames:
        thisAuthor = reddit.redditor(name=username)
        for thisAuthorPost in thisAuthor.submissions.top(time_filter="month"):
            posts_lists.append({
                'ID': thisAuthorPost.id,
                'Author': str(thisAuthor),
                # Stored as text so the frame holds plain values rather than API objects.
                'Sub': str(thisAuthorPost.subreddit),
                'Title': thisAuthorPost.title,
                'Body': thisAuthorPost.selftext,
                'Upvotes': thisAuthorPost.ups,
                'Upvote Ratio': thisAuthorPost.upvote_ratio,
                'Awards': thisAuthorPost.total_awards_received,
                'URL': thisAuthorPost.permalink,
                # NOTE(review): naive datetime in the machine's local timezone.
                'Time': datetime.datetime.fromtimestamp(thisAuthorPost.created_utc),
                'IsSelfPost': bool(thisAuthorPost.is_self)
            })
    return pd.DataFrame(posts_lists, columns=post_columns)

In [None]:
def getCommentsByPosts(dfPosts):
    """
    Get Dataframe of the comments attached to each post in dfPosts.

    Each submission is looked up through the module-level `reddit` instance,
    `comments.replace_more(limit=0)` is called on it, and every item of
    `comments.list()` that has an `is_submitter` attribute becomes one row.
    NOTE(review): per the PRAW docs replace_more(limit=0) drops the
    "load more comments" placeholders without fetching them, so only comments
    already loaded with the submission are returned - confirm this is intended.

    Parameters:
        dfPosts: DataFrame with an 'ID' column of submission ids.

    Returns:
        pandas.DataFrame with the columns ID, Author, Post ID, Post, Body.
        Empty (columns kept) when no comments are found.
    """
    comment_columns = ['ID', 'Author', 'Post ID', 'Post', 'Body']
    comments_list = []

    # Selecting one column yields a Series; iterate its values directly
    # (a Series has no .iterrows()).
    for postID in dfPosts["ID"]:
        thisSubmission = reddit.submission(id=postID)
        thisSubmission.comments.replace_more(limit=0)

        for tempComment in thisSubmission.comments.list():
            # Skip anything that does not look like a regular comment.
            if not hasattr(tempComment, 'is_submitter'):
                continue
            comments_list.append({
                'ID': tempComment.id,
                # Text form, matching the 'Author' column of the post frames;
                # a missing author (None) becomes 'None'.
                'Author': str(tempComment.author),
                'Post ID': tempComment.link_id,
                'Post': tempComment.submission,
                'Body': str(tempComment.body),
            })

    return pd.DataFrame(comments_list, columns=comment_columns)

In [107]:
def DFToCSV(df, target_csv_file='output.csv'):
  '''
  Writes a Dataframe to the CSV file './results/<target_csv_file>'.

  If the file doesn't exist it is created (together with its directory) and
  written with a header row. Otherwise the rows are appended to the existing
  file without a header, so df must have the same columns, in the same order,
  as the file already on disk.

  Parameters:
    df: DataFrame to write; the index is not written.
    target_csv_file: file name (relative to './results/').
  '''
  path = './results/' + target_csv_file
  # to_csv does not create missing directories, so make sure the target exists.
  os.makedirs(os.path.dirname(path), exist_ok=True)

  if os.path.exists(path):
    df.to_csv(path, mode='a', index=False, header=False)
  else:
    df.to_csv(path, index=False)


In [108]:
# Listing of (up to) the 30 hottest r/bible posts; consumed later by getHotPostsDf.
post_generator = getHotPosts(numPosts=30, subreddit='bible')

In [None]:
# Pull a single post from the listing as a small sample, then save the post
# and the information about its author under ./results/.
posts = getHotPostsDf(postings_generator=post_generator, n_postings=1)
DFToCSV(df=posts, target_csv_file='bible_hot_posts.csv')
DFToCSV(df=getAuthorInfoDF(dfPosts=posts), target_csv_file='bible_authors.csv')