https://github.com/huggingface/transformers/blob/0ae96ff8a7e2d371242452d81bee85da8df202f5/examples/text-generation/run_generation.py

# get a bunch of user comments

In [1]:
# PRAW is configured through praw.ini -- check that the file exists in the project root
import praw

In [2]:
# Create the Reddit API client; no explicit credentials are passed here.
reddit = praw.Reddit()

In [3]:
# Reddit username whose comments are collected; also used to name the output paths.
user_name = 'suncoasthost'

In [4]:
# Redditor object for user_name, obtained from the API client.
user = reddit.redditor(user_name)

In [5]:
def get_context(comment, reddit):
    """Look up the direct parent comment and the submission of ``comment``.

    Args:
        comment: Object exposing ``link_id`` and ``parent_id`` attributes.
        reddit: Client exposing ``submission(id=...)`` and ``comment(id=...)``.

    Returns:
        A ``(parent_comment, submission)`` tuple. ``parent_comment`` is
        ``None`` when ``parent_id`` equals ``link_id``, i.e. the comment is
        a top-level reply to the submission.
    """
    link_fullname = comment.link_id
    submission = reddit.submission(id=link_fullname.replace('t3_', ''))

    # Top-level comment: its parent is the submission itself.
    if comment.parent_id == link_fullname:
        return None, submission

    # Otherwise the parent is another comment; drop the 't1_' marker to get its id.
    parent_id = comment.parent_id.replace('t1_', '')
    return reddit.comment(id=parent_id), submission

def get_all_context(comment, reddit):
    """Collect every ancestor comment of ``comment`` plus its submission.

    Walks upward one parent at a time via ``get_context`` until a top-level
    comment is reached.

    Args:
        comment: The comment whose ancestry is wanted.
        reddit: Client forwarded unchanged to ``get_context``.

    Returns:
        A ``(parent_comments, submission)`` tuple. ``parent_comments`` is
        ordered from the top-level ancestor down to the direct parent and is
        empty when ``comment`` is itself top-level. ``submission`` is the one
        returned by ``get_context`` for the top-most comment in the chain.
    """
    ancestors = []
    current = comment
    while True:
        parent, submission = get_context(current, reddit)
        if parent is None:
            break
        ancestors.append(parent)
        current = parent

    # Parents were gathered nearest-first; callers get them oldest-first.
    ancestors.reverse()
    return (ancestors, submission)

In [6]:
import os
import json
import pickle

In [7]:
from datetime import datetime

In [22]:
def format_comment_as_json(comment):
    """Reduce a comment to a plain dict of its serialized fields.

    Args:
        comment: Object exposing ``id``, ``author``, ``body``,
            ``created_utc`` and ``permalink``.

    Returns:
        A dict with keys ``id``, ``author``, ``body``, ``created_utc`` and
        ``permalink`` (in that order). ``author`` holds ``comment.author.name``,
        or ``None`` when ``comment.author`` is ``None``.
    """
    record = {'id': comment.id}

    author = comment.author
    if author is None:
        record['author'] = None
    else:
        record['author'] = author.name

    for field in ('body', 'created_utc', 'permalink'):
        record[field] = getattr(comment, field)
    return record

def format_submission_as_json(submission):
    """Reduce a submission to a plain dict of its serialized fields.

    Args:
        submission: Object exposing ``id``, ``subreddit`` (with a
            ``display_name`` attribute), ``title``, ``selftext`` and
            ``permalink``.

    Returns:
        A dict with keys ``id``, ``subreddit``, ``title``, ``selftext`` and
        ``permalink`` (in that order); ``subreddit`` holds
        ``submission.subreddit.display_name``.
    """
    record = {'id': submission.id}
    record['subreddit'] = submission.subreddit.display_name
    for field in ('title', 'selftext', 'permalink'):
        record[field] = getattr(submission, field)
    return record

In [23]:
import pandas as pd

In [24]:
# Dump each of the user's comments (with its parent comment and submission)
# to data/user/<user_name>/<comment_id>.json. A CSV manifest next to that
# directory records which comment ids have been dumped, so re-running the
# cell only fetches context for comments that are not in the manifest yet.
outpath = 'data/user/{}'.format(user_name)
os.makedirs(outpath, exist_ok=True)

manifestpath = 'data/user/{}.csv'.format(user_name)
if not os.path.isfile(manifestpath):
    # First run for this user: create the manifest with just its header row.
    manifestdf = None
    with open(manifestpath, 'w+') as f:
        f.write('comment_id, created_utcnow_isoformat\n')
else:
    # Read ids as strings so an id made only of digits is not turned into a
    # number by pandas' type inference (which would break the lookup below).
    manifestdf = pd.read_csv(manifestpath, dtype={'comment_id': str})

# Build the lookup once instead of re-listing the column for every comment.
dumped_ids = set() if manifestdf is None else set(manifestdf['comment_id'])

# args for user.comments.new()
# https://praw.readthedocs.io/en/latest/code_overview/other/listinggenerator.html#praw.models.ListingGenerator
# limit – default 100, max 1000
limit = None
i = 0
for comment in user.comments.new(limit=limit):
    print('[{}/{}] id: {}, body: {}'.format(
        i, limit, comment.id, comment.body.replace('\n', ' ').replace('\t', ' ')[:50]
    ))
    i += 1
    if comment.id in dumped_ids:
        print('skip since comment dump exists...')
        continue

    parent_comment, submission = get_context(comment, reddit)
    package = {
        'comment': format_comment_as_json(comment),
        'parent_comment': format_comment_as_json(parent_comment) if parent_comment is not None else None,
        'submission': format_submission_as_json(submission)
    }
    # Write the JSON dump BEFORE recording the id in the manifest: if the dump
    # fails or the run is interrupted, the comment is simply retried next time
    # instead of being marked as done with no file on disk.
    with open(os.path.join(outpath, '{}.json'.format(comment.id)), 'w+') as f:
        json.dump(package, f, indent=4)
    with open(manifestpath, 'a+') as f:
        f.write('{}, {}\n'.format(comment.id, datetime.utcnow().isoformat()))
    dumped_ids.add(comment.id)

[0/None] id: fwbahg8, body: not my choice. was the cto
skip since comment dump exists...
[1/None] id: fw9xf8b, body: depends on your hosting situation. I am hosted on 
skip since comment dump exists...
[2/None] id: fw9q6m5, body: it is but it scales up easy. Major organizations u
skip since comment dump exists...
[3/None] id: fw6ewe6, body: Since you are using Angular for your front end (cl
skip since comment dump exists...
[4/None] id: fvwmkq9, body: True, I only mentioned it because OP said they wer
skip since comment dump exists...
[5/None] id: fvw3s2e, body: Wordpress itself offers pretty affordable hosting.
skip since comment dump exists...
[6/None] id: fvuueav, body: check to see if your server instance is required a
skip since comment dump exists...
[7/None] id: fvtdytg, body: line 8 is improper syntax.   set the “port” proper
skip since comment dump exists...
[8/None] id: fvhkuri, body: It might be worth taking a hard look at yourself a
skip since comment dump exists...
[9/None

[79/None] id: euyqut0, body: Are you looking to self host or use a cloud soluti
[80/None] id: etnqubi, body: The original DDR high score.
[81/None] id: etnqobm, body: with javascript running as well or just the presen
[82/None] id: etnqg37, body: I agree, although I would want the whole object to
[83/None] id: et8hbb0, body: I have had this issue before, I don’t have access 
[84/None] id: et0t3br, body: yeah it’s not the most compact, but it makes it wa
[85/None] id: eszmhxj, body: I use font awesome in Vue but I side load it in we
[86/None] id: erwb7mj, body: I found this on the web:  “Most probably, the prob
[87/None] id: erwav7j, body: Have you tested the download with Safari?
[88/None] id: erw0ozq, body: is it possible the file generation takes long than
[89/None] id: ervqodr, body: since you are deleting the file anyways have you l
[90/None] id: ervql3c, body: headers can be added as a 3rd parameter still. it 
[91/None] id: ervhu72, body: did you try adding “download “ attribute t