https://github.com/huggingface/transformers/blob/0ae96ff8a7e2d371242452d81bee85da8df202f5/examples/text-generation/run_generation.py

# get a bunch of user comments

In [1]:
# check praw.ini in root
import praw

In [2]:
# Reddit API client. No arguments are passed here, so the connection settings
# presumably come from the praw.ini file mentioned above -- confirm it exists.
reddit = praw.Reddit()

In [3]:
# Reddit account whose comments are downloaded; also used to name the output
# directory and manifest file further down.
user_name = 'suncoasthost'

In [4]:
# Redditor handle for user_name; its comment listing is iterated below.
user = reddit.redditor(user_name)

In [5]:
def get_context(comment, reddit):
    """Look up the submission and, if there is one, the parent of a comment.

    Args:
        comment: object exposing ``link_id`` and ``parent_id`` strings.
        reddit: client exposing ``submission(id=...)`` and ``comment(id=...)``.

    Returns:
        A ``(parent_comment, submission)`` tuple. ``parent_comment`` is
        ``None`` when ``comment.parent_id`` equals ``comment.link_id``, i.e.
        the comment replies to the submission itself.
    """
    # link_id carries a 't3_' prefix; the lookup wants the bare id.
    submission_id = comment.link_id.replace('t3_', '')
    submission = reddit.submission(id=submission_id)

    if comment.parent_id == comment.link_id:
        # Top-level comment: its parent is the submission, not a comment.
        return None, submission

    # Reply to another comment: strip the 't1_' prefix to get its bare id.
    parent_id = comment.parent_id.replace('t1_', '')
    return reddit.comment(id=parent_id), submission

def get_all_context(comment, reddit):
    """Collect every ancestor comment of ``comment`` plus its submission.

    Climbs the reply chain with ``get_context`` until a top-level comment is
    reached.

    Returns:
        A ``(parent_comments, submission)`` tuple. ``parent_comments`` is a
        list ordered from the outermost ancestor down to the direct parent,
        and is empty when ``comment`` is itself top-level.
    """
    ancestors = []
    current = comment
    while True:
        parent, submission = get_context(current, reddit)
        if parent is None:
            # Reached a top-level comment; nothing further up the chain.
            break
        ancestors.append(parent)
        current = parent

    # Ancestors were gathered nearest-first; return them outermost-first.
    ancestors.reverse()
    return ancestors, submission

In [6]:
import os
import json
import pickle

In [7]:
from datetime import datetime

In [15]:
def format_comment_as_json(comment):
    """Reduce a comment to a plain dict that ``json.dump`` can serialise.

    Args:
        comment: object exposing ``id``, ``author``, ``body``,
            ``created_utc`` and ``permalink``.

    Returns:
        A dict with the keys ``id``, ``author``, ``body``, ``created_utc``
        and ``permalink``. ``author`` is the author's ``name``, or ``None``
        when the comment has no author object.
    """
    # PRAW reports a deleted account as author=None; without this guard the
    # .name lookup raises AttributeError (this helper is also applied to
    # parent comments, which may well be deleted).
    author = comment.author
    return {
        'id': comment.id,
        'author': author.name if author is not None else None,
        'body': comment.body,
        'created_utc': comment.created_utc,
        'permalink': comment.permalink,
    }

def format_submission_as_json(submission):
    """Reduce a submission to a plain dict that ``json.dump`` can serialise.

    Args:
        submission: object exposing ``id``, ``subreddit.display_name``,
            ``title``, ``selftext`` and ``permalink``.

    Returns:
        A dict with the keys ``id``, ``subreddit``, ``title``, ``selftext``
        and ``permalink``, in that order.
    """
    summary = {}
    summary['id'] = submission.id
    # Only the subreddit's display name is kept, not the subreddit object.
    summary['subreddit'] = submission.subreddit.display_name
    summary['title'] = submission.title
    summary['selftext'] = submission.selftext
    summary['permalink'] = submission.permalink
    return summary

In [19]:
# Per-comment JSON files go in data/user/<user_name>/; the manifest CSV sits
# next to that directory and records when each comment was fetched.
outpath = 'data/user/{}'.format(user_name)
# Creating outpath also creates data/user/, which the manifest needs.
os.makedirs(outpath, exist_ok=True)

manifestpath = 'data/user/{}.csv'.format(user_name)
if not os.path.isfile(manifestpath):
    # First run for this user: start the manifest with its header row.
    with open(manifestpath, 'w') as f:
        f.write('comment_id, created_utcnow_isoformat\n')

# args for user.comments.new()
# https://praw.readthedocs.io/en/latest/code_overview/other/listinggenerator.html#praw.models.ListingGenerator
# limit – default 100, max 1000
limit = 10
for i, comment in enumerate(user.comments.new(limit=limit)):
    # One-line progress preview: flatten newlines/tabs, keep 50 characters.
    preview = comment.body.replace('\n', ' ').replace('\t', ' ')[:50]
    print('[{}/{}] id: {}, body: {}'.format(i, limit, comment.id, preview))

    parent_comment, submission = get_context(comment, reddit)
    package = {
        'comment': format_comment_as_json(comment),
        # None for top-level comments, which have no parent comment.
        'parent_comment': (
            format_comment_as_json(parent_comment)
            if parent_comment is not None
            else None
        ),
        'submission': format_submission_as_json(submission),
    }

    # Write the payload before recording it, so the manifest never lists a
    # comment whose JSON file failed to be written.
    with open(os.path.join(outpath, '{}.json'.format(comment.id)), 'w') as f:
        json.dump(package, f, indent=4)

    # The manifest is reopened per comment so each row reaches disk as soon as
    # its payload exists.
    # NOTE: datetime.utcnow() is deprecated since Python 3.12; it is kept here
    # because an aware datetime would change the timestamp format of rows
    # appended to existing manifests.
    with open(manifestpath, 'a') as f:
        f.write('{}, {}\n'.format(comment.id, datetime.utcnow().isoformat()))

[0/10] id: fw9xf8b, body: depends on your hosting situation. I am hosted on 
[1/10] id: fw9q6m5, body: it is but it scales up easy. Major organizations u
[2/10] id: fw6ewe6, body: Since you are using Angular for your front end (cl
[3/10] id: fvwmkq9, body: True, I only mentioned it because OP said they wer
[4/10] id: fvw3s2e, body: Wordpress itself offers pretty affordable hosting.
[5/10] id: fvuueav, body: check to see if your server instance is required a
[6/10] id: fvtdytg, body: line 8 is improper syntax.   set the “port” proper
[7/10] id: fvhkuri, body: It might be worth taking a hard look at yourself a
[8/10] id: fvh3dra, body: I recommend HTML5 Blank.
[9/10] id: fve1jrp, body: You need to narrow down the requirements of your A
