In [190]:
import json
import re
import praw
import pickle

In [101]:
with open('config.txt') as f:
    client, secret = f.read().split()

In [102]:
reddit = praw.Reddit(client_id=client,
                     client_secret=secret,
                     user_agent='pizza sub mining bot')

In [66]:
def get_posts():
    posts = []
    for i in range(1,11):
        with open('data/' + str(i) + '.json') as f:
            data = json.loads(f.read())
        posts.extend(data['data']['children'])
    return [post['data'] for post in posts]

In [67]:
posts = get_posts()

In [94]:
def get_flairs_without_num(posts):
    flair_count = {}
    for post in posts:
        flair = re.sub('\(.*\)','',post['author_flair_text']).strip()
        flair = re.sub('\w+:\s\w+','',flair).strip()
        if flair not in flair_count:
            flair_count[flair] = 1
        else:
            flair_count[flair] += 1
    return flair_count

In [95]:
get_flairs_without_num(posts)

{u'Big Fish': 47,
 u'Fish': 53,
 u'Giver': 24,
 u'Gold Giver': 1,
 u'Goldfish': 5,
 u'Hungry': 33,
 u'Pizza Tycoon': 4,
 u'Recipient': 406,
 u'Small Fish': 373,
 u'Thief': 2,
 u'Trader': 32}

In [97]:
def get_all_fulfiled(posts):
    fulfiled_posts = []
    for post in posts:
        link_flair_text = post['link_flair_text']
        if link_flair_text:
            fulfiled_posts.append(link_flair_text)
    return fulfiled_posts

In [98]:
get_all_fulfiled(posts)

[u'Fulfilled',
 u'Fulfilled',
 u'Fulfilled',
 u'No Longer Needed',
 u'No Longer Needed',
 u'Closed',
 u'Fulfilled',
 u'Fulfilled',
 u'Fulfilled',
 u'Fulfilled',
 u'Fulfilled',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'No Longer Needed',
 u'Request',
 u'No Longer Needed',
 u'No Longer Needed',
 u'Thanks',
 u'Thanks',
 u'No Longer Needed',
 u'No Longer Needed']

In [199]:
def extract_comments(post):
    comment_url = post['url']
    submission = reddit.submission(url=comment_url)
    post['comments'] = []
    for comment in submission.comments:
        if not comment.author:
            continue
        if comment.author.name != 'AutoModerator' or comment.author.name != 'AutoModerator':
            post['comments'].append(comment)

def fulfill_in_comments(comment):
    keywords = set(['fulfilled','thank you','got pizza','appreciated','$fulfilled'])
    if keywords & set(comment.body.lower().split()):
        return True
    return False

def fulfill_in_link_body(text):
    if 'fulfilled by' in set(text.lower().split()):
        return True
    #find text after edit
    text = text.split('edit')
    if len(text) > 1:
        text = set(''.join(text[1:]).lower().split())
        keywords = set(['fulfilled','thank you','got pizza','appreciated'])
        if keywords & text:
            return True
    return False

def is_op_recipient(post):
    post['recipient'] = False
    flair = post['author_flair_text'].lower()
    if 'recipient' in flair or 'got' in flair:
        post['recipient'] = True

In [200]:
def set_fulfillment_status(post):
    post['fulfilled'] = fulfill_in_link_body(post['selftext'])
    if not post['fulfilled']:
        extract_comments(post)
        for comment in post['comments']:
            if comment.author.name == post['author']:
                post['fulfilled'] = fulfill_in_comments(comment)
        

In [201]:
def set_all_fulfillment_status(posts):
    for post in posts:
        post['fulfilled'] = False
        set_fulfillment_status(post)

def set_all_recipient(posts):
    for post in posts:
        is_op_recipient(post)

In [186]:
# set_fulfillment_status(posts[20])
# posts[20]['fulfilled']
# # fulfill_in_comments(posts[20])

In [187]:
# keywords = set(['fulfilled','thank you','got pizza','appreciated','$fulfilled'])
# test = set([u'gift', u'/u/op_is_asshole', u'$25', u'$fulfilled', u'hut', u'by', u'card', u'pizza'])
# if keywords & test:
#     print 'true'

In [188]:
set_all_fulfillment_status(posts)

In [189]:
len([post for post in posts if post['fulfilled']])

10

In [204]:
set_all_recipient(posts)
len([post for post in posts if post['recipient'] and len(post['comments'])])

273

In [196]:
[post for post in posts if len(post['comments']) > 1][50:55]

[{u'approved_by': None,
  u'archived': False,
  u'author': u'leod-jpg',
  u'author_flair_css_class': u'recipient-1to2',
  u'author_flair_text': u'Recipient (Got 1)',
  u'banned_by': None,
  u'brand_safe': True,
  u'clicked': False,
  'comments': [Comment(id='ddsiwj8'),
   Comment(id='ddtjspu'),
   Comment(id='ddspba4')],
  u'contest_mode': False,
  u'created': 1487219352.0,
  u'created_utc': 1487190552.0,
  u'distinguished': None,
  u'domain': u'self.Random_Acts_Of_Pizza',
  u'downs': 0,
  u'edited': False,
  'fulfilled': False,
  u'gilded': 0,
  u'hidden': False,
  u'hide_score': False,
  u'id': u'5ua1eg',
  u'is_self': True,
  u'likes': None,
  u'link_flair_css_class': None,
  u'link_flair_text': None,
  u'locked': False,
  u'media': None,
  u'media_embed': {},
  u'mod_reports': [],
  u'name': u't3_5ua1eg',
  u'num_comments': 7,
  u'num_reports': None,
  u'over_18': False,
  u'permalink': u'/r/Random_Acts_Of_Pizza/comments/5ua1eg/request_pizza_for_a_homeless_guy_living_in_the/?ref=se

In [191]:
with open('data/parsed_data.pkl','wb') as f:
    pickle.dump(posts,f)