In [None]:
import requests
import requests.auth
import re
import numpy as np
import pandas as pd
from nltk import word_tokenize

Generate a link to query the API

In [None]:
#Search configuration for the first query
#(full parameter list and API documentation: https://pushshift.io/api-parameters/)

#type (comment or submission)
search_type = 'submission'

#key order is kept as-is because it determines the parameter order in the URL
criteria = {
    #search term
    'q': 'halloween',

    #time window, as offsets back from now (m (minute), h (hour), d (day)):
    #'before' is the newer edge of the window, 'after' the older edge
    'before': '0d',
    'after': '30d',

    #size of results (max=1000)
    'size': '1000',

    #sort results by score, highest first
    'sort_type': 'score',
    'sort': 'desc',

    #subreddit
    'subreddit': 'AskReddit',

    #score threshold (the value carries its own comparison operator)
    'score': '>15',

    #optional author filter, disabled by default
    #'author': 'user1992',
}

#function to generate the search URL from the criteria
def get_url(search_type,criteria):
    """Build a Pushshift search URL from a dict of query criteria.

    Args:
        search_type: endpoint name placed in the URL path
            ('comment' or 'submission').
        criteria: mapping of parameter name -> value. Values may be any
            object convertible with str() (for example an int size).

    Returns:
        The URL as a string. Every parameter, including the last one, is
        followed by '&'.
    """
    from urllib.parse import quote

    url='https://api.pushshift.io/reddit/search/' + search_type + '/?'
    for crit, value in criteria.items():
        #accept non-string values such as size=1000
        value=str(value)
        if crit=='score':
            #score values carry their own comparison operator (e.g. '>15'),
            #so they are appended verbatim with no '=' in between
            url=url + crit + value + '&'
        else:
            #percent-encode so that spaces, '&', '#' or '=' inside a value
            #cannot break the query string; ',' is left literal
            url=url + crit + '=' + quote(value, safe=',') + '&'
    return url

#build the request URL from the search type and criteria, then show it
url = get_url(search_type=search_type, criteria=criteria)

print(url)

Find a submission about candy

In [None]:
#request data; the timeout stops the cell from hanging on a stalled
#connection and raise_for_status surfaces HTTP errors before JSON parsing
response = requests.get(url, timeout=30)
response.raise_for_status()
submissions = response.json()['data']

#reset first so a link_id left over from an earlier run of this cell can
#never be reused silently when the current search finds no match
link_id = None

#loop through submissions
for submission in submissions:

    #get submission title
    title = submission['title']

    #if candy in title, get submission link_id and break loop
    if 'candy' in title:
        print(title)

        #get link ID to query comments
        link_id = submission['id']

        break

if link_id is None:
    raise LookupError("no submission with 'candy' in its title was found")

#Now search the comments of that submission with a new URL. The comment
#query uses its own names so that `url`, `criteria` and `search_type` keep
#describing the submission search and this cell can be re-run as-is.
comment_criteria = {
    #submission link_id
    'link_id': link_id,

    #size
    'size': '1000',
}

#get URL
comment_url = get_url('comment', comment_criteria)

#request data
comment_response = requests.get(comment_url, timeout=30)
comment_response.raise_for_status()
comments = comment_response.json()['data']

Check the comments

In [None]:
#a comment is a parent (top level) comment when its parent is the
#submission itself, i.e. parent_id equals link_id
top_level_comments = (c for c in comments if c['parent_id'] == c['link_id'])

for comment in top_level_comments:
    print(comment['body'])
    print('\n')
        

Compare sentiment in two different subreddits with AFINN

In [None]:
#analyze sentiment with afinn
from afinn import Afinn
afinn = Afinn()


def _fetch_data(url):
    """GET `url` and return the 'data' entry of the JSON response.

    Raises requests.HTTPError on an error status and requests.Timeout when
    the server does not answer within 30 seconds.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.json()['data']


def _title_sentiment(title):
    """Return the AFINN score of `title` divided by its number of words.

    Words are counted with str.split(), so repeated, leading or trailing
    whitespace does not inflate the count. A title without any words
    scores 0.0 instead of dividing by zero.
    """
    words = title.split()
    if not words:
        return 0.0
    return afinn.score(title) / len(words)


#type (comment or submission)
search_type = 'submission'

#criteria shared by both searches; key order is kept because it determines
#the parameter order in the generated URL
shared_criteria = {
    #time window, as offsets back from now (m (minute), h (hour), d (day)):
    #'before' is the newer edge of the window, 'after' the older edge
    'before': '0d',
    'after': '300d',

    #size of results (max=1000)
    'size': '1000',

    #sort by score, highest first
    'sort_type': 'score',
    'sort': 'desc',
}

#subreddits
criteria1 = dict(shared_criteria, subreddit='tifu')
criteria2 = dict(shared_criteria, subreddit='WholesomeMemes')

#get urls
url1 = get_url(search_type, criteria1)
url2 = get_url(search_type, criteria2)

#get submissions
submissions1 = _fetch_data(url1)
submissions2 = _fetch_data(url2)

#per-word sentiment of every submission title in each subreddit
sentiments1 = [_title_sentiment(submission['title']) for submission in submissions1]
sentiments2 = [_title_sentiment(submission['title']) for submission in submissions2]

print('AFINN SCORE:')
print(criteria1['subreddit'] + ': ' + str(np.mean(sentiments1)))
print(criteria2['subreddit'] + ': ' + str(np.mean(sentiments2)))