In [325]:
import praw
import re
import string
from collections import Counter

### Reddit API access
Once you create a Reddit app in the dev portal
retrieve the following params from it

In [38]:
APP_ID = 'ACqyZR3hYsXzfA'
APP_SECRET = 'xehsNEoOe_EH9DT-JwxekUDXLKxOIA'
USER_AGENT = 'my little scrapper'

### PRAW: Python Reddit API Wrapper
Does all the hard work
https://praw.readthedocs.io/en/latest/index.html

In [39]:
reddit = praw.Reddit(
     client_id=APP_ID,
     client_secret=APP_SECRET,
     user_agent=USER_AGENT
)

In [40]:
subreddit = reddit.subreddit("wallstreetbets")

In [319]:
hot_submissions = []

for submission in subreddit.hot(limit=10):
    author_name = submission.author.name
    author_redditor_instance = reddit.redditor(author_name) 
    new_sumbission = {
        'title': submission.title,  # Output: the submission's title
        'score': submission.score,  # Output: the submission's score
        'submission_id': submission.id,     # Output: the submission's ID
        'submission_url': submission.url,   # Output: the URL the submission points to
        'author': {
            'name': author_name,
            'comment_karma': author_redditor_instance.comment_karma
        }
    }
    hot_submissions.append(new_sumbission)

In [320]:
all_comments = []

url = hot_submissions[0]['submission_url']
sub = reddit.submission(url=url)

for hot_sub in hot_submissions:
    hot_sub_url = hot_sub['submission_url']
    if hot_sub_url.endswith("/"):
        sub = reddit.submission(url=hot_sub['submission_url'])
        for comment in sub.comments:
            if hasattr(comment, 'body'):
                author_name = comment.author.name if comment.author else None
                
                # this part slows down the loop,
                # not sure what to do with the psoter's karma yet
                if author_name:
                    redditor = reddit.redditor(author_name)
                    comment_karma = redditor.comment_karma if redditor.comment_karma else None
                
                num_of_replies = 0
                if (len(comment.replies._comments) > 0):
                    num_of_replies = comment.replies._comments[0].count if hasattr(comment.replies._comments[0], 'count') else 0
                all_comments.append({
                    'thread': hot_sub['title'],
                    'body': comment.body,
                    'author': author_name,
                    'author_comment_karma': comment_karma,
                    'ups': comment.ups,
                    'downs': comment.downs,
                    'awards_count': comment.total_awards_received,
                    'score': comment.score,
                    'created_at': comment.created,
                    'num_of_replies': num_of_replies
                })

In [286]:
# first version, assumes you do not know the stocks by name
# uses regex to filter comment for exaclty three capital letters in a row
# with a space or a dollar sign before and a space after
# returns too many common words, not usable right now
# TODO look into exclusion with a word library

stock_name_re = '( [A-Z]{3} | \$[A-Z]{3} )'

stock_mentions = {}

for com in all_comments:
    com_stocks_names = re.findall(stock_name_re, com['body'])
    if len(com_stocks_names) > 0:
        for com_stock_name in com_stocks_names:
            name = com_stock_name.strip()
            if name[0] == '$':
                name = name[1:]
            if name in stock_mentions:
                stock_mentions[name] += 1
            else:
                stock_mentions[name] = 1

In [332]:
# second version
# uses a list with stock names
# and Counters to count their occurencs in comments

stocks_to_look_for = ['gme', 'amc', 'nok']

positive_words = ['hold', 'buy', 'buying', 'holding', 'yolo', 'long', 'bull', 'up', 'keep']

negative_words = ['sell', 'short', 'shorting', 'selling', 'bear', 'down', 'crash']

negation_list = ['why', 'no', 'not', 'don\'t']

res = {
    'GME': 0,
    'AMC': 0
}

for com in all_comments:
    text = com['body'].lower().translate(str.maketrans('', '', string.punctuation)).split()
    stock_counter = Counter(w for w in text if w in stocks_to_look_for)
    positive_counter = Counter(w for w in text if w in positive_words)
    negative_counter = Counter(w for w in text if w in negative_words)
    
    print(positive_counter, sum(positive_counter.values()))
    print(negative_counter,  sum(negative_counter.values()))
    print(stock_counter)
    print(text)
    
    print('\n')
    print('-----------------')
    print('\n')
    
    if len(stock_counter) >0:
        if 'gme' in stock_counter:
            res['GME'] += stock_counter['gme']
        if 'amc' in stock_counter:
            res['AMC'] += stock_counter['amc']

res

Counter() 0
Counter() 0
Counter({'gme': 1})
['gme', 'overnight', 'thread', 'httpsredditcomrwallstreetbetscommentslb7rg4gmeovernightpajamapartymegathread9000']


-----------------


Counter() 0
Counter() 0
Counter({'gme': 1})
['just', 'bought', '100', 'more', 'gme', 'before', 'close', '9082']


-----------------


Counter({'holding': 2, 'hold': 1}) 3
Counter({'short': 1, 'sell': 1}) 2
Counter({'gme': 1})
['over', 'the', 'last', '6', 'months', 'i’ve', 'been', 'looking', 'at', 'a', 'lot', 'of', 'short', 'squeezes', 'from', 'years', 'past', 'because', 'i', 'think', 'of', 'these', 'as', 'black', 'swan', 'opportunities', 'i’m', 'newer', 'to', 'wsb', 'thank', 'you', 'vets', 'for', 'sharing', 'your', 'community', 'but', 'based', 'on', 'the', 'trade', 'activity', 'volume', 'buysell', 'ratios', 'and', 'odd', 'pricing', 'hitting', 'ceilings', 'rapid', 'spikes', 'with', 'gradual', 'declines', 'etc', 'i', 'strongly', 'believe', 'this', 'squeeze', 'is', 'still', 'on', 'the', 'table', 'it’s', 'obviou



-----------------


Counter() 0
Counter({'sell': 1}) 1
Counter()
['bots', 'trying', 'to', 'spread', 'the', 'lie', 'dfv', 'sold', 'please', 'guys', 'don’t', 'sell', 'his', 'account', 'is', 'still', 'active', 'he', 'just', 'hasn’t', 'posted', 'anything', 'yet', 'this', 'lie', 'could', 'be', 'the', 'one', 'that', 'tumbles', 'it', 'back', 'to', 'low', 'singles']


-----------------


Counter() 0
Counter() 0
Counter()
['people', 'saying', 'dfv', 'sold', 'when', 'he', 'already', 'posted', 'his', 'loss', 'porn', 'for', 'the', 'day', '🖕🌈🐻', '💎👐🚀']


-----------------


Counter({'holding': 1, 'buy': 1}) 2
Counter() 0
Counter({'amc': 1})
['ill', 'be', 'holding', 'amc', 'and', 'probably', 'continue', 'to', 'buy', 'some']


-----------------


Counter({'buying': 1, 'hold': 1}) 2
Counter() 0
Counter({'gme': 1})
['bought', '9', 'gme', '109', 'first', 'time', 'buying', 'stock', 'the', 'plan', 'hold', 'should', 'be', 'interesting', 'no', 'matter', 'what', 'happens']


-----------------


Counter() 0


Counter({'buy': 2, 'long': 1, 'hold': 1, 'keep': 1, 'up': 1}) 6
Counter({'sell': 4, 'short': 2, 'down': 1}) 7
Counter({'gme': 1, 'amc': 1})
['i', 'think', 'our', 'odds', 'are', 'good', 'of', 'winning', 'this', 'tug', 'of', 'war', 'with', 'the', 'shorts', 'guys', 'ladder', 'attacks', 'continue', 'to', 'bring', 'prices', 'down', 'which', 'short', 'firms', 'want', 'so', 'that', 'if', 'we', 'decide', 'to', 'sell', 'they', 'can', 'collect', 'our', 'stocks', 'at', 'low', 'prices', 'they', 'want', 'all', 'the', 'stock', 'back', 'they', 'owe', 'brokers', 'but', 'they', 'can’t', 'win', 'if', 'we', 'never', 'sell', 'no', 'matter', 'how', 'low', 'the', 'price', 'whether', 'that’s', '0', '19', 'cents', 'or', '1', 'if', 'you', 'sell', 'your', 'stock', 'in', 'gme', 'or', 'amc', 'you’re', 'only', 'suiciding', 'and', 'negging', 'remember', 'that', 'we', 'can', 'put', 'in', 'limit', 'orders', 'for', 'lower', 'prices', 'to', 'ensure', 'we', 'get', 'the', 'stocks', 'we', 'want', 'during', 'these', 'ladd

Counter({'buying': 1, 'buy': 1}) 2
Counter({'selling': 1}) 1
Counter()
['at', 'this', 'stage', 'we', 'are', 'balls', 'deep', 'that', 'theres', 'actually', 'no', 'point', 'in', 'selling', 'they', 'can', 'come', 'and', 'get', 'them', 'whenever', 'they', 'like', 'they', 'also', 'cant', 'pause', 'buying', 'forever', 'and', 'so', 'ill', 'buy', 'whatever', 'dip', 'i', 'can', 'and', 'then', 'just', 'wait', 'i', 'got', '30', 'shares', '290', 'so', 'i’ll', 'either', 'take', 'the', 'moon', 'or', 'nothing', '🚀🚀']


-----------------


Counter() 0
Counter() 0
Counter()
['can', 'someone', 'please', 'post', 'the', 'suicide', 'hotline']


-----------------


Counter() 0
Counter() 0
Counter()
['i', 'am', 'too', 'retarded', 'to', 'realise', 'this', 'was', 'the', 'darkest', 'hour', 'lol']


-----------------


Counter() 0
Counter() 0
Counter()
['ive', 'been', 'at', 'fucking', 'zero', 'before', 'in', 'my', 'life', 'and', 'i', 'can', 'do', 'it', 'again', 'its', 'kind', 'of', 'freeing', 'guess', 'ill', 'go

{'GME': 193, 'AMC': 122}