# AIMS OF THE PROJECT:
## - Top stock mentions (tickers) for today, the last 2, 5 or 7 days, or a custom number of days, presented as a list or bar chart
## - Print out the comments that mention these tickers, or only those for a single ticker
## - Provide a visualisation in the form of a graph
## - Create a Jupyter Notebook tool with buttons that allows non-coders to perform these tasks.


In [1]:
from datetime import datetime, date, time
import re

import pandas as pd
import numpy as np

import json
import praw
import requests

import jupyter

from praw.models import MoreComments

In [164]:
# Checking the current working directory path
import os
print(os.getcwd())

/Users/nikitaizmailov/Desktop/Reddit analyser


In [2]:
# Intended to lift the notebook's limit on how much text output is displayed.
# NOTE(review): this only assigns an attribute named `textOutputLimit` on the
# imported `jupyter` module object; nothing visible in this file reads it, so
# whether it actually changes the output limit should be confirmed.
jupyter.textOutputLimit = 0

In [3]:
# Read the JSON file holding the credentials used to access the Reddit API wrapper.
creds = 'client_secrets.json'

with open(creds, encoding='utf-8') as f:
    data_creds = json.load(f)

# Show which fields were loaded without echoing their values: cell output is
# saved together with the notebook, so displaying `data_creds` directly would
# persist the client secret and refresh token in the file.
{key: '<redacted>' for key in data_creds}

{'client_id': '<redacted>',
 'client_secret': '<redacted>',
 'user_agent': 'script by u/TallReplacement229',
 'redirect_uri': 'http://localhost:8080',
 'refresh_token': '<redacted>'}

In [4]:
# Build the Reddit client used by the rest of the notebook. Every keyword
# argument is taken from the entry of the same name in `data_creds`.
reddit = praw.Reddit(
    **{
        setting: data_creds[setting]
        for setting in (
            'client_id',
            'client_secret',
            'user_agent',
            'redirect_uri',
            'refresh_token',
        )
    }
)


Version 7.2.0 of praw is outdated. Version 7.3.0 was released Thursday June 17, 2021.


In [138]:
# Remaining API request limits.
# Note that the values are empty until a command that results in a network
# request has actually been issued.
reddit.auth.limits

{'remaining': 588.0, 'reset_timestamp': 1625317200.482434, 'used': 12}

In [6]:
# Name of the subreddit to analyse
subr = 'wallstreetbets'

# Subreddit object for that name, obtained from the Reddit client
subreddit = reddit.subreddit(subr)

In [7]:
# Storage for the attributes of each submission, keyed by its position in the
# listing; the dictionary is later turned into a pandas DataFrame.
submission_posts = {}

In [8]:
# Walk the ten current "hot" submissions of the subreddit, echo each title and
# keep the attributes needed for the submissions DataFrame.
_SUBMISSION_FIELDS = ('id', 'title', 'author', 'score', 'permalink', 'num_comments')

submissions = subreddit.hot(limit=10)
for number, submission in enumerate(submissions):
    title_line = '%d) submission: %s' % (number, submission.title)
    print(title_line)
    # One list per submission, in the same order as _SUBMISSION_FIELDS
    submission_posts[number] = [
        getattr(submission, field) for field in _SUBMISSION_FIELDS
    ]

0) submission: Weekend Discussion Thread for the Weekend of July 02, 2021
1) submission: Most Anticipated Earnings Releases for the week beginning July 5th, 2021
2) submission: Guh
3) submission: Dealer’s Choice 🥸
4) submission: 47k AMC 🦍🚀🌝. WISH ME LUCK
5) submission: U.S. House approves $715 bln infrastructure bill. 4 billion go to creating EV charging stations.
6) submission: SOFI, you just took one in the jugular, man.
7) submission: AAPL, AMZN, TSLA in the crockpot for 6 years
8) submission: When someone older than you starts picking up day trading…
9) submission: $CLF TO THE MOON BABY


In [10]:
# Tabulate the collected submissions: one row per dictionary entry (the key
# becomes the index) and one named column per stored attribute.
submission_columns = [
    'submission_id',
    'submission_title',
    'author',
    'score',
    'link',
    'num_comments',
]
df = pd.DataFrame.from_dict(
    submission_posts,
    orient='index',
    columns=submission_columns,
)
# The stored permalink is site-relative, so prefix it with the host name.
link_as_text = df['link'].astype(str)
df['link'] = 'reddit.com' + link_as_text
df

Unnamed: 0,submission_id,submission_title,author,score,link,num_comments
0,oci3qd,Weekend Discussion Thread for the Weekend of J...,OPINION_IS_UNPOPULAR,205,reddit.com/r/wallstreetbets/comments/oci3qd/we...,6621
1,ocvavi,Most Anticipated Earnings Releases for the wee...,bigbear0083,2,reddit.com/r/wallstreetbets/comments/ocvavi/mo...,1
2,ocnh7x,Guh,Memetron9000,23883,reddit.com/r/wallstreetbets/comments/ocnh7x/guh/,562
3,ocptoc,Dealer’s Choice 🥸,Throwaway1forall,1313,reddit.com/r/wallstreetbets/comments/ocptoc/de...,63
4,och45s,47k AMC 🦍🚀🌝. WISH ME LUCK,MilanFLopez,5640,reddit.com/r/wallstreetbets/comments/och45s/47...,867
5,oca414,U.S. House approves $715 bln infrastructure bi...,Feisty_Trouble,6319,reddit.com/r/wallstreetbets/comments/oca414/us...,733
6,ocg6du,"SOFI, you just took one in the jugular, man.",TheFleshGordon,2190,reddit.com/r/wallstreetbets/comments/ocg6du/so...,171
7,ocpub3,"AAPL, AMZN, TSLA in the crockpot for 6 years",merlin322,357,reddit.com/r/wallstreetbets/comments/ocpub3/aa...,76
8,ocmn0o,When someone older than you starts picking up ...,ElGringoSuave99,538,reddit.com/r/wallstreetbets/comments/ocmn0o/wh...,34
9,oci8jh,$CLF TO THE MOON BABY,Frosty_Resilience,570,reddit.com/r/wallstreetbets/comments/oci8jh/cl...,116


In [None]:
# Dictionary that will map a submission id to the list of its comment bodies
dict_subm = {}
# All submission ids from the submissions DataFrame, as a plain list
list_of_subm_ids = df['submission_id'].tolist()
# For now only the first submission is analysed. Note that this rebinds the
# name to a single id (one element of the list), despite the "list_of" prefix.
list_of_subm_ids = list_of_subm_ids[0]

In [59]:
##  Function to extract comments from a submission and store it in the dictionary
def comments_to_submission_id_mapping(dict_subm, sub_id):
    """Collect the comment bodies of one submission into ``dict_subm``.

    The submission is looked up through the module-level ``reddit`` client,
    its comments are requested newest-first, every "load more comments"
    placeholder is replaced by the comments it stands for, and the flattened
    comments are appended to ``dict_subm[sub_id]``. Each stored body is also
    printed.

    A comment is skipped when its body contains ``**User Report**``, when its
    body is two characters or shorter, or when a comment with the same id has
    already been stored during this call.

    Args:
        dict_subm: Dictionary mapping a submission id to a list of comment
            bodies. It is modified in place; an existing list for ``sub_id``
            is extended rather than replaced.
        sub_id: Id of the submission whose comments are collected.

    Returns:
        None. The result is delivered through ``dict_subm``.
    """
    submission = reddit.submission(id=sub_id)
    # Sort all comments in the submission by newest first
    submission.comment_sort = 'new'

    # Replace every MoreComments placeholder with the comments it represents,
    # then flatten the comment forest so replies and top-level comments sit
    # at the same level.
    submission.comments.replace_more(limit=None)

    seen_comment_ids = set()
    for comment in submission.comments.list():
        if isinstance(comment, MoreComments):
            # Not expected after replace_more(limit=None); kept as a guard.
            continue
        body = comment.body
        if '**User Report**' in body or len(body) <= 2:
            continue
        if comment.id in seen_comment_ids:
            continue
        # Record the id for every stored comment (previously only the first
        # one was recorded, so the duplicate check never took effect).
        seen_comment_ids.add(comment.id)
        print(body)
        dict_subm.setdefault(sub_id, []).append(body)


comments_to_submission_id_mapping(dict_subm, list_of_subm_ids)

If anyone knows any good escort services in Santa Barbara then DM me
Anyone know any good escort services in Santa Barbara??
My wife’s having our first child today. Inb4 i yolo his college fund on 0 DTE calls
How much hedge could a hedge fund hedge if a hedge fund could fund hedges?
stop looosing money and start investing in TSLA
So got about 8k in cash on RH decided to turn on margin just to see, got instantly approved big surprise, and they decided I was responsible enough to handle 180k. What a bunch of retards.
In order to succeed you must first survive 

-21 savage on wallstreet
Give me some rock songs for working out guys
SONY looking like it's gonna make a move up on the weekly. But I am a retard. This is not financial advice.
WTF is the point of being a millionaire? I can't even afford a house
This is a super dumb idea… buttttt 

Jan 2003 - SARS market pulled back fair amount for first few months of SARS outbreak due to fear of pandemic.


Sep 2007 - Global financial crisis due

Start a hobby or join. A club... find like a mushroom hunting club or join a yoga class. Whatever you're into just do it and do it often enough that you're comfortable and after so.e ti.e ask the people you you like on a friend date to BBQ, or do something else that you both or all enjoy
Don't get married and if you are already, dont have kids
Learn to eat it.
Join a cult...easiest way to make friends
Find a few retards on here in your area
ask random strangers if AMC is going to the moon
Get ready for a green dildo for Tuesday :)
Yes.
probably green. might take a few weeks to decline.
Green
Green
I personally wouldn’t bet against it right now.
It's due to Fed QE. Fed is not going to taper QE so they know interest rates won't rise, therefore these large cap stonks are the new treasury bond.
They don’t like competition
Which one was that?
A brown bear perhaps.  Then grizzlies gonna fuck shit up.
Nah don’t do that trust me I’ve been in puts for a week and it ain’t dropping
Yes. And I gen

Gonna have to give details if you really wanna know what you did wrong
Let me guess AMC?
Delta Lockdown by Premier League Start. 🤡🤡🤡
Calls on $YNWA
r/nonononoyes
No it’s only one company
Chicken Sammy and a chocolate shake please
Ass pieces
Key word "invest" not gamble.
Oh my gourd! 


*I am a bot, and this action was performed automatically. Please [contact the moderators of this subreddit](/message/compose/?to=/r/wallstreetbets) if you have any questions or concerns.*
https://imgur.com/1sK2p8S
[Never happened to me](http://imgur.com/gallery/R390EId)
This is 10% percent luck, 20% skill
It runs out late Q3
Oh no rip your money
🕺🕺⚰️🕺🕺
Yolo on SQQQ when it feels right
Expirary and strike?
Me too kid! 240 7/9 430p
Someone’s assmad that they ugly
this man is HORNY
The damn dog is like 20lbs heavier without the filters anyway….
Swing traded the last spike, didn’t get it done this time, but also didn’t expect for it to crash that hard between after market and noon today. Can pat myself on th

Damn. Normal week, eh?
I'm only it in it for the high IV. Sold puts, get assigned shares, sell calls, shares get called, rinse repeat.
Glad you are 100% on that now i have to sell i guess lo motherfucking l
They don’t wanna hear it.
It’s possible
After today idk they may all be dead
Thanks for the sacrifice man.
I figure we've been at ATH for almost a year now, so why fight the trend?
Bruh 😢
I'm hoping for a tuesday morning sell off and get out ASAP.
Til it’s not
I got exercised on purpose retard. If we pop off to 19-19.5 I make a lot of money.
gitit
Yes
Nah just some gambling money. Bet there will be a retraction. Tuesday at some point after the rally today
And AMC
🚀🚀🚀
Yeah, IWM is a far better indicator of the type of day this sub is having than any of the popular indices.
They already knew before your comment
I order my shit online. Wouldn't be caught dead with all those poors.
NVDA
Yep, he swooped in.
I agree this is dump portion. 

&#x200B;

For the last couple of days I have been

In [76]:
len(dict_subm['oci3qd'])

6269

In [119]:
# Turn the collected comment bodies into a one-column DataFrame of strings
# for further analysis.
df_comments = pd.DataFrame(data=dict_subm)
df_comments.columns = ['Comments']
# Selecting with a list keeps the result a DataFrame, so the string
# conversion needs no Series-to-DataFrame round trip.
df_comments = df_comments[['Comments']].astype(str)

In [125]:
# Testing each comment with a regex to see whether it contains the searched pattern
df_test1 = df_comments.copy(deep=True)

# ViacomCBS pattern: an optional literal dollar sign (escaped, because a bare
# `$` is the end-of-input anchor), "VIAC" in any letter case, and an optional
# name suffix. The longest suffix is listed first so that "omCBS" is not cut
# short by "om". The group is non-capturing; pandas' str.contains warns when a
# pattern has capturing groups.
pattern2 = r'\$?[vV][iI][aA][cC](?:omCBS|om|CBS)?'
pattern3 = r'[Aa][Mm][Cc]'

# Only the AMC pattern is applied here; rows whose comment matches are shown.
df_test1.loc[df_test1['Comments'].str.contains(pat=pattern3, regex=True, case=False)]

Unnamed: 0,Comments
108,Can someone dm me the mass retail sale date fo...
119,"I day trade for a living, I trade hours of my ..."
335,Just bought 2 movie 🍿 tickets for 10$ from AMC...
478,What’s your plans for amc?????? Dimond hands? ...
556,AMC on Tuesday will go 📈📈📈
...,...
5690,Scary thought but the margin call will be caus...
5847,They were both talked about from before times....
5851,"I made 25K on AMC, so I’m certainly not anti. ..."
6104,Spend tesla nvida n amc they gonna rip


In [None]:
# Adding visual to the tickers mentioned. Also need to create an automatic pattern creator
########
########

In [161]:
# Storage for all comments from the whole subreddit whose body matches the
# regex pattern (comment id -> body), and for any MoreComments placeholders
# met while scanning.
regex_comments_storage = {}
more_comments_obj = {}

# ViacomCBS pattern: an optional literal dollar sign (escaped, because a bare
# `$` is the end-of-input anchor and would never consume the "$" of "$VIAC"),
# "VIAC" in any letter case, and an optional name suffix. The longest suffix
# comes first so that "ViacomCBS" is matched in full rather than as "Viacom".
pattern2 = r'\$?[vV][iI][aA][cC](?:omCBS|om|CBS)?'
pattern3 = r'[Aa][Mm][Cc]'

# Compiled once so the scanning loop can reuse it for every comment
reg_exp2 = re.compile(pattern2)

In [162]:
# Scan the newest comments posted anywhere on the subreddit, regardless of the
# submission they belong to
# (equivalent of https://www.reddit.com/r/VIAC/comments/).
# Every scanned comment is printed once; comments matching the compiled
# pattern are additionally stored by id in `regex_comments_storage`.
all_recent_comments = subreddit.comments(limit=500)

for num, comment in enumerate(all_recent_comments):
    if isinstance(comment, MoreComments):
        # Not expected in this listing. A MoreComments placeholder has no
        # `body` attribute, so the object itself is kept for inspection.
        print('MORE COMMENTS OBJECT!!!')
        more_comments_obj[comment.id] = comment
        continue
    if '**User Report**' in comment.body:
        continue
    # Searching for the regex pattern
    if reg_exp2.search(comment.body):
        if comment.id in regex_comments_storage:
            # Already stored (and printed) earlier
            continue
        regex_comments_storage[comment.id] = comment.body
    # Single print for matching and non-matching comments alike; previously a
    # newly stored match was printed twice.
    print('\n {0}: {1}'.format(num, comment.body))


 0: Should call it the Gulf of Cuba just to fuck with everyone

 1: No one cares what bags you are holding

 2: You guarantee it should. Nice

 3: Your submission was removed from /r/wallstreetbets due to being too short. All text submissions must be at least 100 characters long.

*I am a bot, and this action was performed automatically. Please [contact the moderators of this subreddit](/message/compose/?to=/r/wallstreetbets) if you have any questions or concerns.*

 4: Delco? Grew up in South Philly. My wife is from Ardmore. We bought our 1st house together in Drexel Hill but recently had a custom home built out in Delaware. Hella cheaper to live out here and now we’re 1/2 way closer to our beach house

 5: Fucking nice bro!

 6: 5500 shares  holding long.    This one has some good catalyst coming soon. Earnings august 11.  Also Hood iPo will move the sector so SOFI will move by then also bank charter 25 soon

 7: You bears have fun on Friday because it bleeds Friday expiring calls, 


 100: Do you guys know how easy it is to figure out what stock each motley fool article is shilling? All you have to do is google “Gumshoe Motley Fool” and you can figure it out.

Imagine paying those exorbitant member fees when you can get the stock tip for free if you really wanted…

 101: Their natural habitat is movie theaters!

 102: reee

 103: I am a bot from /r/wallstreetbets. Your submission was removed because was too short. Please make it a comment if it's not worth expounding on.

 104: Don’t be a pussy! Make another deposit and bust their ass wide open!

 105: Because you bought?

 106: congrats, OP u still have some leftover to buy a lot of red crayons!

Red crayons taste delicious!

 107: No more posting this - no otc, penny stocks, nothing below 1B market cap

 108: Así-así

 109: Because it’s always either broken or more expensive than using supercharger.

 110: GME & AMC are going to rocket Tuesday 🚀🚀🚀
THIS IS THE WAY !

 111: Run.

 112: All in what one tuesday? Nee


 200: Alright man

 201: That’s a good 👍 idea

 202: Your post has been removed because a single link is insufficient DD. Please review the following links before resubmitting.

Please note that abusing DD flair can lead to getting banned.

[Submission Flair Guide](https://www.reddit.com/r/wallstreetbets/wiki/linkflair)

[How to conduct DD](https://www.wallstreetoasis.com/forums/on-the-job-with-simple-as-my-research-process)

*I am a bot, and this action was performed automatically. Please [contact the moderators of this subreddit](/message/compose/?to=/r/wallstreetbets) if you have any questions or concerns.*

 203: This and then going to every post about the ticker they sold calling it a trash pump n dump. No you just bought at the absolute top mate.

 204: Where can I lose money today?

 205: Hero

 206: Bro its Australia we don't riot or any of that shit, we dont shoot eachother, we dont hurt eachother. Our country is built on mateship. Our country isn't going to collapse we have 


 300: SOFI is a great long term play. I think we will see it melt upwards over the next 2 weeks. I’m doubtful there will be a squeeze since 1/2 the apes already moved onto other shit plays. I like the stock

 301: BNGO ANYONE/

 303: I’ve learnt a lot about being working with molten iron in a forge from those streams

 304: I buy undervalued stocks and sell them when they have a good pop and move on to the next one and I’ve made much much more than what amazon, apple or Tesla would have made me in the same amount of time, I’m up 50% in the past 3 months and apple is up only 12% in the past 3 months, so stupidity makes money ig

 305: People thought they were ironically being retards when in reality they were retards unironically

 306: Old School

 307: nice we're up to four now

 308: Thanks dad

 309: Unemployment rate increased because jobless people started looking for work again. US lies about its unemployment by crushing peoples' wills, making them stop looking for jobs, then st


 400: I randomly bought a fuck ton of MCD puts when I was drunk. June 2019 with 1 year expiration June 2020. Made a ton of gains when COVID hit and sold the puts at the bottom. Probably my luckiest trade ever. There’s no way they woulda made me money if COVID never happened

 401: September

 402: Fuck that guy

 403: How can you tell that someone is vegan?    


  


Don't worry, they'll friggin tell you.

 404: Found the “white right winger.”

 405: Usa cartoon express

 406: The one in Washington was first

 407: Yeah. Hahaha and I don't even paper trade. I thought it would be fun as if wsb was going to educate the masses but nope they just wanted us to show our yolos, massive gains and losses. Hahaha good one. But I fucking lost count on when 69 days ended.

 408: This makes it through? Wtf

 409: Most apes can’t read

 410: Get a wife that has a job with health insurance.

 411: Eventually adulthood kicks in for everyone. It took me until 44 but then boom waking up at 6am on the 

In [163]:
regex_comments_storage

{}

In [37]:
dict1 = {}
dict1['Nikita'] = ['Lol']
dict1['Nikita'].append('New')
dict1

{'Nikita': ['Lol', 'New']}

### Below is a testing ground for sample codes | Don't write publishable code below

In [None]:
from praw.models import MoreComments

# The ten hottest posts on the chosen subreddit
top_10_hottest = subreddit.hot(limit=10)
boolean = True

# For every post: print its title in bold, then at most its first 20
# top-level entries, each cut to 100 characters.
for post in top_10_hottest:
    print('\033[1m Title: {0} \033[0m \n'.format(post.title))

    for num, comm in enumerate(post.comments):
        if num >= 20:
            break
        if isinstance(comm, MoreComments):
            # Placeholder entry: show the first 100 comments it expands to
            expanded = comm.comments()[:100]
            print('\033[1m {0}) MoreComments:\033[0m {1}'.format(str(num), str(expanded)))
            continue
        # Max 100 characters per comment
        preview = comm.body[:100]
        print('\033[1m {0}) Comment:\033[0m {1}'.format(str(num), preview))
    print('\n')


In [None]:
# Printing the newest comments on a subreddit, regardless of which submission
# they belong to. No explicit limit is passed to the listing calls below, so
# the number of comments returned is whatever PRAW's default is
# (NOTE(review): presumably 100 -- confirm).

# Names of the two subreddits used in this cell
subr2 = 'VIAC'
subr = 'wallstreetbets'

subreddit2 = reddit.subreddit(subr2)

subreddit_wsb = reddit.subreddit(subr)

# Listing of the newest submissions on the VIAC subreddit (not used below)
subreddit2_new = subreddit2.new()

# Listing of the newest comments on wallstreetbets, regardless of the submission
subr_comments_new = subreddit_wsb.comments()

# Listing of the newest comments on the VIAC subreddit, regardless of the
# submission (only referenced by the disabled loop below)
subreddit2_comments = subreddit2.comments()



# Disabled variant of the loop below that prints the VIAC comments instead:
#for num, comment in enumerate(subreddit2_comments):
#    print(""" 
#        {0}) Comment: {1}
#        URL: {2}
#        Author: {3}
#        """.format(str(num), comment.body, comment.link_url, comment.author))

# Print the running number, body, submission URL and author of each comment
for num, comment in enumerate(subr_comments_new):
    print(""" 
        {0}) Comment: {1}
        URL: {2}
        Author: {3}
        """.format(str(num), comment.body, comment.link_url, comment.author))



In [None]:
# Search the comments of one submission for a ticker/company pattern using
# the regex library
url_submission = 'https://www.reddit.com/r/wallstreetbets/comments/ob59hl/what_are_your_moves_tomorrow_july_01_2021/'
submission_post = reddit.submission(url=url_submission)

# Raw strings, so that `\w` reaches the regex engine unchanged instead of
# being treated as an (invalid) string escape sequence.
pattern = r'\wiacom|[Vv][Ii][Aa][Cc]'
pattern2 = r'[Aa][Mm][Cc]'

# Converting the pattern into a regular expression object
reg_exp2 = re.compile(pattern)

# List of the bodies of comments that mention the ViacomCBS ticker/name
mentioned = []

for num, comment in enumerate(submission_post.comments):
    if isinstance(comment, MoreComments):
        # Expand the placeholder and treat the comments it stands for like
        # any other comment: print them and check them for the pattern
        # (previously they were printed but never checked).
        for comm_in in comment.comments():
            print('\033[1m MoreComments:\033[0m {0}'.format(comm_in.body))
            if reg_exp2.search(comm_in.body):
                mentioned.append(comm_in.body)
        continue
    result = reg_exp2.search(comment.body)
    if result:
        mentioned.append(comment.body)
    # Only the first 100 characters of each comment are printed
    print('\033[1m {0}) Comment:\033[0m {1}'.format(str(num), comment.body[0:100]))


In [None]:
mentioned

In [None]:
last_update = mentioned.copy()
last_update

In [None]:
len(submission_post.comments)

In [None]:
# Collect the comment bodies of the third listed submission into a dictionary
# keyed by its submission id.
# Temporary storage: submission id -> list of comment bodies
dict_subm = {}
list_of_subm_ids = df['submission_id'].tolist()

# Accessing the submission via the reddit.submission method
example_submission = reddit.submission(id=list_of_subm_ids[2])
# Sorting all comments in the submission by newest first
example_submission.comment_sort = 'new'

# Flattening the comment forest to a single level (no more top-level versus
# reply-level comments)
all_comments = example_submission.comments.list()

# Ids of the comments stored so far, used to keep each comment only once
comment_done = set()

# The PRAW docs describe MoreComments objects as a representation of the
# "load more comments" and "continue this thread" links encountered on Reddit.
# They are skipped here so the loop does not get stuck on them.
for comment in all_comments:
    if isinstance(comment, MoreComments):
        continue
    if '**User Report**' in comment.body:
        continue
    if len(comment.body) > 2 and comment.id not in comment_done:
        # Record the id for every stored comment (previously only the first
        # one was recorded, so the duplicate check never took effect).
        comment_done.add(comment.id)
        print('\n' + comment.body)
        dict_subm.setdefault(list_of_subm_ids[2], []).append(comment.body)