# AIMS OF THE PROJECT:
## - Top Stock Mentions Today (Tickers), Last 2 days, 5 days, 7 Days, Custom number. Provide it in the list or barchart
## - Print out Comments that mentioned these tickers or just for one ticker
## - Provide visualisation in form of a graph
## - Create a Jupyter Notebook tool with buttons that allows non coder to do such tasks.


In [265]:
from datetime import datetime, date, time
import re

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import json
import praw
import requests

import jupyter

from praw.models import MoreComments

In [166]:
# Checking the current working directory path
import os
print(os.getcwd())

/Users/nikitaizmailov/Desktop/Reddit analyser


In [167]:
# Jupyter settings for optimizing the output to see without restrictions on view limit
jupyter.textOutputLimit = 0

In [168]:
# Reading JSON file with credentials to access Reddit API Wrapper
creds = 'client_secrets.json'

with open(creds) as f:
    data_creds = json.load(f)

data_creds

{'client_id': 'nF4p5qngjvuOVA',
 'client_secret': '7JiwpJlD8sHhfKJRegj_0li7Wzy6IA',
 'user_agent': 'script by u/TallReplacement229',
 'redirect_uri': 'http://localhost:8080',
 'refresh_token': '378272811181-SQqMoEgWQh8Fz9AQZa_UIN3HBS_Nbg'}

In [169]:
# Creating an instance from Reddit class to interact with Reddit API

reddit = praw.Reddit(
    client_id=data_creds['client_id'],
    client_secret=data_creds['client_secret'],
    user_agent=data_creds['user_agent'],
    redirect_uri=data_creds['redirect_uri'],
    refresh_token=data_creds['refresh_token']
)


In [228]:
# Remaining api request limits left
# Note, that the values are empty until you've actually issues a command that results in a network request.
reddit.auth.limits

{'remaining': 290.0, 'reset_timestamp': 1625389200.391673, 'used': 310}

In [170]:
# Choosing a subreddit to analyse
subr = 'wallstreetbets'

# initialising an instance of this class for subreddit
subreddit = reddit.subreddit(subr)

In [171]:
# Data storage: Storing all needed attributes of submission object then parsing into the pandas df
submission_posts = {}

In [172]:
# Current Hot Submissions on Reddit group
submissions = subreddit.hot(limit=10)
for number, submission in enumerate(submissions):
    print('%d) submission: %s' % (number, submission.title))
    # saving the submission attributes in the dictionary for later use
    submission_posts[number] = [submission.id, submission.title, submission.author, 
                                submission.score, submission.permalink, submission.num_comments] 

0) submission: Weekend Discussion Thread for the Weekend of July 02, 2021
1) submission: Fraternal Association of Gambling Gentlemen and Yacht Degenerates for July 03, 2021
2) submission: BofA initiates legal proceedings against WSB and Reddit Inc.
3) submission: Pro moves
4) submission: BofA chart showing a downward cock and balls pattern formation. BTD
5) submission: BofA initiates legal proceedings against WSB and Reddit Inc.
6) submission: $82k to $10k. You’re suppose to wait till expiration to sell right? 🤣
7) submission: Everything I've learned about being on Wall Street from WSB
8) submission: $53k TO $306K in just 2 days.
9) submission: BofA is coming after me now.


In [173]:
# Creating a DataFrame to store Submissions data from the subreddit selected
df = pd.DataFrame.from_dict(data=submission_posts, 
                            orient='index', 
                            columns=['submission_id', 'submission_title', 'author', 'score', 'link', 'num_comments'])
df['link'] = 'reddit.com' + df['link'].astype(str)
df

Unnamed: 0,submission_id,submission_title,author,score,link,num_comments
0,oci3qd,Weekend Discussion Thread for the Weekend of J...,OPINION_IS_UNPOPULAR,386,reddit.com/r/wallstreetbets/comments/oci3qd/we...,15573
1,ocufr5,Fraternal Association of Gambling Gentlemen an...,VisualMod,143,reddit.com/r/wallstreetbets/comments/ocufr5/fr...,1769
2,od4b09,BofA initiates legal proceedings against WSB a...,OPINION_IS_UNPOPULAR,21742,reddit.com/r/wallstreetbets/comments/od4b09/bo...,2107
3,ode6ey,Pro moves,Throwaway1forall,469,reddit.com/r/wallstreetbets/comments/ode6ey/pr...,19
4,od3ttb,BofA chart showing a downward cock and balls p...,BigBoiBenis,4117,reddit.com/r/wallstreetbets/comments/od3ttb/bo...,274
5,od2085,BofA initiates legal proceedings against WSB a...,OPINION_IS_UNPOPULAR,2902,reddit.com/r/wallstreetbets/comments/od2085/bo...,576
6,od612v,$82k to $10k. You’re suppose to wait till expi...,SAVAGE__NIGHTS,900,reddit.com/r/wallstreetbets/comments/od612v/82...,191
7,od9xfn,Everything I've learned about being on Wall St...,Anal_Chem,415,reddit.com/r/wallstreetbets/comments/od9xfn/ev...,40
8,od7008,$53k TO $306K in just 2 days.,mrviagrauser,643,reddit.com/r/wallstreetbets/comments/od7008/53...,169
9,od44z9,BofA is coming after me now.,ITM_ENRON_CALLS,1035,reddit.com/r/wallstreetbets/comments/od44z9/bo...,181


In [178]:
# Cell for selecting a specific reddit submission
reddit_link = df['link'][8]
print(reddit_link)

reddit.com/r/wallstreetbets/comments/od7008/53k_to_306k_in_just_2_days/


In [179]:
# Creating a Dataframe to store comments from the above dataframe with their respective submission_id
# temporary storage of submission_ids
dict_subm = {}
list_of_subm_ids = df['submission_id'].tolist()
# Selecting for now only comments from the first submission
list_of_subm_ids = list_of_subm_ids[0]

In [180]:
##  Function to extract comments from a submission and store it in the dictionary
def comments_to_submission_id_mapping(dict_subm, sub_id):
    # Accessing the submission via reddit.submission method
    example_submission = reddit.submission(id=sub_id)
    # Sorting all comments in the submission by newest first
    example_submission.comment_sort = 'new'

    # Flattening the forrest comment to same level (No more top level and replies level comments)
    # Also Replacing More Comments objects with Comments objects themselves.
    example_submission.comments.replace_more(limit=None)

    all_comments = example_submission.comments.list()

    # Check uniqueness of each comment
    comment_done = set()

    # In the Praw docs it states that these MoreComments objects are a representation 
    # of the load more comments and continue this thread links encountered on Reddit.
    # To solve this issue so it loads all the comments and don't get stuck on MoreComments object
    for comment in all_comments:
        if isinstance(comment, MoreComments):
            # This should not be executed as we have converted all more comments to comments above
            continue
        if '**User Report**' in comment.body:
            continue
        if len(comment.body) > 2 and comment.id not in comment_done:
            if sub_id not in dict_subm:
                #print(comment.body)
                comment_done.add(comment.id)
                dict_subm[sub_id] = [comment.body]
            else:
                #print(comment.body)
                dict_subm[sub_id].append(comment.body)
    print('Extraction Comepleted!')
    print('Total number of comments extracted: %d' % (len(dict_subm[sub_id])))


comments_to_submission_id_mapping(dict_subm, list_of_subm_ids)

Extraction Comepleted!
Total number of comments extracted: 1


In [182]:
len(dict_subm['oci3qd'])

14387

In [188]:
# Creating a dataframe with comments to analyse further
df_comments = pd.DataFrame(data=dict_subm)
df_comments.columns = ['Comments']
df_comments = df_comments['Comments'].astype(str)
# Converting a Series back to DataFrame
df_comments = df_comments.to_frame()

In [216]:
# Testing each comment with regex whether it contains the pattern searched
df_test1 = df_comments.copy(deep=True)

# The pattern is correct. The warning appearing at the bottom is misleading.
pattern2 = r'($)?[vV][iI][aA][cC](om|omCBS|CBS)?'
pattern3 = r'[Aa][Mm][Cc]'

df_test1.loc[df_test1['Comments'].str.contains(pat=pattern3, regex=True, case=False)]

Unnamed: 0,Comments
24,"Ngl, I've laughed as gme and amc have sold off..."
189,AMC🚀🚀
239,Keep wondering if AMC would give shareholders ...
540,The two best squeezes of the year have a few t...
575,[https://franknez.com/heres-why-red-days-shoul...
...,...
13341,They were both talked about from before times....
13345,"I made 25K on AMC, so I’m certainly not anti. ..."
13481,True… webull is supposed to have fraction shar...
13998,Spend tesla nvida n amc they gonna rip


In [340]:
# Adding visual to the tickers mentioned. Also need to create an automatic pattern creator
########
########
df_mentions_visual = df_comments.copy(deep=True)

# Pattern selected
pattern3 = r'AMC|VIAC|CLOV|BB|FB|AAPL|GOOG|GME|SPY'

# Filtering for the pattern needed
df_mentions_visual = df_mentions_visual.loc[df_mentions_visual['Comments'].str.contains(pat=pattern3, 
                                                                                        regex=True, case=False)].reset_index(drop=True)
# Column names for the newly grouped dataframe
cols = pattern3.split('|')

def splittingdata(cols):
    df_temp = df_mentions_visual.copy(deep=True)
    for col in cols:
        try:
            df_temp[col] = df_mentions_visual.loc[df_mentions_visual['Comments'].str.contains(pat=col, regex=True, case=False)].reset_index(drop=True)
        except Exception as exc:
            print(exc)
    return df_temp

df_split = splittingdata(cols)

# dropping a Comments column as it is no longer needed
df_split = df_split.drop('Comments', axis=1)

# Adding a row with total count of Non-NAN rows for each column
df_split.loc['Total by column'] = df_split.count()

# Sorting by the highest amount of mentions to lowest amount of mentions
df_split = df_split.sort_values(df_split.last_valid_index(), axis=1, ascending=False)

# 
df_split = df_split.dropna(axis=0, how='all')
df_split


Unnamed: 0,SPY,AMC,BB,GME,CLOV,GOOG,AAPL,FB,VIAC
0,"Happy birthday America, SPY 550 EOY","Ngl, I've laughed as gme and amc have sold off...","For a clarification of what happened, we went ...","Ngl, I've laughed as gme and amc have sold off...",My 3K in wish / clov isn’t 100K yet and it mak...,Will google split?,Aapl predictions for next week?,Should I go to bed or pass out here again http...,CLF and VIAC.
1,Anyone who bought or is planning to buy SPY pu...,AMC🚀🚀,Happy Independence Day from a fellow American ...,"guys i am still diamond hands GME, what do i d...",Can’t wait to watch CLOV squirt like the big g...,https://www.google.com/amp/s/www.sfgate.com/tr...,Next week $AAPL to break ATH.,I cant believe TheGhostOfBillHwang's girl woul...,
2,Should I do spy put 0dte options or do 7/9? I ...,Keep wondering if AMC would give shareholders ...,Murica - the most lobby driven country in the ...,Any DD on FTX trading related to the BANGS? Lo...,Clov honestly looks like it's one bad news art...,I've read a few posts about some WSB legends l...,Volume on 7/2 AAPL 140C was pretty insane on f...,Boys have fb & microsoft interview coming up n...,
3,SPY PUTS AT OPEN,The two best squeezes of the year have a few t...,Secret sub is now private. Wish y’all subbed t...,I'm terribly sorry some people from the GME si...,I got a price target of $23 for clover stock o...,British slang for cigarette (google it) = WSB ...,Alright AAPL needs another LED supplier to pum...,Balls deep in tech calls\n\nAAPL 7-16 145c\nFB...,
4,The markets goal is to fuck you. If the SPY is...,[https://franknez.com/heres-why-red-days-shoul...,I base all of my moves on the phase of the moo...,Guy was right that posted yesterday.\n\nI'm ge...,YAY I LOVE THAT THIS IS PRIVATE. I feel so exc...,Balls deep in tech calls\n\nAAPL 7-16 145c\nFB...,All in AAPL next week,How I feel about my 2022 CLNE calls.\n\n*The 5...,
...,...,...,...,...,...,...,...,...,...
320,"Yeah I do FDs, a day or two out etc, if you’re...",,,,,,,,
321,I flipped a 1 dollar spy lotto spread for 40 b...,,,,,,,,
322,"I feel you, if there’s a big move at the end o...",,,,,,,,
323,Puts on SPY I assume?,,,,,,,,


In [274]:
np.random.randint(0, 50, 50)

array([10,  8, 45, 49, 32, 12, 49,  7, 10, 43, 16, 28, 26, 22, 36,  9,  5,
        5, 22, 29, 42, 43, 26,  2,  9, 47,  8,  2, 32,  4,  2,  7, 38,  8,
       22,  4,  0, 43, 12, 35, 22, 16, 44, 40, 31, 45, 37, 35, 31, 38])

In [161]:
# Storage for all comments from the whole subreddit where regex pattern match.
regex_comments_storage = {}
more_comments_obj = {}

# Creating a regex pattern object from string
pattern2 = r'($)?[vV][iI][aA][cC](om|omCBS|CBS)?'
pattern3 = r'[Aa][Mm][Cc]'

reg_exp2 = re.compile(pattern2)

In [162]:
# Get comments from subreddit regardless of the submission. I.e. anywhere on the subreddit. All the newest ones
# equivalent of https://www.reddit.com/r/VIAC/comments/
all_recent_comments = subreddit.comments(limit=500)

for num,comment in enumerate(all_recent_comments):
    if isinstance(comment, MoreComments):
        # This should not be executed as we have converted all more comments to comments above
        print('MORE COMMENTS OBJECT!!!')
        more_comments_obj[comment.id] = comment.body
        continue
    if '**User Report**' in comment.body:
        continue
    # Searching for regex pattern
    if reg_exp2.search(comment.body):
        if comment.id not in regex_comments_storage:
            print('\n {0}: {1}'.format(num,comment.body))
            regex_comments_storage[comment.id] = comment.body
        else:
            continue
    print('\n {0}: {1}'.format(num,comment.body))


 0: Should call it the Gulf of Cuba just to fuck with everyone

 1: No one cares what bags you are holding

 2: You guarantee it should. Nice

 3: Your submission was removed from /r/wallstreetbets due to being too short. All text submissions must be at least 100 characters long.

*I am a bot, and this action was performed automatically. Please [contact the moderators of this subreddit](/message/compose/?to=/r/wallstreetbets) if you have any questions or concerns.*

 4: Delco? Grew up in South Philly. My wife is from Ardmore. We bought our 1st house together in Drexel Hill but recently had a custom home built out in Delaware. Hella cheaper to live out here and now we’re 1/2 way closer to our beach house

 5: Fucking nice bro!

 6: 5500 shares  holding long.    This one has some good catalyst coming soon. Earnings august 11.  Also Hood iPo will move the sector so SOFI will move by then also bank charter 25 soon

 7: You bears have fun on Friday because it bleeds Friday expiring calls, 


 100: Do you guys know how easy it is to figure out what stock each motley fool article is shilling? All you have to do is google “Gumshoe Motley Fool” and you can figure it out.

Imagine paying those exorbitant member fees when you can get the stock tip for free if you really wanted…

 101: Their natural habitat is movie theaters!

 102: reee

 103: I am a bot from /r/wallstreetbets. Your submission was removed because was too short. Please make it a comment if it's not worth expounding on.

 104: Don’t be a pussy! Make another deposit and bust their ass wide open!

 105: Because you bought?

 106: congrats, OP u still have some leftover to buy a lot of red crayons!

Red crayons taste delicious!

 107: No more posting this - no otc, penny stocks, nothing below 1B market cap

 108: Así-así

 109: Because it’s always either broken or more expensive than using supercharger.

 110: GME & AMC are going to rocket Tuesday 🚀🚀🚀
THIS IS THE WAY !

 111: Run.

 112: All in what one tuesday? Nee


 200: Alright man

 201: That’s a good 👍 idea

 202: Your post has been removed because a single link is insufficient DD. Please review the following links before resubmitting.

Please note that abusing DD flair can lead to getting banned.

[Submission Flair Guide](https://www.reddit.com/r/wallstreetbets/wiki/linkflair)

[How to conduct DD](https://www.wallstreetoasis.com/forums/on-the-job-with-simple-as-my-research-process)

*I am a bot, and this action was performed automatically. Please [contact the moderators of this subreddit](/message/compose/?to=/r/wallstreetbets) if you have any questions or concerns.*

 203: This and then going to every post about the ticker they sold calling it a trash pump n dump. No you just bought at the absolute top mate.

 204: Where can I lose money today?

 205: Hero

 206: Bro its Australia we don't riot or any of that shit, we dont shoot eachother, we dont hurt eachother. Our country is built on mateship. Our country isn't going to collapse we have 


 300: SOFI is a great long term play. I think we will see it melt upwards over the next 2 weeks. I’m doubtful there will be a squeeze since 1/2 the apes already moved onto other shit plays. I like the stock

 301: BNGO ANYONE/

 303: I’ve learnt a lot about being working with molten iron in a forge from those streams

 304: I buy undervalued stocks and sell them when they have a good pop and move on to the next one and I’ve made much much more than what amazon, apple or Tesla would have made me in the same amount of time, I’m up 50% in the past 3 months and apple is up only 12% in the past 3 months, so stupidity makes money ig

 305: People thought they were ironically being retards when in reality they were retards unironically

 306: Old School

 307: nice we're up to four now

 308: Thanks dad

 309: Unemployment rate increased because jobless people started looking for work again. US lies about its unemployment by crushing peoples' wills, making them stop looking for jobs, then st


 400: I randomly bought a fuck ton of MCD puts when I was drunk. June 2019 with 1 year expiration June 2020. Made a ton of gains when COVID hit and sold the puts at the bottom. Probably my luckiest trade ever. There’s no way they woulda made me money if COVID never happened

 401: September

 402: Fuck that guy

 403: How can you tell that someone is vegan?    


  


Don't worry, they'll friggin tell you.

 404: Found the “white right winger.”

 405: Usa cartoon express

 406: The one in Washington was first

 407: Yeah. Hahaha and I don't even paper trade. I thought it would be fun as if wsb was going to educate the masses but nope they just wanted us to show our yolos, massive gains and losses. Hahaha good one. But I fucking lost count on when 69 days ended.

 408: This makes it through? Wtf

 409: Most apes can’t read

 410: Get a wife that has a job with health insurance.

 411: Eventually adulthood kicks in for everyone. It took me until 44 but then boom waking up at 6am on the 

In [163]:
regex_comments_storage

{}

In [37]:
dict1 = {}
dict1['Nikita'] = ['Lol']
dict1['Nikita'].append('New')
dict1

{'Nikita': ['Lol', 'New']}

### Below is a testing ground for sample codes | Don't write publishable code below

In [None]:
from praw.models import MoreComments

# Obtaining top 10 hottest posts of today on chosen subreddit
top_10_hottest = subreddit.hot(limit=10)
boolean = True

# Obtaining for each post 20 recent comments.
for post in top_10_hottest:
    print('\033[1m Title: {0} \033[0m \n'.format(post.title))

    for num, comm in enumerate(post.comments):
        if num < 20:
            if isinstance(comm, MoreComments):
                print('\033[1m {0}) MoreComments:\033[0m {1}'.format(str(num), str(comm.comments()[0:100])))
                continue
            # Max 100 characters per comment
            print('\033[1m {0}) Comment:\033[0m {1}'.format(str(num), comm.body[0:100]))
        else:
            break
    print('\n')


In [None]:
# Obtaining 100 newest comments of today on chosen subreddit, not on specific submission

subr2 = 'VIAC'
subr = 'wallstreetbets'

subreddit2 = reddit.subreddit(subr2)

subreddit_wsb = reddit.subreddit(subr)

# for Obtaining new post titles
subreddit2_new = subreddit2.new()

# obtaining newest comments for the subreddit regardless of the submission
subr_comments_new = subreddit_wsb.comments()

# for obtaining newly posted comments regardless of the submission
subreddit2_comments = subreddit2.comments()



#for num, comment in enumerate(subreddit2_comments):
#    print(""" 
#        {0}) Comment: {1}
#        URL: {2}
#        Author: {3}
#        """.format(str(num), comment.body, comment.link_url, comment.author))

for num, comment in enumerate(subr_comments_new):
    print(""" 
        {0}) Comment: {1}
        URL: {2}
        Author: {3}
        """.format(str(num), comment.body, comment.link_url, comment.author))



In [None]:
# Code to search for specific patterns via Regex library
url_submission = 'https://www.reddit.com/r/wallstreetbets/comments/ob59hl/what_are_your_moves_tomorrow_july_01_2021/'
submission_post = reddit.submission(url=url_submission)

pattern = '\wiacom|[Vv][Ii][Aa][Cc]'
pattern2 = '[Aa][Mm][Cc]'

# Converting a pattern into a regular expression object
reg_exp2 = re.compile(pattern)

# array to store matched comments that mentioned ViacomCBS ticker/name
mentioned = []

for num, comment in enumerate(submission_post.comments):
    if isinstance(comment, MoreComments):
        for comm_in in comment.comments():
            print('\033[1m MoreComments:\033[0m {0}'.format(comm_in.body))
            #print('\033[1m {0}) MoreComments:\033[0m {1}'.format(str(num), comment.replace_more_comments().comment))
        continue
    result = reg_exp2.search(comment.body)
    if result:
        mentioned.append(comment.body)
    print('\033[1m {0}) Comment:\033[0m {1}'.format(str(num), comment.body[0:100]))


In [None]:
mentioned

In [None]:
last_update = mentioned.copy()
last_update

In [None]:
len(submission_post.comments)

In [None]:
# Creating a Dataframe to store comments from the above dataframe with their respective submission_id
# temporary storage of submission_ids
dict_subm = {}
list_of_subm_ids = df['submission_id'].tolist()

# Accessing the submission via reddit.submission method
example_submission = reddit.submission(id=list_of_subm_ids[2])
# Sorting all comments in the submission by newest first
example_submission.comment_sort = 'new'

# Flattening the forrest comment to same level (No more top level and replies level comments)
all_comments = example_submission.comments.list()

# Check uniqueness of each comment
comment_done = set()

# In the Praw docs it states that these MoreComments objects are a representation 
# of the load more comments and continue this thread links encountered on Reddit.
# To solve this issue so it loads all the comments and don't get stuck on MoreComments object
for comment in all_comments:
    if isinstance(comment, MoreComments):
        continue
    if '**User Report**' in comment.body:
        continue
    if len(comment.body) > 2 and comment.id not in comment_done:
        if list_of_subm_ids[2] not in dict_subm:
            print('\n' + comment.body)
            comment_done.add(comment.id)
            dict_subm[list_of_subm_ids[2]] = [comment.body]
        else:
            print('\n' + comment.body)
            dict_subm[list_of_subm_ids[2]].append(comment.body)