In [None]:
# This notebook ranks finds the most popular stocks (tickers) on any reddit discussion
# Similar to www.wsbdaily.com but - 
# While wsbdaily.com describe last 24 hours, I describe based on user-specified number of hours to look back.
# Ex : It finds most popular stocks in last 3 hours or last any number of hours as specified by you.
# Also helps to view all comments which talk about a particular stock

In [37]:
#importing packages
import praw, pandas as pd
from time import time
from datetime import datetime,timedelta

from spacy.lang.en.stop_words import STOP_WORDS
from spacy.tokenizer import Tokenizer
from spacy.lang.en import English
nlp = English()

In [38]:
#First you need your reddit API credentials
# follow this blog post to get them: https://www.jcchouinard.com/get-reddit-api-credentials-with-praw/

# create praw reddit instance - 
reddit = praw.Reddit(
    client_id="AXQfxvxQPMi36b7PH1D-ZQ",
    client_secret="4G4iRkfLHXPUfQOS8OYvJ6L8ko7_IQ",
    password="Your-reddit-account-password",
    user_agent="test script",
    username="Your-reddit-username",
)

In [39]:
#Should print your reddit username
print(reddit.user.me())


In [46]:
#Extracting comments in a subreddit discussion sorted by reddit's "Best" filter 

#Enter URL of a discussion thread
url=input("Enter URL of today's Daily Discussion of the subreddit wallstreetbets: ")
#replace limit

repLimit=input("Enter 600 to get best 60-70% comments(takes 10 minutes to run) and 1200 for top 80+% comments(takes 15-20 minutes to run) : ")
###########################

st=time()
submission = reddit.submission(url=url)
print('submission created')
print("Extracting comments...")
submission.comments.replace_more(limit=int(repLimit))
print('replace more limit set as',repLimit)
print()
print("Number of comments in the thread:",len(submission.comments.list()))

#appending comments
submissionList = []
for comment in submission.comments.list():
#     print(comment)
#     print(comment.body)
    submissionList.append(comment)
print(len(submissionList),'comments extracted')

print()
print('time taken',round((time()-st)/60),'minutes')

Enter URL of today's Daily Discussion of the subreddit wallstreetbets: https://www.reddit.com/r/wallstreetbets/comments/qnjay6/weekend_discussion_thread_for_the_weekend_of/
Enter 600 to get best 60-70% comments(takes 10 minutes to run) and 1200 for top 80+% comments(takes 15-20 minutes to run) : 600
submission created
Extracting comments...
replace more limit set as 600

Number of comments in the thread: 8355
8355 comments extracted

time taken 8 minutes


In [47]:
# Enter number of hours to look back for popular tickers
hoursBack=input("How many hours behind do you wanna look? :")

current_time=datetime.now()

fewHourComments=[]
fewhoursback=current_time-timedelta(hours=int(hoursBack))
for comment in submissionList:
    if datetime.fromtimestamp(comment.created_utc)>=fewhoursback:
        fewHourComments.append(comment)
print(len(fewHourComments), "comments in the last "+hoursBack+" hours")

How many hours behind do you wanna look? :12
2564 comments in the last 12 hours


In [48]:
#spacy word tokenizer 
tokenizer = Tokenizer(nlp.vocab)


In [49]:
# extracting ticker frequency 
tickerDict=dict()
saver={}
for i,x in enumerate(fewHourComments):
    
    #logic to extract tickers
    tickers=list(set([str(tic) for tic in tokenizer(x.body) if (str(tic).isupper() and len(str(tic))<=4 and nlp.vocab[str(tic)].is_stop==False and not(any(k.isdigit() for k in str(tic))))]))
    for m in tickers:
        if str(x) in tickerDict.keys():
            if m not in tickerDict[str(x)]:
                tickerDict[str(x)].append(m)
        else:
            tickerDict[str(x)]=[m]
    
    
    saver[str(x)]=x.body

In [50]:
# appending relevant comments data in dataframe for future use
comdf=pd.DataFrame()
for comment in fewHourComments:

    comdf=comdf.append({'author':comment.author,'comment':comment.body,'utc':comment.created_utc,
                       'upvotes':comment.score,'url':'reddit.com/'+comment.permalink,
                        'replies':comment.replies.__len__()},ignore_index=True)


print(comdf.shape)

(2564, 6)


In [51]:
comdf.head()

Unnamed: 0,author,comment,utc,upvotes,url,replies
0,VisualMod,#Ban Bet Lost\n\n/u/_Apache_Helicopter_ (0/1) ...,1636217000.0,1.0,reddit.com//r/wallstreetbets/comments/qnjay6/w...,0.0
1,trashcanpandas,I miss being single. sleeping around and meeti...,1636183000.0,26.0,reddit.com//r/wallstreetbets/comments/qnjay6/w...,6.0
2,AnalLeekage,"My banbet is over, I'm back bitches.\n\n!banbe...",1636206000.0,30.0,reddit.com//r/wallstreetbets/comments/qnjay6/w...,0.0
3,AdPotential6247,my sister's having a baby! im gonna be a dad!,1636206000.0,18.0,reddit.com//r/wallstreetbets/comments/qnjay6/w...,0.0
4,Turd-Lipstick,If I was holding /u/cashflow_ and Remy off a c...,1636212000.0,20.0,reddit.com//r/wallstreetbets/comments/qnjay6/w...,5.0


In [53]:
#save the dataframe for future use (change name as per requirement)
name=input("Enter file name by which you'd like to save:")
comdf.to_csv(name+'.csv',index=False)
print("File saved as:",name+'.csv')

Enter file name by which you'd like to save:6th_November_weekend_thread_last_12_hours
File saved as: 6th_November_weekend_thread_last_12_hours.csv


In [54]:
#create frequency dictionary
tickerFrequency=dict()
for comment in tickerDict.keys():
    for tic in tickerDict[comment]:
        if tic not in tickerFrequency.keys():
            tickerFrequency[tic]=1
        else:
            tickerFrequency[tic]+=1

In [55]:
tickerFrequency

{'PINS': 2,
 'SPY': 34,
 'U.S': 1,
 'CNBC': 3,
 '$SPY': 3,
 'ROPE': 1,
 'WSB.': 4,
 'WSB': 9,
 'TSLA': 15,
 'ARK': 1,
 'DD.': 2,
 'COD': 1,
 'LYV': 2,
 'UPS': 1,
 'DOW': 1,
 'FYI': 1,
 'GME': 21,
 'LCID': 4,
 'PLUG': 7,
 'LEV.': 1,
 'PYPL': 4,
 'PDT': 1,
 'S&P': 2,
 'EV': 7,
 'PTRA': 4,
 'KIND': 1,
 'KVSB': 1,
 'PTON': 4,
 'LSD': 2,
 'OCGN': 2,
 'WEEK': 2,
 'ER': 1,
 'AMC': 3,
 'CLF.': 3,
 'TA?': 1,
 'VTI': 1,
 'ARKK': 1,
 'QCOM': 2,
 'JRE': 1,
 'F': 5,
 'NET': 1,
 'COIN': 1,
 '$T,': 1,
 '$BBD': 1,
 'OMG': 1,
 'RIP': 4,
 'VRBO': 1,
 'V': 1,
 'LTHM': 1,
 'CAT': 7,
 'ATH': 3,
 'SKLZ': 2,
 'NVDA': 12,
 'CLNE': 2,
 '$URI': 1,
 '$X': 1,
 '$CAT': 1,
 'H': 1,
 'ON,': 1,
 'SPOT': 1,
 'SNAP': 1,
 'LYV.': 1,
 'MOON': 1,
 '#ALL': 1,
 'BAM': 1,
 'RED': 4,
 'ATH.': 1,
 'BILL': 2,
 'LOSE': 1,
 'SELL': 2,
 'IM': 2,
 'AMD': 5,
 'CLF': 12,
 'AMC.': 1,
 'DD': 2,
 'AH': 1,
 'FUBO': 2,
 'CASH': 1,
 'HITS': 1,
 'PILE': 1,
 '$QQQ': 1,
 'ZZZZ': 1,
 'CEO,': 1,
 'NIO': 1,
 'ATVI': 2,
 'ICLN': 1,
 'VT': 1,
 'BA

In [56]:
#storing in dataframe
tickerFreqDf=pd.DataFrame(tickerFrequency.items(),columns=['ticker','counts'])

In [57]:
#sort by popularity of stock/ticker
tickerSorted=tickerFreqDf.sort_values("counts",ascending=False).head(20)

In [58]:
#add rank
tickerSorted['rank']=list(range(1,len(tickerSorted)+1))

In [59]:
#Stocks/Tickers sorted by frequency/popularity 
print("Stocks/Tickers sorted by popularity in last",hoursBack,"hours")
print("'counts' column shows the number of unique comments which talked about the stock")
tickerSorted

Stocks/Tickers sorted by popularity in last 12 hours
'counts' column shows the number of unique comments which talked about the stock


Unnamed: 0,ticker,counts,rank
1,SPY,34,1
16,GME,21,2
8,TSLA,15,3
52,NVDA,12,4
72,CLF,12,5
99,*I,10,6
7,WSB,9,7
18,PLUG,7,8
23,EV,7,9
220,MA,7,10


In [63]:
#View comments based on the stock ticker
stock=input('Enter stock ticker:')
print()
for comment in saver.values():
    for x in tickerSorted.ticker:
        if x in comment and stock in comment:
            print('#########')
            print(comment)

Enter stock ticker:NVDA

#########
NVDA’s ceo is a relative of AMDs ceo. That family was probably making computer chips in the feudal era or some shit
#########
NVDA’s ceo is a relative of AMDs ceo. That family was probably making computer chips in the feudal era or some shit
#########
What if NET signs a deal with NVDA?

I’m pretty high

Still be awesome tho

Go to sleep, nerd
#########
NVDA hits all markets - https://forums.evga.com/Notice-of-Stolen-EVGA-GeForce-RTX-30Series-Graphics-Cards-m3490851.aspx
#########
NVDA hits all markets - https://forums.evga.com/Notice-of-Stolen-EVGA-GeForce-RTX-30Series-Graphics-Cards-m3490851.aspx
#########
NVDA hits all markets - https://forums.evga.com/Notice-of-Stolen-EVGA-GeForce-RTX-30Series-Graphics-Cards-m3490851.aspx
#########
Bought AMD and NVDA weeklies before close for the fuck of it. Pls moon.
#########
Bought AMD and NVDA weeklies before close for the fuck of it. Pls moon.
#########
Anyone have any thoughts on NVDA for next week
########

In [None]:
# The end