In [1]:
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import py_vollib_vectorized as vollib
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from alpaca_trade_api.rest import REST, TimeFrame
import os 
from tradier_data import *
from scraping import *
from alpaca import *

import re
import praw


# Building a screener for highly shorted stocks trading below fair value that could run up on potential meme interest from WSB:
# 1. Screen for stocks with high short interest.
# 2. Confirm the short interest is not the consequence of a run-up (run-up percentage < 40) and that the stock is definitively below fair value.
# 3. Check the implied volatility of long-term calls and the change in open interest. With highly shorted stocks we should expect to see
#    the short position hedged with calls or sold puts; any data to the contrary suggests an opportunity.
# Interesting point: it looks like options on Robinhood, at least, are priced with a 0% risk-free rate, given the implied volatility figures they provide.

In [9]:
# Upper-case tokens that are never counted (presumably common words that would
# otherwise match ticker symbols -- confirm against tickers.csv).
_WSB_STOP_WORDS = frozenset(['A', 'B', 'GO', 'ARE', 'ON'])

# Every character that is not an ASCII letter is stripped from a token,
# e.g. "$GME," -> "GME".
_NON_LETTERS = re.compile('[^a-zA-Z]')


def _count_words(texts):
    """Return a dict mapping each cleaned, case-sensitive word in ``texts`` to its count."""
    counts = {}
    for text in texts:
        # Tokenize on *any* whitespace before stripping punctuation. Stripping
        # first (as the previous version did) deleted newlines and tabs, which
        # glued the last word of one line to the first word of the next
        # ("now\n\nAMC" -> "nowAMC") and silently dropped ticker mentions.
        for token in text.split():
            word = _NON_LETTERS.sub('', token)
            if not word or word in _WSB_STOP_WORDS:
                continue
            counts[word] = counts.get(word, 0) + 1
    return counts


def get_wsb_analysis(subreddit='wallstreetbets', limit=500, tickers_path='tickers.csv'):  # pulled from tomsant/wsbTrendingStonks
    """Rank ticker symbols by how often they are mentioned in hot subreddit posts.

    Fetches the hot listing of ``subreddit`` through praw, counts every
    whitespace-separated word (letters only, case-sensitive) in the post titles
    and bodies, and inner-joins those counts with the ``Symbol`` column of the
    tickers CSV so that only known symbols remain.

    Credentials are read from the ``REDDIT_CLIENT_ID`` and ``REDDIT_SECRET``
    environment variables.

    Parameters
    ----------
    subreddit : str, default 'wallstreetbets'
        Subreddit whose hot posts are scanned.
    limit : int, default 500
        Maximum number of hot posts requested from praw.
    tickers_path : str, default 'tickers.csv'
        CSV file with at least a ``Symbol`` column; a ``Name`` column, if
        present, is renamed to ``Company_Name``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Symbol`` (str) and ``Relative Frequency (%)`` (the symbol's
        share of all matched mentions, in percent). Empty when no symbol is
        mentioned.
    """
    reddit = praw.Reddit(
        client_id=os.getenv("REDDIT_CLIENT_ID"),
        client_secret=os.getenv("REDDIT_SECRET"),
        user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36",
    )

    texts = []
    for post in reddit.subreddit(subreddit).hot(limit=limit):
        texts.append(post.title)
        # Guard against a missing body so a single odd post cannot abort the scan.
        texts.append(post.selftext or '')

    counts = _count_words(texts)

    # Name the columns explicitly so the frame has a "Symbol" column even when
    # no words were collected; otherwise the merge below raises KeyError.
    word_df = pd.DataFrame(
        list(counts.items()), columns=["Symbol", "Frequency"]
    ).astype({"Frequency": "int64"})

    ticker_df = pd.read_csv(tickers_path).rename(columns={'Name': 'Company_Name'})

    # Inner join: keeps only words that are real symbols, in ticker-file order.
    stonks_df = pd.merge(ticker_df, word_df, on='Symbol')
    stonks_df["Relative Frequency (%)"] = (
        stonks_df["Frequency"] * 100 / stonks_df["Frequency"].sum()
    )
    stonks_df["Symbol"] = stonks_df["Symbol"].astype(str)
    return stonks_df[["Symbol", "Relative Frequency (%)"]]



In [16]:
# Run the WSB mention screen and keep the resulting frame in `df`.
# get_wsb_analysis() calls the Reddit API through praw (credentials come from the
# REDDIT_CLIENT_ID / REDDIT_SECRET environment variables) and reads tickers.csv.
# NOTE(review): the recorded execution counts are out of order (this cell is In [16],
# the cells after it are In [13] and In [15]) -- confirm with Restart & Run All.
df= get_wsb_analysis()

In [13]:
# Display the screen result: one row per matched ticker, with its
# "Symbol" and "Relative Frequency (%)" columns.
df

In [15]:
# Check the column dtypes: "Symbol" is cast with astype(str) inside
# get_wsb_analysis(), and "Relative Frequency (%)" is a float percentage.
df.dtypes

Symbol                     object
Relative Frequency (%)    float64
dtype: object