In [1]:
# importing libraries
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.display import display, HTML

import emoji
import nltk
import re
import requests
import numpy as np
import contractions
import pandas as pd
import plotly.express as px
import plotly
import plotly.graph_objects as go
import seaborn as sns
import ipywidgets as widgets
import gensim

# Fetch only the NLTK resources this notebook uses. A bare nltk.download()
# opens the interactive downloader, which blocks "Restart Kernel -> Run All".
for _nltk_resource in (
    "stopwords",                   # nltk.corpus.stopwords
    "wordnet",                     # WordNetLemmatizer / nltk.corpus.wordnet
    "omw-1.4",                     # presumably needed by newer wordnet loaders - TODO confirm for your NLTK version
    "vader_lexicon",               # nltk.sentiment.SentimentIntensityAnalyzer
    "averaged_perceptron_tagger",  # nltk.pos_tag
):
    nltk.download(_nltk_resource, quiet=True)

In [2]:
# a couple of helper functions

def get_pushshift_data(data_type, **kwargs):
    """
    Gets data from the pushshift api.

    Data type can be 'comment' or 'submission'.
    The rest of the args are interpreted as the query-string payload.

    Returns the decoded JSON body of the response.

    Raises requests.HTTPError when the server answers with a 4xx/5xx status,
    and a requests timeout error when it does not answer in time.

    Read more: https://github.com/pushshift/api
    """

    base_url = f"https://api.pushshift.io/reddit/search/{data_type}/"

    # Without an explicit timeout, requests can wait on a stalled server
    # forever, which would freeze the notebook cell.
    response = requests.get(base_url, params=kwargs, timeout=30)

    # Fail loudly on HTTP errors instead of trying to JSON-decode an error page.
    response.raise_for_status()

    return response.json()


def make_clickable(val):
    """
    Wraps a value in an HTML anchor whose visible text is "Link".

    Intended for rendering a pandas column of URLs as clickable links.
    """

    anchor = f'<a href="{val}">Link</a>'
    return anchor

# Reddit submission activity by subreddit


In [22]:
# Module-level FigureWidget with an empty layout. The plotting function defined
# next assigns to its own local `fig`, so this widget is not populated by it.
fig = go.FigureWidget( layout=go.Layout() )
def plot_reddit_submission_activity(ticker):
    """
    Show a bar chart of the ten subreddits with the most submissions
    mentioning ``ticker`` among the submissions returned for the last 24h.

    ticker: search term forwarded to pushshift as the ``q`` parameter.

    Returns None. The figure is displayed via ``show()``; if the query
    yields no submissions a short message is printed instead.
    """

    data = get_pushshift_data(data_type="submission", q=ticker, after="24h", size=500, aggs="subreddit").get("data")

    # An empty or missing payload has no "subreddit" column, so counting on it
    # would raise; report the empty result instead of crashing the widget.
    if not data:
        print("No submissions found for " + ticker + " in the last 24h")
        return

    # Count submissions per subreddit (most frequent first) and keep the top 10.
    top_subreddits = pd.DataFrame.from_records(data).value_counts("subreddit")[0:10]

    x_data = top_subreddits.keys()
    y_data = top_subreddits.values

    dict_of_fig = {
        "data": [{"type": "bar",
                  "x": x_data,
                  "y": y_data}],
        "layout": {"title": {"text": "Subreddits with most activity - submissions with " + ticker + " in the last 24h",}, "xaxis_title": "Subreddit", "yaxis_title": "No. of Submissions"}
    }

    # Named `figure` so it is not mistaken for the module-level `fig` widget.
    figure = go.Figure(dict_of_fig)

    figure.show()

In [23]:
# Wire plot_reddit_submission_activity to an interactive control, starting with ticker 'GME'.
interact(plot_reddit_submission_activity, ticker='GME')

# Container styling for the module-level `fig` widget.
ipywLayout = widgets.Layout(border='solid 2px green')
ipywLayout.display='none' # hides the VBox below; comment this line out and re-run the cell to make the container visible
# Last expression of the cell, so the VBox is the cell's displayed value.
widgets.VBox([fig], layout=ipywLayout)

interactive(children=(Text(value='GME', description='ticker'), Output()), _dom_classes=('widget-interact',))

VBox(children=(FigureWidget({
    'data': [], 'layout': {'template': '...'}
}),), layout=Layout(border='solid …

In [6]:
from nltk.sentiment import SentimentIntensityAnalyzer as SIA

def get_sentiment(sia, text):
    """
    Classify ``text`` as "Positive", "Negative" or "Neutral".

    sia:  object exposing ``polarity_scores(text)`` that returns a mapping
          with a "compound" entry.
    text: the text to score.

    Returns "Positive" when the compound score is above 0, "Negative" when it
    is below 0, and "Neutral" otherwise.
    """
    # Score the text once and reuse the result for both comparisons.
    compound = sia.polarity_scores(text)["compound"]

    if compound > 0:
        return "Positive"
    if compound < 0:
        return "Negative"
    return "Neutral"
    
sia = SIA()  # shared analyzer instance; plot_reddit_title_sentiment reads this global

In [7]:
from gensim.utils import simple_preprocess
def remove_stopwords(texts):
    """
    Tokenise every document with simple_preprocess and drop the tokens that
    appear in the module-level ``stop_words``.

    Returns a list with one list of kept tokens per input document.
    """
    filtered_docs = []
    for doc in texts:
        kept_tokens = []
        for token in simple_preprocess(str(doc)):
            if token in stop_words:
                continue
            kept_tokens.append(token)
        filtered_docs.append(kept_tokens)
    return filtered_docs

In [8]:
from nltk.corpus import stopwords
import spacy 
# Loads the small English spaCy pipeline; the returned object is not kept.
spacy.load('en_core_web_sm')

# NLTK's English stop-word list; read as a global by remove_stopwords.
stop_words = stopwords.words('english')
# spaCy's English stop-word collection; not referenced again in the visible cells.
spacy_stopwords = spacy.lang.en.stop_words.STOP_WORDS
def sent_to_words(sentences):
    """
    Lazily yield one token list per sentence, produced by
    gensim.utils.simple_preprocess on the sentence's string form.
    """
    for item in sentences:
        as_text = str(item)
        # deacc=False keeps accent marks; deacc=True would strip them.
        tokens = gensim.utils.simple_preprocess(as_text, deacc=False)
        yield tokens

In [9]:
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet

def get_wordnet_pos(word):
    """Return the WordNet POS constant for ``word``, falling back to noun.

    The word is tagged on its own with nltk.pos_tag and only the first
    letter of the resulting tag (upper-cased) decides the mapping.
    """
    tagged = nltk.pos_tag([word])
    leading = tagged[0][1][0].upper()

    if leading == "J":
        return wordnet.ADJ
    if leading == "V":
        return wordnet.VERB
    if leading == "R":
        return wordnet.ADV
    # "N" and every unrecognised tag both resolve to noun.
    return wordnet.NOUN

In [43]:
# Module-level FigureWidget with an empty layout. The plotting function defined
# next assigns to its own local `fig`, so this widget is not populated by it.
fig = go.FigureWidget( layout=go.Layout() )
def _normalise_title(text):
    """Clean one submission title: strip links/mentions, spell out emoji, expand
    contractions, drop numbers and punctuation, lower-case, and map aliases to 'gme'."""
    text = re.sub(r'\s+', ' ', str(text))
    text = re.sub(r"(?:\@|https?\://)\S+", "", text)           # @mentions and URLs
    text = emoji.demojize(text, delimiters=("", " "))          # emoji -> their names
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'\b(\w+)( \1\b)+', r'\1', text)             # collapse immediately repeated words
    text = contractions.fix(text)
    text = re.sub(r"\b\d+\b", " ", text)                       # standalone numbers
    text = re.sub(r'[^\x00-\x7F]+', "'", text)                 # non-ASCII runs
    text = re.sub(r'[^\w]|_', ' ', text)                       # punctuation and underscores
    # Lower-case BEFORE alias replacement: the alias patterns are lower-case,
    # so mixed-case spellings such as "GameStop" would otherwise be missed.
    text = text.lower()
    text = re.sub(r'\b(?:gamestop|gamestops|gamestonk|galaxy|melvin|rocket|moon|mooning)\b', 'gme', text)
    text = re.sub(r'\s+', ' ', text)
    return text


def plot_reddit_title_sentiment(ticker):
    """
    Show a pie chart of the sentiment of r/wallstreetbets submission titles
    mentioning ``ticker`` among the submissions returned for the last 24h.

    ticker: search term forwarded to pushshift as the ``q`` parameter.

    Titles are cleaned, tokenised, stop-word filtered and lemmatised, then
    classified with get_sentiment using the module-level ``sia``.

    Returns None. The figure is displayed via ``show()``; if the query
    yields no submissions a short message is printed instead.
    """
    # Fixed slice order; the marker colours below are positional and match it.
    labels = ['Neutral', 'Positive', 'Negative']

    data = get_pushshift_data(q=ticker, data_type="submission", after="24h", size=500, sort_type="score", sort="desc", subreddit="wallstreetbets").get("data")

    # Without records the column selection below would raise a KeyError.
    if not data:
        print("No r/wallstreetbets submissions found for " + ticker + " in the last 24h")
        return

    # .copy() so that adding columns does not write into a slice of another frame.
    df = pd.DataFrame.from_records(data)[["title", "created_utc", "permalink"]].copy()

    cleaned_titles = [_normalise_title(title) for title in df.title.tolist()]
    tokenised = remove_stopwords(list(sent_to_words(cleaned_titles)))
    lemmatizer = WordNetLemmatizer()
    lemmatised = [[lemmatizer.lemmatize(token, get_wordnet_pos(token)) for token in tokens] for tokens in tokenised]
    df['cleaned_title'] = [' '.join(tokens) for tokens in lemmatised]

    df['Sentiment'] = df['cleaned_title'].apply(lambda x: get_sentiment(sia, str(x)))

    # Score each title once and fan the result out to the four score columns.
    scores = [sia.polarity_scores(str(title)) for title in df['cleaned_title']]
    for key in ('compound', 'neg', 'neu', 'pos'):
        df[key] = [score[key] for score in scores]

    # value_counts() orders by frequency, not by `labels`; reindex so every
    # share lines up with its label and colour, using 0 for absent classes.
    values = df['Sentiment'].value_counts(normalize=True).reindex(labels, fill_value=0)

    dict_of_fig = {
        "data": [{"type": "pie",
                  "labels": labels,
                  "values": values.tolist(),
                  "marker": {'colors': ['#e6f2ff', '#38F29D', '#F30000']},
                  "textinfo": "percent+label"}],
        "layout": {"title": {"text": "r/wallstreetbets sentiment - submissions with " + ticker + " in the last 24h",}}
    }

    # Named `figure` so it is not mistaken for the module-level `fig` widget.
    figure = go.Figure(dict_of_fig)

    figure.show()

In [44]:
# Wire plot_reddit_title_sentiment to an interactive control, starting with ticker 'GME'.
interact(plot_reddit_title_sentiment, ticker='GME')

# Container styling for the module-level `fig` widget.
ipywLayout = widgets.Layout(border='solid 2px green')
ipywLayout.display='none' # hides the VBox below; comment this line out and re-run the cell to make the container visible
# Last expression of the cell, so the VBox is the cell's displayed value.
widgets.VBox([fig], layout=ipywLayout)

interactive(children=(Text(value='GME', description='ticker'), Output()), _dom_classes=('widget-interact',))

VBox(children=(FigureWidget({
    'data': [], 'layout': {'template': '...'}
}),), layout=Layout(border='solid …

In [148]:
# Module-level FigureWidget with an empty layout. The plotting function defined
# next assigns to its own local `fig`, so this widget is not populated by it.
fig = go.FigureWidget( layout=go.Layout() )
def plot_reddit_ticker_frequency(ticker):
    """
    Show an indicator with the number of r/wallstreetbets submissions
    mentioning ``ticker`` that the query returned for the last 24h.

    ticker: search term forwarded to pushshift as the ``q`` parameter.

    Returns None. The figure is displayed via ``show()``.
    """

    data = get_pushshift_data(q=ticker, data_type="submission", after="24h", size=500, sort_type="score", sort="desc", subreddit="wallstreetbets").get("data")

    # Count the returned records directly. An empty or missing payload means
    # zero mentions and should be displayed as 0 rather than raising.
    # NOTE(review): the request asks for size=500, so the count presumably
    # cannot exceed that - confirm against the API's limits.
    count = len(data or [])

    dict_of_fig = {
        "data": [{"type": "indicator",
                  "value": count,
                  # NOTE(review): 160 is a hard-coded comparison baseline - confirm its origin.
                  "delta": {'reference': 160},
                  "mode": "number+delta+gauge",
                 }],
        "layout": {"template": {"data": {'indicator': [{
            'title': "Frequency of submissions with " + ticker + " mentioned in the last 24h"}]}
        }}}

    # Named `figure` so it is not mistaken for the module-level `fig` widget.
    figure = go.Figure(dict_of_fig)

    figure.show()

In [149]:
# Wire plot_reddit_ticker_frequency to an interactive control, starting with ticker 'GME'.
interact(plot_reddit_ticker_frequency, ticker='GME')

# Container styling for the module-level `fig` widget.
ipywLayout = widgets.Layout(border='solid 2px green')
ipywLayout.display='none' # hides the VBox below; comment this line out and re-run the cell to make the container visible
# Last expression of the cell, so the VBox is the cell's displayed value.
widgets.VBox([fig], layout=ipywLayout)

interactive(children=(Text(value='GME', description='ticker'), Output()), _dom_classes=('widget-interact',))

VBox(children=(FigureWidget({
    'data': [], 'layout': {'template': '...'}
}),), layout=Layout(border='solid …

In [183]:
from pandas_datareader import data
import datetime
import ta

In [184]:
# Module-level FigureWidget with an empty layout. The plotting function defined
# next assigns to its own local `fig`, so this widget is not populated by it.
fig = go.FigureWidget( layout=go.Layout() )
def plot_VPT(ticker):
    """
    Show an indicator with the latest gap between the volume-price trend
    (VPT) of ``ticker`` and the VPT's 5-row rolling mean.

    Prices are read through DataReader with the 'yahoo' source, from
    2020-01-01 up to the moment of the call. The displayed value is the
    plain difference VPT - rolling mean, even though the title text calls
    it a percentage change.

    Returns None. The figure is displayed via ``show()``.
    """
    range_start = '2020-01-01'
    range_end = datetime.datetime.now()

    prices = data.DataReader(ticker, 'yahoo', range_start, range_end)

    prices['VPT'] = ta.volume.volume_price_trend(prices['Close'], prices['Volume'])
    prices['moving_average'] = prices['VPT'].rolling(window=5).mean()
    prices['change_alert'] = prices['VPT'] - prices['moving_average']

    # The most recent row drives the indicator.
    latest_value = prices['change_alert'].iloc[-1]

    indicator_trace = {
        "type": "indicator",
        "value": latest_value,
        "delta": {'reference': 2.719832e+07},
        "mode": "number+delta+gauge",
    }
    indicator_template = {
        "data": {'indicator': [{'title': "VPT Percentage Change Alert for " + ticker}]}
    }

    chart = go.Figure({
        "data": [indicator_trace],
        "layout": {"template": indicator_template},
    })

    chart.show()

In [185]:
# Wire plot_VPT to an interactive control, starting with ticker 'GME'.
interact(plot_VPT, ticker='GME')

# Container styling for the module-level `fig` widget.
ipywLayout = widgets.Layout(border='solid 2px green')
ipywLayout.display='none' # hides the VBox below; comment this line out and re-run the cell to make the container visible
# Last expression of the cell, so the VBox is the cell's displayed value.
widgets.VBox([fig], layout=ipywLayout)

interactive(children=(Text(value='GME', description='ticker'), Output()), _dom_classes=('widget-interact',))

VBox(children=(FigureWidget({
    'data': [], 'layout': {'template': '...'}
}),), layout=Layout(border='solid …