### Here, we will create a Jupyter Notebook that fetches live data, builds an interactive plot, and then deploys it as a live dashboard.

We will use Reddit as the data source for our dashboard, querying it through the Pushshift API.

In [1]:
import numpy as np
import seaborn as sns
import pandas as pd
import cufflinks as cf 
import plotly.express as px
%matplotlib inline


# from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# init_notebook_mode(connected=True)

import plotly.graph_objs as go
import plotly.io as pio
# pio.renderers.default = 'colab'

In [2]:
import requests
import textblob
import nltk
# nltk.download('punkt')
# Show full cell contents (no column-width truncation) when rendering DataFrames.
pd.options.display.max_colwidth = None

In [3]:
import requests
# Fetch comments that mention "machine learning".
url = "https://api.pushshift.io/reddit/search/comment/?q=machine+learning"
# requests waits indefinitely unless a timeout is given; bound the wait so a
# stalled server cannot hang the notebook.
request = requests.get(url, timeout=30)
# Fail with a clear HTTP error instead of trying to parse an error page as JSON.
request.raise_for_status()

json_response = request.json()

In [4]:
#function to make this API call automated


def get_data(d_type, **kwargs):
    """
    Fetch search results from the Pushshift Reddit API.

    Parameters
    ----------
    d_type : str
        Endpoint to query; can be 'comment' or 'submission'.
    **kwargs
        Passed through unchanged as the URL query parameters of the request.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the API responds with a 4xx/5xx status code.
    requests.Timeout
        If the server does not answer within 30 seconds.
    """
    base_url = f"https://api.pushshift.io/reddit/search/{d_type}/"
    # requests has no default timeout; without one a stalled connection would
    # block the kernel forever.
    response = requests.get(base_url, params=kwargs, timeout=30)
    # Surface HTTP failures here rather than as a confusing JSON decode error
    # (or a payload without the expected keys) further downstream.
    response.raise_for_status()
    return response.json()

def make_clickable(val):
    """
    Render a value as an HTML anchor labelled "Link".

    Intended as a formatter for a pandas Styler column.

    Parameters
    ----------
    val
        The link target; it is converted to text and used as the ``href``.

    Returns
    -------
    str
        ``<a href="...">Link</a>`` with the target HTML-escaped.
    """
    from html import escape

    # The target comes from external data, so escape quotes and angle brackets
    # to keep it from breaking out of the href attribute.
    return '<a href="{}">Link</a>'.format(escape(str(val), quote=True))

In [5]:
# Smoke-test the helper: comments mentioning "Machine Learning".
comment_query = {
    "q": "Machine Learning",   # search phrase
    "after": "48h",            # only the last 48 hours
    "size": 1000,              # at most 1000 comments
    "sort_type": "score",      # rank by score ...
    "sort": "desc",            # ... highest first
}
data_test = get_data("comment", **comment_query)

### Comment activity 

In [6]:
# In which subreddits does the term of interest appear most often?

# Dashboard-wide settings
COMMENT_COLOR         = "blueviolet"
SUBMISSION_COLOR      = "darkgreen"
TEXT_PREVIEW_SIZE     = 240
TERM_OF_INTEREST      = "Data Science"
SUBREDDIT_OF_INTEREST = "Data Science"
TIMEFRAME             = "48h"


# Fetch comments that mention the term within the timeframe.
response = get_data(
    d_type="comment",
    q=TERM_OF_INTEREST,
    after=TIMEFRAME,
    size=1000,
    aggs="subreddit",
)
data = response.get("data")

# Count comments per subreddit and keep the ten most active ones.
# Note: `df` is a Series indexed by subreddit name, not a DataFrame.
subreddit_counts = pd.DataFrame.from_records(data).value_counts("subreddit")
df = subreddit_counts.iloc[:10]

x = df.index
y = df.to_numpy()

df.head()

subreddit
news           5
NoNewNormal    4
france         4
ukpolitics     3
datascience    3
dtype: int64

In [7]:
# `labels` only takes effect for keys that match the plotted column names, so
# reshape the per-subreddit counts into a frame with explicit column names and
# refer to those names in both the axes and the labels.
comment_counts = df.rename_axis("subreddit").reset_index(name="n_comments")

fig = px.bar(comment_counts,
       x="subreddit",
       y="n_comments",
       title=f"Subreddits with most activity - comments with '{TERM_OF_INTEREST}' in the last {TIMEFRAME}",
       labels={"n_comments": "# comments", "subreddit": "Subreddits"},
       color_discrete_sequence=[COMMENT_COLOR],
       height=500,
       width=800)
fig.show()

### Submission activity

In [8]:
# Fetch submissions that mention the term within the timeframe.
data = get_data(d_type="submission", q=TERM_OF_INTEREST, after=TIMEFRAME, size=1000, aggs="subreddit").get("data")

# Count submissions per subreddit and keep the ten most active ones
# (`df` is a Series indexed by subreddit name).
df = pd.DataFrame.from_records(data).value_counts("subreddit")[0:10]
x = df.keys()
y = df.values

# `labels` only takes effect for keys that match the plotted column names, so
# plot from a frame with explicit column names.
submission_counts = df.rename_axis("subreddit").reset_index(name="n_submissions")

px.bar(submission_counts,
       x="subreddit",
       y="n_submissions",
       title=f"Subreddits with most activity - submissions with '{TERM_OF_INTEREST}' in the last {TIMEFRAME}",
       labels={"n_submissions": "# submissions", "subreddit": "Subreddits"},
       color_discrete_sequence=[SUBMISSION_COLOR],
       height=500,
       width=800)

### Most upvoted comments 

In [9]:
# Ten highest-scoring comments that mention the term within the timeframe.
data = get_data(
    d_type="comment",
    q=TERM_OF_INTEREST,
    after=TIMEFRAME,
    size=10,
    sort_type="score",
    sort="desc",
).get("data")

df = (
    pd.DataFrame.from_records(data)[["author", "subreddit", "score", "body", "permalink"]]
    .assign(
        # shorten each comment body to a 50-character preview
        body=lambda frame: frame["body"].str.slice(0, 50) + "...",
        # permalinks are site-relative; prefix the host to get a full URL
        permalink=lambda frame: "https://reddit.com" + frame["permalink"].astype(str),
    )
)

print(f"\nTop 10 most upvoted comments with '{TERM_OF_INTEREST}' in the past {TIMEFRAME}\n")

# Render the permalink column as clickable links via the helper defined earlier.
df.style.format({"permalink": make_clickable})


Top 10 most upvoted comments with 'Data Science' in the past 48h



Unnamed: 0,author,subreddit,score,body,permalink
0,Isz82,television,27,A [link](https://www.livescience.com/46397-science...,Link
1,po-handz,buildapcsales,18,"As someone who just sold his 1950x for a 10940x, a...",Link
2,Sideswipe0009,Conservative,16,>“How did the state determine that seniors were...,Link
3,msd483,datascience,13,"To add on with the obvious, this isn't a data scie...",Link
4,JasonVeritech,DaystromInstitute,12,I'll repost my thoughts from an earlier thread: &...,Link
5,darth_faader,ABoringDystopia,11,If you have a formal mechanical engineering educat...,Link
6,DartmouthSimp,ApplyingToCollege,11,Axis Labels: **✅** Graph Title: **✅** Line of be...,Link
7,hgwellsinsanity,CoronavirusMichigan,10,It's absolutely astonishing given that the Governo...,Link
8,GandalfsPlumbs,dankmemes,8,"In Shintoism, the Emperor was perceived as a divin...",Link
9,K0NSPIRACY,CODWarzone,8,"No, that’s not the argument I’m trying to make at ...",Link


### Most Commented Submissions


In [10]:
# Ten submissions mentioning the term that gathered the most comments.
data = get_data(d_type="submission", q=TERM_OF_INTEREST, after=TIMEFRAME, size=10, sort_type="num_comments", sort="desc").get("data")


# to see what columns interest you, simply list(df)
df = pd.DataFrame.from_records(data)[["author", "subreddit", "num_comments", "title", "permalink"]]

# Keep only the first TEXT_PREVIEW_SIZE characters of each title.
# The `.str` accessor is required: slicing the Series directly selects rows,
# not characters, and would leave every title at full length.
df["title"] = df["title"].str[0:TEXT_PREVIEW_SIZE] + "..."

# permalinks are site-relative; prefix the host to get a full URL
df["permalink"] = "https://reddit.com" + df["permalink"].astype(str)


print(f"\nTop 10 most commented submissions with '{TERM_OF_INTEREST}' in the past {TIMEFRAME}\n")

# style the last column to be clickable and print
df.style.format({'permalink': make_clickable})


Top 10 most commented submissions with 'Data Science' in the past 48h



Unnamed: 0,author,subreddit,num_comments,title,permalink
0,Ralts_Bloodthorne,HFY,135,First Contact - Non Canon Stuff - Chapter 464...,Link
1,DadTheMaskedTerror,changemyview,117,"CMV: Investing Advice to Index, Buy & Hold, & Not Time the Market Is Bunk...",Link
2,sbdivya,books,93,"I am S.B. Divya, science fiction author, Escape Pod co-editor, and data scientist. AMA!...",Link
3,Hydraulicdespotism,pennystocks,80,"ReconAfrica ($RECO, $RECAF) - Catalyst - Well #1 complete - company hires Diana McQueen, previous Canadian Minister of Energy...",Link
4,CertainListen,computerscience,79,Seriously Considering Dropping Out of Computer Science...,Link
5,One_Organization_165,gradadmissions,72,Harvard is really poor at Computer Science...,Link
6,jazzwhiz,Physics,70,When this post is 24h old Fermilab will announce the highly anticipated muon g-2 results in a free public talk. Links and more information inside....,Link
7,anothermetaphor,gis,38,What are core elements of a GIS engineer?...,Link
8,CaptainPreposterous,conspiracy,38,Trust science when it comes to the vaccine they say......,Link
9,Rizz2305,ukmedicalcannabis,33,"Thanks to everyone for keeping my head up, this is the email I have just sent to my local MP, NHS England, NICE and Matt Hancock...",Link


### Sentiment in /r/python across time

In [28]:
# Up to 1000 comments from the "python" subreddit within TIMEFRAME, sorted by score.
# NOTE(review): no `q` term is passed, so the query filters by subreddit rather
# than by the word "Python", although the caption printed below says otherwise —
# confirm which is intended.
data = get_data(
    d_type="comment",
    after=TIMEFRAME,
    size=1000,
    sort_type="score",
    sort="desc",
    subreddit="python",
).get("data")

print("Comments with the word Python in the last 48 hours")

# Columns kept from the API response.
columns_of_interest = ["author", "body", "created_utc", "score", "permalink"]

# Build the frame and expand the site-relative permalinks into full URLs.
df = pd.DataFrame.from_records(data)[columns_of_interest]
df["permalink"] = "https://reddit.com" + df["permalink"].astype(str)

# Preview the first ten rows with clickable links.
df.head(10).style.format({"permalink": make_clickable})


Comments with the word Python in the last 48 hours


Unnamed: 0,author,body,created_utc,score,permalink
0,Zomunieo,No. All keywords in Python contain no spaces. There's no good reason to break that rule.,1617843708,11,Link
1,BigMac3k,Let’s see if we can break it!,1617746266,8,Link
2,tahafyto,Super cool! How is this not popular? Sucks that my gpu has only 2 GB vram.,1617831454,8,Link
3,HartzToTheIV,"As far as I know, some companies have pretty much perfected voice cloning already, but decided against publishing software (I think it was Adobe with some kind of ""voice photoshop""). You could do some really terrible stuff with it. From a basic security concern to outright criminiality, there's a wide range of uses for this kind of application. If you have seen what deep fakes can do, imagine the same stuff but with real voices. Celebrity porn would be the least of our problems. It's a fascinating technology, and I guess it will become widespread before too long, but I really don't want to be a public speaker in any way when that time comes.",1617835069,6,Link
4,17291,"upvoted_submission_comments = sorted(  submission.comments, key=lambda x: x.score, reverse=True  )  downvoted_submission_comments = sorted(submission.comments, key=lambda x: x.score) You shouldn't need to sort the comments twice to get the bottom 5 and top 5. Instead, you could do something like this (some code snipped for brevity):  submission_comments = sorted(submission.comments, key=lambda x: x.score)  for comment in submission_comments[:-6:-1]:  edited_body = edited_body + f""'{comment.body}',\n\n""  for comment in submission_comments[:5]:  edited_body = edited_body + f""'{comment.body}',\n\n""",1617746768,6,Link
5,swarm_mazer,\n ! ;;; & $(),1617747233,5,Link
6,joeyGibson,"I just updooted the post, but the updoot count didn’t change. 😢",1617747600,5,Link
7,mightymander,damm wish it supported amd GPU's,1617834969,5,Link
8,stargazer_w,"I thought they released an official rocm version resently, but haven't checked it out yet",1617836251,4,Link
9,delasislas,"I don’t know, let’s seen what this can do.",1617746103,4,Link


### Comments dataset after adding sentiment analysis data

In [27]:


# Analyse every comment body once. Polarity and subjectivity are both read from
# the same `.sentiment` result, so there is no need to build a TextBlob twice
# per row. Plain lists also behave sensibly when the frame has no rows, unlike
# a row-wise DataFrame.apply.
sentiments = [textblob.TextBlob(text).sentiment for text in df["body"]]

# column with sentiment polarity
df["sentiment_polarity"] = [s.polarity for s in sentiments]

# column with sentiment subjectivity
df["sentiment_subjectivity"] = [s.subjectivity for s in sentiments]

# column with 'positive' or 'negative' depending on sentiment_polarity
# (a polarity of exactly 0 is labelled 'positive')
df["sentiment"] = df["sentiment_polarity"].map(
    lambda polarity: "positive" if polarity >= 0 else "negative"
)

# column with a text preview that shows the first 50 characters
df["preview"] = df["body"].str[0:50]

# convert the created_utc epoch seconds into a datetime column
df["date"] = pd.to_datetime(df["created_utc"], unit="s")

df.head(10)

Comments after adding sentiment analysis


Unnamed: 0,author,body,created_utc,score,permalink,sentiment_polarity,sentiment_subjectivity,sentiment,preview,date
0,Zomunieo,No. All keywords in Python contain no spaces. There's no good reason to break that rule.,1617843708,11,https://reddit.com/r/Python/comments/mmgbin/whats_your_opinion_on_changing_elif_into_else_if/gtr8dk3/,-0.35,0.6,negative,No. All keywords in Python contain no spaces. Ther,2021-04-08 01:01:48
1,BigMac3k,Let’s see if we can break it!,1617746266,8,https://reddit.com/r/Python/comments/mlmx1k/this_is_a_living_reddit_post_see_the_post_body/gtmc42e/,0.0,0.0,positive,Let’s see if we can break it!,2021-04-06 21:57:46
2,tahafyto,Super cool! How is this not popular? Sucks that my gpu has only 2 GB vram.,1617831454,8,https://reddit.com/r/Python/comments/mmarp8/voice_cloning_app/gtqjxv2/,0.034167,0.703333,positive,Super cool! How is this not popular? Sucks that my,2021-04-07 21:37:34
3,HartzToTheIV,"As far as I know, some companies have pretty much perfected voice cloning already, but decided against publishing software (I think it was Adobe with some kind of ""voice photoshop""). You could do some really terrible stuff with it. From a basic security concern to outright criminiality, there's a wide range of uses for this kind of application. If you have seen what deep fakes can do, imagine the same stuff but with real voices. Celebrity porn would be the least of our problems.\n\nIt's a fascinating technology, and I guess it will become widespread before too long, but I really don't want to be a public speaker in any way when that time comes.",1617835069,6,https://reddit.com/r/Python/comments/mmarp8/voice_cloning_app/gtqrh1h/,0.0875,0.516667,positive,"As far as I know, some companies have pretty much",2021-04-07 22:37:49
4,17291,"upvoted_submission_comments = sorted(\n submission.comments, key=lambda x: x.score, reverse=True\n )\n downvoted_submission_comments = sorted(submission.comments, key=lambda x: x.score)\n\nYou shouldn't need to sort the comments twice to get the bottom 5 and top 5. Instead, you could do something like this (some code snipped for brevity):\n\n submission_comments = sorted(submission.comments, key=lambda x: x.score)\n for comment in submission_comments[:-6:-1]:\n edited_body = edited_body + f""'{comment.body}',\n\n""\n for comment in submission_comments[:5]:\n edited_body = edited_body + f""'{comment.body}',\n\n""",1617746768,6,https://reddit.com/r/Python/comments/mlmx1k/this_is_a_living_reddit_post_see_the_post_body/gtmd5ed/,0.5,0.5,positive,upvoted_submission_comments = sorted(\n,2021-04-06 22:06:08
5,swarm_mazer,\n ! ;;; &amp; $(),1617747233,5,https://reddit.com/r/Python/comments/mlmx1k/this_is_a_living_reddit_post_see_the_post_body/gtme2wl/,0.0,0.0,positive,\n ! ;;; &amp; $(),2021-04-06 22:13:53
6,joeyGibson,"I just updooted the post, but the updoot count didn’t change. 😢",1617747600,5,https://reddit.com/r/Python/comments/mlmx1k/this_is_a_living_reddit_post_see_the_post_body/gtmet3d/,0.0,0.0,positive,"I just updooted the post, but the updoot count did",2021-04-06 22:20:00
7,mightymander,damm wish it supported amd GPU's,1617834969,5,https://reddit.com/r/Python/comments/mmarp8/voice_cloning_app/gtqr9t7/,0.0,0.0,positive,damm wish it supported amd GPU's,2021-04-07 22:36:09
8,stargazer_w,"I thought they released an official rocm version resently, but haven't checked it out yet",1617836251,4,https://reddit.com/r/Python/comments/mmarp8/voice_cloning_app/gtqtto8/,0.0,0.0,positive,I thought they released an official rocm version r,2021-04-07 22:57:31
9,delasislas,"I don’t know, let’s seen what this can do.",1617746103,4,https://reddit.com/r/Python/comments/mlmx1k/this_is_a_living_reddit_post_see_the_post_body/gtmbrvt/,0.0,0.0,positive,"I don’t know, let’s seen what this can do.",2021-04-06 21:55:03


In [25]:
# Scatter of comment sentiment over time.
# Colours are bound to the sentiment labels explicitly: a bare colour sequence is
# handed out in order of first appearance, so a frame whose first row is
# "negative" would otherwise paint negative comments green and positive ones red.
# NOTE(review): marker sizes come from `score`; plotly expects non-negative
# sizes, so down-voted comments may need clipping — confirm against real data.
px.scatter(df, x="date", # date on the x axis
               y="sentiment_polarity", # sentiment on the y axis
               hover_data=["author", "permalink", "preview"], # data to show on hover
               color="sentiment", # what should the color depend on?
               color_discrete_map={"positive": "lightseagreen", "negative": "indianred"}, # fixed colour per label
               size="score", # the more votes, the bigger the circle
               size_max=20, # not too big
               labels={"sentiment_polarity": "Comment positivity", "date": "Date comment was posted"}, # axis names
               title=f"Comment sentiment in /r/python for the past {TIMEFRAME}", # title follows the queried timeframe
          )