# Sci-Fi IRL #1: Technology Terminology Velocity

### A Data Storytelling Project by Tobias Reaper

### ---- Datalogue 008 ----

---
---

### Imports and Configuration

In [1]:
# Three Musketeers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# For using the API
import requests

In [11]:
# More advanced visualizations with Bokeh
from bokeh.plotting import figure, output_file, output_notebook, show
from bokeh.layouts import column
from bokeh.models.glyphs import Patches

In [12]:
# Import color library
import colorcet as cc

In [13]:
# Define color palette: 17 entries of colorcet's `bkr` list, taken at every 15th position
palette = [cc.bkr[position] for position in range(0, 17 * 15, 15)]
palette

['#1881fa',
 '#2774dd',
 '#2e67c0',
 '#315aa4',
 '#314e89',
 '#2f426f',
 '#2b3656',
 '#262b3e',
 '#212128',
 '#28201e',
 '#3d2622',
 '#542d26',
 '#6b332b',
 '#82392f',
 '#9a3f34',
 '#b34538',
 '#cc4a3c']

In [2]:
# Raise the pandas display limits so that more columns and rows are shown
pd.options.display.max_columns = 100
pd.options.display.max_rows = 500

---

### Functions

In [102]:
def pushshift_api_request(query, subreddit, frequency="month", aggs="created_utc"):
    """
    Returns the JSON response of a PushShift API aggregate comment search as a Python dictionary.
    
    Note: if you're reading this note, that means that this function is still only written
    with the intention of automating a specific set of actions for a specific project.
    
    ---- Arguments ----
    query: (str) keyword to search.
    subreddit: (str) subreddit name
    frequency: (str) set the size of the time buckets.
    aggs: (str) aggregate function name. Default is "created_utc".
    (For more information, read the PushShift API Documentation.)
    -------------------
    
    ---- Raises ----
    requests.HTTPError: if the API answers with any status code other than 200.
    requests.RequestException: if the request cannot be completed (including timeout).
    -------------------
    """
    
    # Endpoint and query parameters are kept separate so that `requests`
    # takes care of URL-encoding (e.g. keywords that contain spaces)
    endpoint = "https://api.pushshift.io/reddit/search/comment/"
    params = {
        "q": query,
        "subreddit": subreddit,
        "aggs": aggs,
        "frequency": frequency,
        "size": 100,
    }
    
    # Send the request; the timeout keeps a stalled connection from hanging the notebook
    response = requests.get(endpoint, params=params, timeout=120)
    
    # Check the response; stop execution if failed.
    # An explicit raise (instead of `assert`) still runs under `python -O`
    # and reports which request failed.
    if response.status_code != 200:
        raise requests.HTTPError(
            f"PushShift request failed with status {response.status_code}: {response.url}",
            response=response,
        )
    
    # Parse the JSON into a Python dictionary
    # and return it for further processing
    return response.json()

In [105]:
def create_df(data, keyword, frequency="month"):
    """
    Returns cleaned Pandas DataFrame of keyword frequency over time, given correctly-formatted Python dictionary.
    Renames the count column to `keyword` and the time-bucket column to `frequency`;
    converts the bucket start (epoch seconds) to datetime.
    
    Note: if you're reading this note, that means that this function is still only written
    with the intention of automating a specific set of actions for a specific project.
    
    ---- Arguments ----
    data: (dict) Python dictionary converted from JSON API response.
        Only the buckets under data["aggs"]["created_utc"] are read.
    keyword: (str) the keyword that was queried; becomes the name of the count column.
    frequency: (str) size of time buckets, which is also the name of the resulting
        datetime column. Defaults to "month".
    -------------------
    
    ---- Returns ----
    (pd.DataFrame) one row per time bucket. If the response holds no buckets,
    an empty DataFrame with the two expected columns is returned.
    -------------------
    """
    
    buckets = data["aggs"]["created_utc"]
    
    # No buckets at all: return an empty, correctly-labelled frame instead of
    # failing on the missing "key" column further down
    if len(buckets) == 0:
        return pd.DataFrame({
            frequency: pd.Series(dtype="datetime64[ns]"),
            keyword: pd.Series(dtype="int64"),
        })
    
    # Convert the python object into a pandas dataframe
    df = pd.DataFrame(buckets)

    # Convert "key" (seconds since the unix epoch) into a datetime column
    df["key"] = pd.to_datetime(df["key"], unit="s", origin="unix")

    # Rename "key" to reflect the fact that it is the beginning of the time bucket,
    # and "doc_count" to the keyword it counts
    return df.rename(columns={"key": frequency, "doc_count": keyword})

In [106]:
def comments_df(data):
    """
    Builds a Pandas DataFrame from the Reddit comments held in an API response dictionary.
    
    Note: if you're reading this note, that means that this function is still only written
    with the intention of automating a specific set of actions for a specific project.
    
    ---- Arguments ----
    data: (dict) Python dictionary converted from JSON API response;
        the comments are read from its "data" key.
    -------------------
    """
    
    # Hand the comment records straight to the DataFrame constructor
    return pd.DataFrame(data["data"])

In [107]:
def df_to_csv(data, filename):
    """
    Thin wrapper around the Pandas `.to_csv()` method that always
    leaves the index out, so every export is written the same way.
    
    ---- Arguments ----
    data: (pd.DataFrame) Pandas DataFrame to be saved as a csv.
    filename: (str) name or path of the file to be saved.
    -------------------
    """
    
    # Write the frame without its index column
    data.to_csv(filename, index=False)

In [108]:
def reddit_data_setter(keywords, subreddits, csv=False, frequency="month", aggs="created_utc"):
    """
    Creates two DataFrames that hold combined data of all combinations of keywords / subreddits.
    
    Note: if you're reading this note, that means that this function is still only written
    with the intention of automating a specific set of actions for a specific project.
    
    ---- Arguments ----
    keywords: (list) keyword(s) to search.
    subreddits: (list) name of subreddit(s) to include.
    csv: (bool) if True, save the resulting dataframes as csv file.
        The files are named after the first keyword:
        "<keyword>-monthly.csv" and "<keyword>-comments.csv".
    frequency: (str) set the size of the time buckets; also the name of the
        datetime column the keyword counts are joined on.
    aggs: (str) aggregate function name. Default is "created_utc".
        Accepted for interface compatibility but not forwarded: the counts are
        parsed by `create_df`, which only reads "created_utc" buckets.
    (For more information, read the PushShift API Documentation.)
    -------------------
    
    ---- Returns ----
    (df_main, df_comm): keyword counts per time bucket (one column per
    subreddit / keyword combination) and the concatenated comments.
    -------------------
    """
    from time import sleep

    comment_df_list = []  # Holds one comment dataframe per request
    word_df_list = []  # Holds one word count dataframe per request
    
    # Run query for individual keywords on each subreddit
    # Subreddit (outer) -> keyword (inner) = all keywords in one subreddit at a time
    for subreddit in subreddits:
        for word in keywords:
            # Create unique column name for each subreddit / word combo
            col_name = f"{subreddit}_{word.replace(' ', '')}"
            
            # Indicates current subreddit / keyword
            print(f"{col_name}...")
            sleep(0.5)  # Add sleep time to reduce API load 

            # Make request and convert response to dictionary
            dictionary = pushshift_api_request(word, subreddit, frequency=frequency)

            # Append aggs word count df to word_df_list
            word_df_list.append(create_df(dictionary, col_name, frequency=frequency))

            # Append comments df to comment_df_list
            comment_df_list.append(comments_df(dictionary))
            
            sleep(1.0)  # More sleep to reduce API load (same total pause as before)
    
    # Set the time-bucket column as index in order to concatenate list of dataframes
    df_main = pd.concat([df.set_index(frequency) for df in word_df_list],
                        axis=1, join="outer").reset_index()
    
    # Concatenate comment_df_list dataframes
    df_comm = pd.concat(comment_df_list, axis=0, sort=False,
                        join="outer", ignore_index=True)
        
    # If csv parameter is set to True, save datasets to filesystem as csv
    if csv:
        df_to_csv(df_main, f"{keywords[0]}-monthly.csv")
        df_to_csv(df_comm, f"{keywords[0]}-comments.csv")
    
    # Return df_main, df_comm, respectively
    return df_main, df_comm

---
---

## Term Velocity: Algorithm

The velocity of the term "algorithm" in each of the target subreddits.

In [42]:
# Keyword for this section, wrapped in a list
words = ["algorithm"]

# Target subreddits
subs = [
    "Futurology", "technology", "science", "askscience", "gadgets",
    "books", "scifi", "movies", "gaming", "television",
    "news", "worldnews", "politics", "philosophy",
    "AskReddit", "todayilearned", "explainlikeimfive",
]

In [None]:
# Fetch the data for every subreddit and save both resulting frames as csv
df_main, df_comm = reddit_data_setter(words, subs, csv=True)

In [10]:
# Sanity check: print the (rows, columns) shape, then preview the first rows
print(df_main.shape)
df_main.head()

(156, 18)


Unnamed: 0,month,Futurology_algorithm,technology_algorithm,science_algorithm,askscience_algorithm,gadgets_algorithm,books_algorithm,scifi_algorithm,movies_algorithm,gaming_algorithm,television_algorithm,news_algorithm,worldnews_algorithm,politics_algorithm,philosophy_algorithm,AskReddit_algorithm,todayilearned_algorithm,explainlikeimfive_algorithm
0,2006-10-01,,,1,,,,,,,,,,,,,,
1,2006-11-01,,,1,,,,,,,,,,,,,,
2,2006-12-01,,,0,,,,,,,,,,,,,,
3,2007-01-01,,,2,,,,,,,,,,,,,,
4,2007-02-01,,,2,,,,,,,,,,,,,,


---

### Visualizations

In [67]:
# Load the monthly keyword counts from csv.
# NOTE(review): reddit_data_setter(..., csv=True) saves "algorithm-monthly.csv" relative to
# the working directory, while this reads from "008-Session_Exports/" — presumably the
# export was moved there by hand; confirm.
df_main = pd.read_csv("008-Session_Exports/algorithm-monthly.csv")

In [76]:
# Convert the "month" column loaded from csv into datetimes.
# `infer_datetime_format` is deprecated in pandas 2.x and is not needed here.
df_main["month"] = pd.to_datetime(df_main["month"])
df_main.head()

Unnamed: 0,month,Futurology_algorithm,technology_algorithm,science_algorithm,askscience_algorithm,gadgets_algorithm,books_algorithm,scifi_algorithm,movies_algorithm,gaming_algorithm,television_algorithm,news_algorithm,worldnews_algorithm,politics_algorithm,philosophy_algorithm,AskReddit_algorithm,todayilearned_algorithm,explainlikeimfive_algorithm
0,2006-10-01,,,1,,,,,,,,,,,,,,
1,2006-11-01,,,1,,,,,,,,,,,,,,
2,2006-12-01,,,0,,,,,,,,,,,,,,
3,2007-01-01,,,2,,,,,,,,,,,,,,
4,2007-02-01,,,2,,,,,,,,,,,,,,


In [78]:
# Inspect the column dtypes (e.g. to check how "month" is typed after the conversion)
df_main.dtypes

month                          datetime64[ns]
Futurology_algorithm                  float64
technology_algorithm                  float64
science_algorithm                       int64
askscience_algorithm                  float64
gadgets_algorithm                     float64
books_algorithm                       float64
scifi_algorithm                       float64
movies_algorithm                      float64
gaming_algorithm                      float64
television_algorithm                  float64
news_algorithm                        float64
worldnews_algorithm                   float64
politics_algorithm                    float64
philosophy_algorithm                  float64
AskReddit_algorithm                   float64
todayilearned_algorithm               float64
explainlikeimfive_algorithm           float64
dtype: object

In [79]:
# Color assignments: the subreddit at position n gets the palette color at position n
subs_colors = {sub: f"{palette[position]}" for position, sub in enumerate(subs)}

In [None]:
# Output to current notebook and to a standalone HTML file
output_notebook()
output_file(f"{words[0]}-velocity-viz.html")

term = words[0]  # the keyword plotted in this section
months = df_main["month"]

# x-axis window: from the row at position 14 through the last row.
# Selecting the column by name avoids the deprecated positional `Series[0]` lookup.
# NOTE(review): the offset of 14 is carried over unchanged — confirm why the first rows are skipped.
x_window = (months.iloc[14], months.iloc[-1])

p = {}  # dict to hold plots
p_names = []  # list for plot names

# One line chart per subreddit, drawn in its assigned color
for sub, color in subs_colors.items():
    p[sub] = figure(title=f"Comments that mention '{term}' in r/{sub}",
                    plot_width=1000, plot_height=200,
                    x_axis_type="datetime", x_range=x_window)
    p[sub].line(months, df_main[f"{sub}_{term}"], line_width=2, line_color=color)
    p_names.append(p[sub])

# Show the results
show(column(p_names))

---
---

## Term Velocity: AI

The velocity of the term "AI" (abbreviation of artificial intelligence) in each of the target subreddits.

In [32]:
# Keyword for this section, wrapped in a list
words = ["AI"]

# Target subreddits
subs = [
    "Futurology", "technology", "science", "askscience", "gadgets",
    "books", "scifi", "movies", "gaming", "television",
    "news", "worldnews", "politics", "philosophy",
    "AskReddit", "todayilearned", "explainlikeimfive",
]

In [None]:
# Fetch the data for every subreddit and save both resulting frames as csv
df_main, df_comm = reddit_data_setter(words, subs, csv=True)

In [34]:
# Sanity check: print the (rows, columns) shape, then preview the first rows
print(df_main.shape)
df_main.head()

(156, 18)


Unnamed: 0,month,Futurology_AI,technology_AI,science_AI,askscience_AI,gadgets_AI,books_AI,scifi_AI,movies_AI,gaming_AI,television_AI,news_AI,worldnews_AI,politics_AI,philosophy_AI,AskReddit_AI,todayilearned_AI,explainlikeimfive_AI
0,2006-10-01,,,1,,,,,,,,,,,,,,
1,2006-11-01,,,3,,,,,,,,,,,,,,
2,2006-12-01,,,0,,,,,,,,,,,,,,
3,2007-01-01,,,0,,,,,,,,,,,,,,
4,2007-02-01,,,5,,,,,,,,,,,,,,


---

### Visualizations

In [36]:
# Color assignments: the subreddit at position n gets the palette color at position n
subs_colors = {sub: f"{palette[position]}" for position, sub in enumerate(subs)}

In [None]:
# Output to current notebook and to a standalone HTML file
output_notebook()
output_file(f"{words[0]}-velocity-viz.html")

term = words[0]  # the keyword plotted in this section
months = df_main["month"]

# x-axis window: from the row at position 14 through the last row.
# Selecting the column by name avoids the deprecated positional `Series[0]` lookup.
# NOTE(review): the offset of 14 is carried over unchanged — confirm why the first rows are skipped.
x_window = (months.iloc[14], months.iloc[-1])

p = {}  # dict to hold plots
p_names = []  # list for plot names

# One line chart per subreddit, drawn in its assigned color
for sub, color in subs_colors.items():
    p[sub] = figure(title=f"Comments that mention '{term}' in r/{sub}",
                    plot_width=1000, plot_height=200,
                    x_axis_type="datetime", x_range=x_window)
    p[sub].line(months, df_main[f"{sub}_{term}"], line_width=2, line_color=color)
    p_names.append(p[sub])

# Show the results
show(column(p_names))

---
---

## Term Velocity: AR

The velocity of the term "AR" (abbreviation of augmented reality) in each of the target subreddits.

In [84]:
# Keyword for this section, wrapped in a list
words = ["AR"]

# Target subreddits
subs = [
    "Futurology", "technology", "science", "askscience", "gadgets",
    "books", "scifi", "movies", "gaming", "television",
    "news", "worldnews", "politics", "philosophy",
    "AskReddit", "todayilearned", "explainlikeimfive",
]

In [None]:
# Fetch the data for every subreddit and save both resulting frames as csv
df_main, df_comm = reddit_data_setter(words, subs, csv=True)

In [86]:
# Sanity check: print the (rows, columns) shape, then preview the first rows
print(df_main.shape)
df_main.head()

(156, 18)


Unnamed: 0,month,Futurology_AR,technology_AR,science_AR,askscience_AR,gadgets_AR,books_AR,scifi_AR,movies_AR,gaming_AR,television_AR,news_AR,worldnews_AR,politics_AR,philosophy_AR,AskReddit_AR,todayilearned_AR,explainlikeimfive_AR
0,2006-10-01,,,1,,,,,,,,,,,,,,
1,2006-11-01,,,0,,,,,,,,,,,,,,
2,2006-12-01,,,0,,,,,,,,,,,,,,
3,2007-01-01,,,0,,,,,,,,,,,,,,
4,2007-02-01,,,0,,,,,,,,,,,,,,


---

### Visualizations

In [87]:
# Color assignments: the subreddit at position n gets the palette color at position n
subs_colors = {sub: f"{palette[position]}" for position, sub in enumerate(subs)}

In [None]:
# Output to current notebook and to a standalone HTML file
output_notebook()
output_file(f"{words[0]}-velocity-viz.html")

term = words[0]  # the keyword plotted in this section
months = df_main["month"]

# x-axis window: from the row at position 14 through the last row.
# Selecting the column by name avoids the deprecated positional `Series[0]` lookup.
# NOTE(review): the offset of 14 is carried over unchanged — confirm why the first rows are skipped.
x_window = (months.iloc[14], months.iloc[-1])

p = {}  # dict to hold plots
p_names = []  # list for plot names

# One line chart per subreddit, drawn in its assigned color
for sub, color in subs_colors.items():
    p[sub] = figure(title=f"Comments that mention '{term}' in r/{sub}",
                    plot_width=1000, plot_height=200,
                    x_axis_type="datetime", x_range=x_window)
    p[sub].line(months, df_main[f"{sub}_{term}"], line_width=2, line_color=color)
    p_names.append(p[sub])

# Show the results
show(column(p_names))

---
---

## Term Velocity: Automation

The velocity of the term "automation" in each of the target subreddits.

In [89]:
# Keyword for this section, wrapped in a list
words = ["automation"]

# Target subreddits
subs = [
    "Futurology", "technology", "science", "askscience", "gadgets",
    "books", "scifi", "movies", "gaming", "television",
    "news", "worldnews", "politics", "philosophy",
    "AskReddit", "todayilearned", "explainlikeimfive",
]

In [None]:
# Fetch the data for every subreddit and save both resulting frames as csv
df_main, df_comm = reddit_data_setter(words, subs, csv=True)

In [91]:
# Sanity check: print the (rows, columns) shape, then preview the first rows
print(df_main.shape)
df_main.head()

(151, 18)


Unnamed: 0,month,Futurology_automation,technology_automation,science_automation,askscience_automation,gadgets_automation,books_automation,scifi_automation,movies_automation,gaming_automation,television_automation,news_automation,worldnews_automation,politics_automation,philosophy_automation,AskReddit_automation,todayilearned_automation,explainlikeimfive_automation
0,2007-03-01,,,1,,,,,,,,,,,,,,
1,2007-04-01,,,0,,,,,,,,,,,,,,
2,2007-05-01,,,0,,,,,,,,,,,,,,
3,2007-06-01,,,0,,,,,,,,,,,,,,
4,2007-07-01,,,2,,,,,,,,,,,,,,


---

### Visualizations

In [None]:
# Output to current notebook and to a standalone HTML file
output_notebook()
output_file(f"{words[0]}-velocity-viz.html")

term = words[0]  # the keyword plotted in this section
months = df_main["month"]

# x-axis window: from the row at position 14 through the last row.
# Selecting the column by name avoids the deprecated positional `Series[0]` lookup.
# NOTE(review): the offset of 14 is carried over unchanged — confirm why the first rows are skipped.
x_window = (months.iloc[14], months.iloc[-1])

p = {}  # dict to hold plots
p_names = []  # list for plot names

# One line chart per subreddit. Colors are paired with this section's `subs`
# directly, so the cell does not depend on a `subs_colors` dict left over
# from an earlier section.
for sub, color in zip(subs, palette):
    p[sub] = figure(title=f"Comments that mention '{term}' in r/{sub}",
                    plot_width=1000, plot_height=200,
                    x_axis_type="datetime", x_range=x_window)
    p[sub].line(months, df_main[f"{sub}_{term}"], line_width=2, line_color=color)
    p_names.append(p[sub])

# Show the results
show(column(p_names))

---
---

## Term Velocity: Big Data

The velocity of the term "big data" in each of the target subreddits.

In [97]:
# Keyword for this section, wrapped in a list
words = ["big data"]

# Target subreddits
subs = [
    "Futurology", "technology", "science", "askscience", "gadgets",
    "books", "scifi", "movies", "gaming", "television",
    "news", "worldnews", "politics", "philosophy",
    "AskReddit", "todayilearned", "explainlikeimfive",
]

In [98]:
# Fetch the data for every subreddit and save both resulting frames as csv
df_main, df_comm = reddit_data_setter(words, subs, csv=True)

Futurology_bigdata...
technology_bigdata...
science_bigdata...
askscience_bigdata...
gadgets_bigdata...
books_bigdata...
scifi_bigdata...
movies_bigdata...
gaming_bigdata...
television_bigdata...
news_bigdata...
worldnews_bigdata...
politics_bigdata...
philosophy_bigdata...
AskReddit_bigdata...
todayilearned_bigdata...
explainlikeimfive_bigdata...


In [99]:
# Sanity check: print the (rows, columns) shape, then preview the first rows
print(df_main.shape)
df_main.head()

(153, 18)


Unnamed: 0,month,Futurology_bigdata,technology_bigdata,science_bigdata,askscience_bigdata,gadgets_bigdata,books_bigdata,scifi_bigdata,movies_bigdata,gaming_bigdata,television_bigdata,news_bigdata,worldnews_bigdata,politics_bigdata,philosophy_bigdata,AskReddit_bigdata,todayilearned_bigdata,explainlikeimfive_bigdata
0,2007-01-01,,,2,,,,,,,,,,,,,,
1,2007-02-01,,,1,,,,,,,,,,,,,,
2,2007-03-01,,,1,,,,,,,,,,,,,,
3,2007-04-01,,,2,,,,,,,,,,,,,,
4,2007-05-01,,,1,,,,,,,,,,,,,,


---

### Visualizations

In [None]:
# Output to current notebook and to a standalone HTML file
output_notebook()
output_file(f"{words[0].replace(' ', '')}-velocity-viz.html")

term = words[0]  # the keyword plotted in this section
term_key = term.replace(' ', '')  # spaces are stripped in the column names
months = df_main["month"]

# x-axis window: from the row at position 14 through the last row.
# Selecting the column by name avoids the deprecated positional `Series[0]` lookup.
# NOTE(review): the offset of 14 is carried over unchanged — confirm why the first rows are skipped.
x_window = (months.iloc[14], months.iloc[-1])

p = {}  # dict to hold plots
p_names = []  # list for plot names

# One line chart per subreddit. Colors are paired with this section's `subs`
# directly, so the cell does not depend on a `subs_colors` dict left over
# from an earlier section.
for sub, color in zip(subs, palette):
    p[sub] = figure(title=f"Comments that mention '{term}' in r/{sub}",
                    plot_width=1000, plot_height=200,
                    x_axis_type="datetime", x_range=x_window)
    p[sub].line(months, df_main[f"{sub}_{term_key}"], line_width=2, line_color=color)
    p_names.append(p[sub])

# Show the results
show(column(p_names))

---
---

## Overall Subreddit Comment Velocity

The total number of comments made in each of the subreddits over time. These totals are one way to normalize the keyword counts from the sections above.

In [112]:
# A list holding one empty string (not an empty list): no keyword this time,
# in order to look at all comments
words = [""]

# Target subreddits
subs = [
    "Futurology", "technology", "science", "askscience", "gadgets",
    "books", "scifi", "movies", "gaming", "television",
    "news", "worldnews", "politics", "philosophy",
    "AskReddit", "todayilearned", "explainlikeimfive",
]

---

In [111]:
def all_comments_monthly(subreddit, frequency="month", aggs="created_utc"):
    """
    Returns the JSON response of a PushShift API aggregate comment search as a Python dictionary.
    No keyword is sent, so the search is only restricted by subreddit.
    
    Note: if you're reading this note, that means that this function is still only written
    with the intention of automating a specific set of actions for a specific project.
    
    ---- Arguments ----
    subreddit: (str) subreddit name
    frequency: (str) set the size of the time buckets.
    aggs: (str) aggregate function name. Default is "created_utc".
    (For more information, read the PushShift API Documentation.)
    -------------------
    
    ---- Raises ----
    requests.HTTPError: if the API answers with any status code other than 200.
    requests.RequestException: if the request cannot be completed (including timeout).
    -------------------
    """
    
    # Endpoint and query parameters are kept separate so that `requests`
    # takes care of URL-encoding
    endpoint = "https://api.pushshift.io/reddit/search/comment/"
    params = {
        "subreddit": subreddit,
        "aggs": aggs,
        "frequency": frequency,
        "size": 100,
    }
    
    # Send the request; the timeout keeps a stalled connection from hanging the notebook
    response = requests.get(endpoint, params=params, timeout=120)
    
    # Check the response; stop execution if failed.
    # An explicit raise (instead of `assert`) still runs under `python -O`
    # and reports which request failed.
    if response.status_code != 200:
        raise requests.HTTPError(
            f"PushShift request failed with status {response.status_code}: {response.url}",
            response=response,
        )
    
    # Parse the JSON into a Python dictionary and return it for further processing
    return response.json()

In [108]:
def all_comments_aggregator(keywords, subreddits, csv=False, frequency="month", aggs="created_utc"):
    """
    Creates two DataFrames that hold combined data of all comments in all the target subreddits.
    
    An empty keyword ("") is fetched with `all_comments_monthly` (no keyword filter);
    any other keyword is fetched with `pushshift_api_request`, exactly as before.
    
    Note: if you're reading this note, that means that this function is still only written
    with the intention of automating a specific set of actions for a specific project.
    
    ---- Arguments ----
    keywords: (list) keyword(s) to search; use [""] to count all comments.
    subreddits: (list) name of subreddit(s) to include.
    csv: (bool) if True, save the resulting dataframes as csv file.
        The files are named after the first keyword:
        "<keyword>-monthly.csv" and "<keyword>-comments.csv".
    frequency: (str) set the size of the time buckets; also the name of the
        datetime column the counts are joined on.
    aggs: (str) aggregate function name. Default is "created_utc".
        Accepted for interface compatibility but not forwarded: the counts are
        parsed by `create_df`, which only reads "created_utc" buckets.
    (For more information, read the PushShift API Documentation.)
    -------------------
    
    ---- Returns ----
    (df_main, df_comm): comment counts per time bucket (one column per
    subreddit / keyword combination) and the concatenated comments.
    -------------------
    """
    from time import sleep

    comment_df_list = []  # Holds one comment dataframe per request
    word_df_list = []  # Holds one count dataframe per request
    
    # Run query for individual keywords on each subreddit
    # Subreddit (outer) -> keyword (inner) = all keywords in one subreddit at a time
    for subreddit in subreddits:
        for word in keywords:
            # Create unique column name for each subreddit / word combo
            col_name = f"{subreddit}_{word.replace(' ', '')}"
            
            # Indicates current subreddit / keyword
            print(f"{col_name}...")
            sleep(0.5)  # Add sleep time to reduce API load 

            # Make request and convert response to dictionary.
            # Without a keyword there is nothing to search for, so the
            # subreddit-only request is used instead of sending an empty query.
            if word.strip():
                dictionary = pushshift_api_request(word, subreddit, frequency=frequency)
            else:
                dictionary = all_comments_monthly(subreddit, frequency=frequency)

            # Append aggs count df to word_df_list
            word_df_list.append(create_df(dictionary, col_name, frequency=frequency))

            # Append comments df to comment_df_list
            comment_df_list.append(comments_df(dictionary))
            
            sleep(1.0)  # More sleep to reduce API load (same total pause as before)
    
    # Set the time-bucket column as index in order to concatenate list of dataframes
    df_main = pd.concat([df.set_index(frequency) for df in word_df_list],
                        axis=1, join="outer").reset_index()
    
    # Concatenate comment_df_list dataframes
    df_comm = pd.concat(comment_df_list, axis=0, sort=False,
                        join="outer", ignore_index=True)
        
    # If csv parameter is set to True, save datasets to filesystem as csv
    if csv:
        df_to_csv(df_main, f"{keywords[0]}-monthly.csv")
        df_to_csv(df_comm, f"{keywords[0]}-comments.csv")
    
    # Return df_main, df_comm, respectively
    return df_main, df_comm

---

In [113]:
# Run the aggregator defined for this section to create and save the dataset
df_main, df_comm = all_comments_aggregator(words, subs, csv=True)

Futurology_...
technology_...
science_...
askscience_...
gadgets_...
books_...
scifi_...
movies_...
gaming_...
television_...
news_...
worldnews_...
politics_...
philosophy_...
AskReddit_...
todayilearned_...
explainlikeimfive_...


In [114]:
# Sanity check: print the (rows, columns) shape, then preview the first rows
print(df_main.shape)
df_main.head()

(156, 18)


Unnamed: 0,month,Futurology_,technology_,science_,askscience_,gadgets_,books_,scifi_,movies_,gaming_,television_,news_,worldnews_,politics_,philosophy_,AskReddit_,todayilearned_,explainlikeimfive_
0,2006-10-01,,,562,,,,,,,,,,,,,,
1,2006-11-01,,,1798,,,,,,,,,,,,,,
2,2006-12-01,,,1848,,,,,,,,,,,,,,
3,2007-01-01,,,2018,,,,,,,,,,,,,,
4,2007-02-01,,,2605,,,,,,,,,,,,,,


---

### Visualizations

In [115]:
# Output to current notebook and to a standalone HTML file
output_notebook()
output_file("overall-subreddit-velocity-viz.html")

months = df_main["month"]

# x-axis window: from the row at position 14 through the last row.
# Selecting the column by name avoids the deprecated positional `Series[0]` lookup.
# NOTE(review): the offset of 14 is carried over unchanged — confirm why the first rows are skipped.
x_window = (months.iloc[14], months.iloc[-1])

p = {}  # dict to hold plots
p_names = []  # list for plot names

# One line chart per subreddit. Colors are paired with this section's `subs`
# directly, so the cell does not depend on a `subs_colors` dict left over
# from an earlier section.
for sub, color in zip(subs, palette):
    p[sub] = figure(title=f"Comments in r/{sub}",
                    plot_width=1000, plot_height=200,
                    x_axis_type="datetime", x_range=x_window)
    p[sub].line(months, df_main[f"{sub}_"], line_width=2, line_color=color)
    p_names.append(p[sub])

# Show the results
show(column(p_names))