# Data Scraper

This notebook is responsible for scraping all submissions, posts, and user data from Reddit. It is predominantly demonstrative; ultimately, the functions defined here will need to be translated into standalone Python scripts that outside parties can run.

### Starters

First, let's begin by importing our relevant libraries and establishing our root project directory.

In [1]:
import warnings
# Install a blanket 'ignore' filter so warnings do not clutter the cell output.
# NOTE(review): this also silences deprecation notices from third-party
# libraries — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import praw
import json
import re

In [3]:
from datetime import datetime as dt, timedelta as td
import requests

In [12]:
# Root folder for the project's data files (credentials in, scraped posts out).
# NOTE: machine-specific absolute path; edit this single line to run elsewhere.
DATA_FOLDER = '/Users/ssomani/research/heartlab/statins_reddit/data/'

# JSON file holding the Reddit API credentials. Derived from DATA_FOLDER so the
# two paths cannot drift apart when the data folder is moved.
API_LOGIN_FN = DATA_FOLDER + 'reddit_auth.json'

In [13]:
# Start and end calendar dates. The inner keys deliberately match the keyword
# arguments of `datetime`, so each entry can be unpacked straight into it.
date_time = {
    'start': {'year': 2010, 'month': 1, 'day': 1},
    'end': {'year': 2022, 'month': 5, 'day': 1},
}

# Whole-second Unix timestamps for both bounds. The datetimes are naive, so
# `.timestamp()` interprets them in the local timezone of the running machine.
start_time, end_time = (
    int(dt(**date_time[bound]).timestamp()) for bound in ('start', 'end')
)

In [14]:
# Load the Reddit API credentials from the JSON file at API_LOGIN_FN.
with open(API_LOGIN_FN, 'r', encoding='utf-8') as f:
    login_info = json.load(f)

# Check to ensure that login_info contains the necessary information. All
# missing keys are reported together so the credentials file can be fixed in
# one pass instead of failing on one anonymous assertion at a time.
missing_login_keys = [
    key for key in ('client_id', 'client_secret', 'user_agent')
    if key not in login_info
]
assert not missing_login_keys, "{0} is missing required key(s): {1}".format(
    API_LOGIN_FN, ', '.join(missing_login_keys))

### PRAW

In [77]:
# Build the PRAW client, passing every entry of login_info as a keyword argument.
reddit = praw.Reddit(**login_info)

In [78]:
# NOTE(review): `tc` is not used by any later cell shown here, and 'cholesterol'
# looks like a search term or subreddit name rather than a comment ID — confirm
# whether this scratch cell can be removed.
tc = reddit.comment('cholesterol')

#### Scrape!

In [151]:
def scrape_posts(subreddit_name, search_word, reddit_client=None):
    """Search one subreddit for every post matching a search word.

    Parameters
    ----------
    subreddit_name : str
        Name of the subreddit to search, without the ``r/`` prefix.
    search_word : str
        Query passed to the subreddit's search.
    reddit_client : optional
        Object exposing ``subreddit(name)`` whose result exposes
        ``search(query, limit=None)``, e.g. a ``praw.Reddit`` instance.
        Defaults to the notebook-level ``reddit`` client.

    Returns
    -------
    posts : dict
        Maps each post id to the post object yielded by the search.
    post_info : pandas.DataFrame
        One row per post, indexed by post id (index name ``'id'``), with the
        columns ``query``, ``subreddit``, ``author``, ``title``, ``body``,
        ``number_of_comments``, ``upvotes`` and ``upvote_ratio``. The columns
        are present even when the search yields no posts.
    """
    columns = ['query', 'subreddit', 'author', 'title', 'body',
               'number_of_comments', 'upvotes', 'upvote_ratio']

    # Only fall back to the global client when none was supplied.
    client = reddit if reddit_client is None else reddit_client

    # Create the Subreddit object that matches the subreddit name.
    subreddit = client.subreddit(subreddit_name)

    posts = {}
    # Row data keyed by post id: a repeated id overwrites its earlier row, and
    # the DataFrame is built once at the end instead of growing cell by cell.
    rows = {}

    # Iterate over every search result (limit=None requests no result cap).
    for post in subreddit.search(search_word, limit=None):

        # Save this post as k:v pair.
        posts[post.id] = post

        # Store this post's metadata.
        rows[post.id] = {
            'query': search_word,
            'subreddit': subreddit_name,
            # Keep plain text rather than the client's author object; the
            # author can be None (presumably for deleted accounts).
            'author': None if post.author is None else str(post.author),
            'title': post.title,
            'body': post.selftext,
            'number_of_comments': post.num_comments,
            'upvotes': post.score,
            'upvote_ratio': post.upvote_ratio,
        }

    post_info = pd.DataFrame(
        list(rows.values()),
        index=pd.Index(list(rows), name='id'),
        columns=columns,
    )

    return posts, post_info

In [9]:
def scrape_reddit(subreddits, search_words, output_path=None):
    """Scrape Reddit for posts matching a set of search words in a set of subreddits.

    Every (subreddit, search word) pair is searched with ``scrape_posts``.
    After each search the metadata accumulated so far is rewritten to an Excel
    workbook, so a partial result is still on disk if a later search fails.

    Parameters
    ----------
    subreddits : iterable of str
        Subreddit names to search, without the ``r/`` prefix.
    search_words : iterable of str
        Queries to run in every subreddit. It is iterated once per subreddit,
        so pass a re-iterable collection such as a list.
    output_path : str, optional
        Destination workbook. Defaults to ``DATA_FOLDER + 'scraped_posts.xlsx'``.

    Returns
    -------
    all_posts : dict
        Maps post id to the post object, merged over all searches.
    all_posts_df : pandas.DataFrame
        Metadata rows of all searches stacked in search order. A post matched
        by several queries appears once per matching query.
    """

    POST_INFO = ['query', 'subreddit', 'author', 'title', 'body', 'number_of_comments', 'upvotes', 'upvote_ratio']

    if output_path is None:
        output_path = DATA_FOLDER + 'scraped_posts.xlsx'

    all_posts = {}
    all_posts_df = pd.DataFrame(columns=POST_INFO)
    all_posts_df.index.name = 'id'

    # Non-empty per-search frames; empty ones are skipped because concatenating
    # empty frames is deprecated in recent pandas and adds no rows anyway.
    frames = []

    for subreddit in subreddits:
        for search_word in search_words:
            print("Now searching r/{0} for {1}".format(subreddit, search_word))

            post_ij, post_df_ij = scrape_posts(subreddit, search_word)

            print("Completed search. Joining!")

            all_posts.update(post_ij)
            if not post_df_ij.empty:
                frames.append(post_df_ij)
                all_posts_df = pd.concat(frames).reindex(columns=POST_INFO)
                all_posts_df.index.name = 'id'

            # Checkpoint after every search. xlsxwriter options must go through
            # engine_kwargs; the bare `options=` keyword is deprecated in pandas.
            # strings_to_urls=False keeps URL-like text as plain strings.
            with pd.ExcelWriter(output_path, engine='xlsxwriter',
                                engine_kwargs={'options': {'strings_to_urls': False}}) as writer:
                all_posts_df.to_excel(writer)

    return all_posts, all_posts_df

In [175]:
# Subreddits to search, in the order they will be scraped.
subreddits = [
    'ketoscience', 'science', 'keto', 'Health', 'conspiracy',
    'todayilearned', 'ScientificNutrition', 'askscience', 'Futurology', 'news',
    'PlantBasedDiet', 'nutrition', 'Paleo', 'fasting', 'longevity',
    'Supplements', 'Coronavirus', 'COVID19', 'Cholesterol', 'Fitness',
]

# Statin search terms, grouped so each generic name is followed by its brand name(s).
statin_name_groups = [
    ('atorvastatin', 'lipitor'),
    ('rosuvastatin', 'crestor'),
    ('pitavastatin', 'livalo', 'zypitamag'),
    ('simvastatin', 'zocor'),
    ('pravastatin', 'pravachol'),
    ('lovastatin', 'altoprev'),
    ('fluvastatin', 'lescol'),
]

# Flatten the groups into the single ordered list of queries.
search_words = [name for group in statin_name_groups for name in group]

# Run every query against every subreddit.
scrape_reddit(subreddits, search_words)

Now searching r/ketoscience for atorvastatin
Completed search. Joining!
Now searching r/ketoscience for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ketoscience for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ketoscience for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ketoscience for pitavastatin
Completed search. Joining!


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Now searching r/ketoscience for livalo
Completed search. Joining!
Now searching r/ketoscience for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ketoscience for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ketoscience for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ketoscience for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ketoscience for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ketoscience for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ketoscience for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ketoscience for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ketoscience for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/science for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/science for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/science for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/science for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/science for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/science for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/science for zypitamag
Completed search. Joining!


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Now searching r/science for simvastatin
Completed search. Joining!
Now searching r/science for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/science for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/science for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/science for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/science for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/science for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/science for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/keto for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Health for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/conspiracy for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/todayilearned for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/ScientificNutrition for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/askscience for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Futurology for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/news for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/PlantBasedDiet for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/nutrition for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Paleo for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/fasting for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/longevity for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Supplements for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Coronavirus for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/COVID19 for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Cholesterol for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for atorvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for lipitor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for rosuvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for crestor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for pitavastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for livalo


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for zypitamag


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for simvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for zocor


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for pravastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for pravachol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for lovastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for altoprev


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for fluvastatin


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!
Now searching r/Fitness for lescol


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


Completed search. Joining!


  with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) as writer:


## PushShift.io?

Let's try Pushshift.io instead, as a better way of collecting and tabulating this data.

In [4]:
# Shared PSAW client; the scrape_* functions below call its search_submissions / search_comments iterators.
from psaw import PushshiftAPI
api = PushshiftAPI()

In [5]:
# Output column name -> attribute read from a PSAW result.
# Key order matters: the scrape functions use it as the column order of their frames.
POST_DF_MAPPING = dict(
    id='id',
    author='author',
    title='title',
    date='created_utc',
    body='selftext',
    n_comments='num_comments',
    upvotes='score',
)

# Same idea for comments. 'title' is kept so both frames share that column;
# api_result_conversion substitutes '' for attributes a result does not have.
COMM_DF_MAPPING = dict(
    id='id',
    author='author',
    title='title',
    body='body',
    date='created_utc',
    link_id='link_id',
    parent_id='parent_id',
    upvotes='score',
)

# Lookup by result type, as expected by api_result_conversion.
PSAW_MAPPINGS = dict(post=POST_DF_MAPPING, comment=COMM_DF_MAPPING)

In [6]:
def api_result_conversion(res, type_of_res):
    """Flatten one PSAW result into a list of field values.

    ARGS
    ----
    res : result object yielded by a PSAW search; its fields are read as attributes.
    type_of_res : 'post' or 'comment', selecting the mapping in PSAW_MAPPINGS.

    RETURNS
    -------
    A list with one value per mapping entry, in the mapping's key order.
    A field the result does not have is represented by ''.

    RAISES
    ------
    KeyError if type_of_res is not a key of PSAW_MAPPINGS.
    """

    mapping = PSAW_MAPPINGS[type_of_res]

    # Only a missing attribute falls back to ''; any other error propagates.
    return [getattr(res, attr, '') for attr in mapping.values()]

In [10]:
def scrape_reddit_posts(subreddits, search_words, save_name):
    """Scrape submissions matching each search word in each of the given subreddits.

    ARGS
    ----
    subreddits : iterable of subreddit names (without the 'r/' prefix).
    search_words : iterable of query strings; every word is searched in every subreddit.
    save_name : file name, relative to DATA_FOLDER, of the Excel workbook to write.

    RETURNS
    -------
    DataFrame with the columns 'query', 'subreddit' and the keys of POST_DF_MAPPING,
    holding one row per distinct submission id (the first hit is kept).
    """

    cols = ['query', 'subreddit']
    cols.extend(POST_DF_MAPPING.keys())

    posts = []

    for subreddit in subreddits:
        for search_word in search_words:
            print("Now searching r/{0} for {1}".format(subreddit, search_word))

            for post in api.search_submissions(q=search_word, subreddit=subreddit):
                # Value order must follow `cols`: the search word is the 'query',
                # the subreddit name goes under 'subreddit'.
                posts.append([search_word, subreddit] + api_result_conversion(post, 'post'))

            # Running total over all searches so far, duplicates included.
            print("Completed search. Total {0} entries!".format(len(posts)))

    post_df = pd.DataFrame(posts, columns=cols).drop_duplicates(subset='id')

    # 'id' stays a regular column; a later cell calls set_index('id') on this frame itself.
    # NOTE(review): scrape_reddit_comments_in_post reads post ids from the index,
    # so index the frame by 'id' before passing it there.

    # xlsxwriter options are passed through `engine_kwargs`; the bare `options=`
    # keyword of ExcelWriter is deprecated. strings_to_urls=False keeps URL-like
    # text as plain strings instead of hyperlinks.
    with pd.ExcelWriter(DATA_FOLDER + save_name, engine='xlsxwriter',
                        engine_kwargs={'options': {'strings_to_urls': False}}) as writer:
        post_df.to_excel(writer)

    return post_df

def scrape_reddit_comments(subreddits, search_words, save_name):
    """Scrape comments matching each search word in each of the given subreddits.

    ARGS
    ----
    subreddits : iterable of subreddit names (without the 'r/' prefix).
    search_words : iterable of query strings; every word is searched in every subreddit.
    save_name : file name, relative to DATA_FOLDER, of the Excel workbook to write.

    RETURNS
    -------
    DataFrame with the columns 'query', 'subreddit' and the keys of COMM_DF_MAPPING,
    holding one row per distinct comment id (the first hit is kept).
    """

    cols = ['query', 'subreddit']
    cols.extend(COMM_DF_MAPPING.keys())

    comms = []

    for subreddit in subreddits:
        for search_word in search_words:
            print("Now searching r/{0} for {1}".format(subreddit, search_word))

            for comm in api.search_comments(q=search_word, subreddit=subreddit):
                # Value order must follow `cols`: the search word is the 'query',
                # the subreddit name goes under 'subreddit'.
                comms.append([search_word, subreddit] + api_result_conversion(comm, 'comment'))

            # Running total over all searches so far, duplicates included.
            print("Completed search. Total {0} entries!".format(len(comms)))

    comm_df = pd.DataFrame(comms, columns=cols).drop_duplicates(subset='id')

    # 'id' stays a regular column; a later cell calls set_index('id') on this frame itself.

    # xlsxwriter options are passed through `engine_kwargs`; the bare `options=`
    # keyword of ExcelWriter is deprecated. strings_to_urls=False keeps URL-like
    # text as plain strings instead of hyperlinks.
    with pd.ExcelWriter(DATA_FOLDER + save_name, engine='xlsxwriter',
                        engine_kwargs={'options': {'strings_to_urls': False}}) as writer:
        comm_df.to_excel(writer)

    return comm_df

In [8]:
def scrape_reddit_comments_in_post(posts, save_name):
    """Scrape every comment that belongs to the given posts.

    ARGS
    ----
    posts : DataFrame whose index holds the post ids to look up (duplicates are ignored).
    save_name : file name, relative to DATA_FOLDER, of the Excel workbook to write.

    RETURNS
    -------
    DataFrame with one row per comment and the keys of COMM_DF_MAPPING as columns.
    """

    cols = list(COMM_DF_MAPPING.keys())
    comments = []

    for post_id in posts.index.drop_duplicates():
        print("Now searching post <{0}>".format(post_id))

        comments.extend(
            api_result_conversion(comment, 'comment')
            for comment in api.search_comments(link_id=post_id)
        )

        # Running total over all posts searched so far.
        print("Completed search. Total {0} entries!".format(len(comments)))

    comm_df = pd.DataFrame(comments, columns=cols)

    # NOTE(review): nothing is written until every post has been searched, so an
    # interrupted run loses all comments collected up to that point.
    # xlsxwriter options are passed through `engine_kwargs`; the bare `options=`
    # keyword of ExcelWriter is deprecated.
    with pd.ExcelWriter(DATA_FOLDER + save_name, engine='xlsxwriter',
                        engine_kwargs={'options': {'strings_to_urls': False}}) as writer:
        comm_df.to_excel(writer)

    return comm_df


In [44]:
# Look up the comments under every post id in `posts.index` and save them to the named workbook.
scrape_reddit_comments_in_post(posts, 'comments_from_posts_20220706.xlsx')

Now searching post <tqf3q1>
Completed search. Total 0 entries!
Now searching post <tqctzk>
Completed search. Total 0 entries!
Now searching post <t5j2iq>
Completed search. Total 0 entries!
Now searching post <sdiu8l>
Completed search. Total 0 entries!
Now searching post <rorrcc>
Completed search. Total 0 entries!
Now searching post <rnqlnj>
Completed search. Total 0 entries!
Now searching post <rc7ati>
Completed search. Total 0 entries!
Now searching post <q20bog>
Completed search. Total 47 entries!
Now searching post <p1w57r>
Completed search. Total 49 entries!
Now searching post <p1qo5n>
Completed search. Total 49 entries!
Now searching post <o2rimg>
Completed search. Total 52 entries!
Now searching post <nuyvd3>
Completed search. Total 56 entries!
Now searching post <njgthz>
Completed search. Total 64 entries!
Now searching post <m0jdr3>
Completed search. Total 90 entries!
Now searching post <lwzj79>
Completed search. Total 97 entries!
Now searching post <kv4q31>
Completed search. T

KeyboardInterrupt: 

In [16]:
# Subreddits to search, in the order they are scraped.
subreddits = [
    'ketoscience', 'science', 'keto', 'Health', 'conspiracy',
    'todayilearned', 'ScientificNutrition', 'askscience', 'news', 'PlantBasedDiet',
    'nutrition', 'Paleo', 'longevity', 'Supplements', 'COVID19',
    'Cholesterol', 'skeptic', 'diabetes', 'stopusingstatins',
]

# Query terms; each one is searched in every subreddit above.
search_words = [
    'statin',
    'atorvastatin', 'lipitor',
    'rosuvastatin', 'crestor',
    'pitavastatin', 'livalo', 'zypitamag',
    'simvastatin', 'zocor',
    'pravastatin', 'pravachol',
    'lovastatin', 'altoprev',
    'fluvastatin', 'lescol',
]

# Comments first, then posts; each call also writes its result to an Excel workbook.
comms = scrape_reddit_comments(subreddits, search_words, 'comments_20220712.xlsx')
posts = scrape_reddit_posts(subreddits, search_words, 'posts_20220712.xlsx')

Now searching r/ketoscience for statin
Completed search. Total 76 entries!
Now searching r/ketoscience for atorvastatin
Completed search. Total 79 entries!
Now searching r/ketoscience for lipitor
Completed search. Total 84 entries!
Now searching r/ketoscience for rosuvastatin
Completed search. Total 86 entries!
Now searching r/ketoscience for crestor
Completed search. Total 91 entries!
Now searching r/ketoscience for pitavastatin
Completed search. Total 91 entries!
Now searching r/ketoscience for livalo
Completed search. Total 91 entries!
Now searching r/ketoscience for zypitamag
Completed search. Total 91 entries!
Now searching r/ketoscience for simvastatin
Completed search. Total 100 entries!
Now searching r/ketoscience for zocor
Completed search. Total 101 entries!
Now searching r/ketoscience for pravastatin
Completed search. Total 101 entries!
Now searching r/ketoscience for pravachol
Completed search. Total 102 entries!
Now searching r/ketoscience for lovastatin
Completed search. 

In [17]:
# Reload previously saved workbooks: comments indexed by 'link_id', posts by 'id'.
comms = pd.read_excel(DATA_FOLDER + 'comments_20220626.xlsx', index_col='link_id')
posts = pd.read_excel(DATA_FOLDER + 'posts_20220626.xlsx', index_col='id')

# Convert the stored epoch values to datetime objects.
# NOTE(review): dt.fromtimestamp yields naive local-time values, so the result
# depends on the machine's timezone; re-running this cell on already-converted
# columns will fail.
posts['date'] = posts['date'].apply(dt.fromtimestamp)
comms['date'] = comms['date'].apply(dt.fromtimestamp)

In [31]:
# One alternation over all search words; re.escape keeps a word containing
# regex metacharacters from changing the pattern's meaning.
search_words_regex = re.compile("|".join(map(re.escape, search_words)))


def _mentions_search_word(text):
    """Return True when `text` is a string containing one of the search words.

    Non-string cells (e.g. an empty body read back from Excel as NaN) count as
    "no match" instead of raising TypeError inside the regex search.
    """
    return isinstance(text, str) and search_words_regex.search(text) is not None


# Keep a post when either its body or its title mentions a search word.
# NOTE(review): matching is case-sensitive, so a capitalised mention such as
# "Lipitor" is not matched by 'lipitor' - confirm this is intended.
matching_rows = posts['body'].map(_mentions_search_word) | \
    posts['title'].map(_mentions_search_word)

posts = posts.loc[matching_rows, :].sort_values(by='upvotes')

In [32]:
# Save the filtered posts. xlsxwriter options are passed through `engine_kwargs`;
# the bare `options=` keyword of ExcelWriter is deprecated. strings_to_urls=False
# keeps URL-like text as plain strings instead of hyperlinks.
with pd.ExcelWriter(DATA_FOLDER + 'posts_20220712.xlsx', engine='xlsxwriter',
                    engine_kwargs={'options': {'strings_to_urls': False}}) as writer:
    posts.to_excel(writer)

In [47]:
# Text to analyse: title plus body for posts, the body alone for comments.
posts['content'] = posts['title'] + '. ' + posts['body']
comms['content'] = comms['body']

# Tag each row with where it came from.
posts['type'] = 'post'
comms['type'] = 'comment'

# Index both frames by Reddit id. The guard makes the cell safe to re-run and
# safe when a frame was already loaded with 'id' as its index.
# NOTE(review): set_index discards the previous index, e.g. 'link_id' when the
# comments were loaded with index_col='link_id'.
if posts.index.name != 'id':
    posts = posts.set_index('id')
if comms.index.name != 'id':
    comms = comms.set_index('id')

Unnamed: 0,query,subreddit,id,author,title,date,body,n_comments,upvotes,content
122,science,statin,690frt,[deleted],A new study helps debunk the widespread belief...,1493819266,[deleted],0,0,A new study helps debunk the widespread belief...
1688,skeptic,statin,3yhkcr,FutureFormerRedditor,Are statin drugs the cause of widespread decli...,1451280370,,2,0,Are statin drugs the cause of widespread decli...
357,keto,statin,6kynru,Cetaphil911,Blood test help...,1499069941,I do not have the specific numbers right now b...,13,0,Blood test help.... I do not have the specific...
734,Health,statin,7t4yk,quakerorts,High-dose statin therapy promotes tumor growth,1233170813,,0,0,High-dose statin therapy promotes tumor growth.
1100,Paleo,statin,qnmcy,zak_on_reddit,On 3/8/12 Limpdick Limbaugh is praising Paleo ...,1331231247,On THU 3/8 I'm listening to the Hillbilly Hero...,4,0,On 3/8/12 Limpdick Limbaugh is praising Paleo ...
...,...,...,...,...,...,...,...,...,...,...
321,keto,statin,7wnkhk,Lyttle86,Keto is changing my practice.,1518290784,I’m a physician in a rural town in Canada. I ...,199,685,Keto is changing my practice.. I’m a physician...
304,keto,statin,93t5y0,jordanonfilm,"Nine months into keto, I've given birth to a n...",1533158553,TL; DR: FAT MAKES YOU FULL. 350 to 199.\n\nIn ...,111,829,"Nine months into keto, I've given birth to a n..."
112,science,statin,c57kd5,cassidy498,"Cholesterol medication could invite diabetes, ...",1561464326,,131,910,"Cholesterol medication could invite diabetes, ..."
298,keto,statin,9fex2w,Sayonara_Sugar,8 months of reducing sugar and simple carbs an...,1536812406,Yes.\n\nYes it was.\n\nI weighed in on January...,151,1615,8 months of reducing sugar and simple carbs an...


In [50]:
# Display the comments frame.
comms

Unnamed: 0,query,subreddit,id,author,title,body,date,link_id,parent_id,upvotes,content
0,ketoscience,statin,icnalcx,dirtyloophole,,I completely agree that there is a lot of BS Y...,1655424021,t3_rnqlnj,t1_i9t0ktq,1,I completely agree that there is a lot of BS Y...
1,ketoscience,statin,i9su4gy,Etadenod,,bravo. this is the best answer why you should ...,1653396317,t3_rnqlnj,t1_hpxgazc,1,bravo. this is the best answer why you should ...
2,ketoscience,statin,i9stx7r,Etadenod,,Evidence supports the idea that statin therapy...,1653396204,t3_rnqlnj,t1_hptsfui,1,Evidence supports the idea that statin therapy...
3,ketoscience,statin,i8gq5ls,Triabolical_,,There is unfortunately not great research on t...,1652456719,t3_uo0wco,t1_i8fwjc8,1,There is unfortunately not great research on t...
4,ketoscience,statin,i60eyz4,sdavis484,,If you take a statin? Weird correlation.,1650814542,t3_uawfdn,t1_i60c4fz,1,If you take a statin? Weird correlation.
...,...,...,...,...,...,...,...,...,...,...,...
10556,diabetes,lovastatin,co9d4e9,altintx,,"I swing between A1c of 5.8-6.2, and my cholest...",1422928570,t3_2uka8i,t3_2uka8i,1,"I swing between A1c of 5.8-6.2, and my cholest..."
10557,diabetes,lovastatin,cibgfko,Smokeya,,"Goto any store with a pharmacy, they sell some...",1403163936,t3_28hrqu,t3_28hrqu,2,"Goto any store with a pharmacy, they sell some..."
10558,diabetes,lovastatin,cgizrzz,i4k20z3,,"No, I do take other medications. Including a l...",1396530448,t3_222o4b,t1_cgitbqp,1,"No, I do take other medications. Including a l..."
10559,diabetes,lovastatin,c4kta4n,jordanlund,,Watch the potassium as one of the side effects...,1336333786,t3_t9ayj,t1_c4kpvq9,1,Watch the potassium as one of the side effects...


## PRAW

Load login information for PRAW to begin scraping :) 

## Failed Experiments

In [156]:
class PushShift:
    """Iterator that pages backwards in time through pushshift.io search results.

    Every step requests the posts matching ``search_phrase`` in ``subreddit``
    that were created before the current cursor, converts them to a DataFrame
    and moves the cursor to the ``created_utc`` of the last post received.
    Iteration ends when the cursor is no later than ``start_time`` or when a
    request returns no posts.
    """

    def __init__(self, subreddit, search_phrase, start_time, end_time):
        """Initialize our PushShift iterator.

        ARGS
        ----
        subreddit : name of the subreddit to search.
        search_phrase : query string sent as the ``q`` parameter.
        start_time : epoch timestamp; paging stops once the cursor is not later than it.
        end_time : epoch timestamp the cursor starts from (sent as ``before``).
        """

        self.sr = subreddit
        self.q = search_phrase
        self.st = start_time
        self.et = end_time

    def __iter__(self):
        # (Re)start paging from the end of the time window.
        self.time = self.et
        return self

    def __next__(self):
        """Fetch the next page of posts and return it as a DataFrame.

        RAISES
        ------
        StopIteration once the cursor has reached ``start_time`` or a page is empty.
        requests.HTTPError when the service answers with an error status.
        """

        if self.time <= self.st:
            raise StopIteration

        params = {
            'q': self.q,
            'subreddit': self.sr,
            'before': self.time,
        }

        # BASE_POSTS_URL is expected to be defined elsewhere in the notebook.
        # Passing `params` lets requests URL-encode the search phrase.
        response = requests.get(BASE_POSTS_URL, params=params)
        response.raise_for_status()
        posts = response.json()['data']

        # An empty page means there is nothing older left to fetch.
        if not posts:
            raise StopIteration

        # assumes the service returns posts newest-first, so the last entry is
        # the oldest one on the page - TODO confirm
        # NOTE(review): no lower bound is sent, so the final page may contain
        # posts older than start_time.
        self.time = posts[-1]['created_utc']

        return self.convert_to_output(posts)

    def convert_to_output(self, posts):
        """Convert the posts of one GET query into a DataFrame indexed by post id.

        ARGS
        ----
        posts : list of dicts, one per post, as found under 'data' in the response.

        RETURNS
        -------
        DataFrame (index name 'id') with the columns query, subreddit, author,
        title, body, number_of_comments, upvotes and upvote_ratio. A field a
        post does not carry is left empty (None/NaN). When the same id occurs
        twice the later entry wins.
        """

        columns = ['query', 'subreddit', 'author', 'title', 'body',
                   'number_of_comments', 'upvotes', 'upvote_ratio']

        # Build all rows first and create the frame once instead of growing it cell by cell.
        rows = {}
        for post in posts:
            rows[post['id']] = [
                self.q,
                self.sr,
                post.get('author'),
                post.get('title'),
                post.get('selftext'),
                post.get('num_comments'),
                post.get('score'),
                post.get('upvote_ratio'),
            ]

        post_info = pd.DataFrame(list(rows.values()), index=list(rows), columns=columns)
        post_info.index.name = 'id'

        return post_info

In [121]:
def scrape_posts(subreddit_name, search_word):
    """Collect all posts matching a search word in a subreddit via the PushShift iterator.

    The search window is taken from the notebook-level start_time / end_time.

    NOTE(review): a function with this name is also defined earlier in the
    notebook; whichever cell ran last is the one in effect.

    RETURNS
    -------
    DataFrame indexed by 'id' holding every page the iterator produced, in order.
    """

    post_cols = ['query', 'subreddit', 'author', 'title', 'body', 'number_of_comments', 'upvotes', 'upvote_ratio']

    # The empty frame fixes the column set even when no page comes back.
    empty_frame = pd.DataFrame(columns=post_cols)
    empty_frame.index.name = 'id'

    # Gather the pages first and concatenate once rather than re-copying the
    # accumulated frame for every page.
    pages = list(PushShift(subreddit_name, search_word, start_time, end_time))

    return pd.concat([empty_frame] + pages)

In [46]:
def scrape_reddit(subreddits, search_words):
    """Scrape posts matching each search word in each subreddit via scrape_posts.

    ARGS
    ----
    subreddits : iterable of subreddit names.
    search_words : iterable of query strings; every word is searched in every subreddit.

    RETURNS
    -------
    DataFrame indexed by 'id' with the results of all searches stacked in order.
    The same frame is written to 'scraped_posts_full.xlsx' in DATA_FOLDER.
    """

    post_columns = ['query', 'subreddit', 'author', 'title', 'body', 'number_of_comments', 'upvotes', 'upvote_ratio']

    all_posts_df = pd.DataFrame(columns=post_columns)
    all_posts_df.index.name = 'id'

    for subreddit in subreddits:
        for search_word in search_words:
            print("Now searching r/{0} for {1}".format(subreddit, search_word))

            post_df_ij = scrape_posts(subreddit, search_word)

            print("Completed search. Joining!")
            all_posts_df = pd.concat([all_posts_df, post_df_ij])

            # The workbook is rewritten after every search, so it always holds
            # the results gathered so far. xlsxwriter options are passed through
            # `engine_kwargs`; the bare `options=` keyword is deprecated.
            with pd.ExcelWriter(DATA_FOLDER + 'scraped_posts_full.xlsx', engine='xlsxwriter',
                                engine_kwargs={'options': {'strings_to_urls': False}}) as writer:
                all_posts_df.to_excel(writer)

    return all_posts_df

In [None]:
def scrape_reddit_cmnts(subreddits, search_words):
    """Scrape comments matching each search word in each of the given subreddits.

    ARGS
    ----
    subreddits : iterable of subreddit names.
    search_words : iterable of query strings; every word is searched in every subreddit.

    RETURNS
    -------
    DataFrame indexed by comment 'id' with the columns query, subreddit, author,
    body, link_id, parent_id and upvotes. Within one search a comment id appears
    once; the same comment may appear again under a different query or subreddit.
    The same frame is written to 'scraped_comments_full_psaw.xlsx' in DATA_FOLDER.
    """

    POST_INFO = ['query', 'subreddit', 'author', 'body', 'link_id', 'parent_id', 'upvotes']

    all_posts_df = pd.DataFrame(columns=POST_INFO)
    all_posts_df.index.name = 'id'

    for subreddit in subreddits:
        for search_word in search_words:
            print("Now searching r/{0} for {1}".format(subreddit, search_word))

            # Rows keyed by comment id, so a repeated id within this search is stored once.
            rows = {}

            for post in api.search_comments(q=search_word, subreddit=subreddit):
                rows[post.id] = {
                    'query': search_word,
                    'subreddit': subreddit,
                    'author': post.author,
                    # Only a missing body is tolerated; other errors propagate.
                    'body': getattr(post, 'body', None),
                    'link_id': post.link_id,
                    'parent_id': post.parent_id,
                    'upvotes': post.score,
                }

            # Build the frame once per search instead of growing it cell by cell.
            post_info = pd.DataFrame(list(rows.values()),
                                     index=pd.Index(list(rows), name='id'),
                                     columns=POST_INFO)

            print("Completed search. Found {0} entries. Joining!".format(post_info.shape[0]))

            all_posts_df = pd.concat([all_posts_df, post_info])

            # The workbook is rewritten after every search, so it always holds
            # the results gathered so far. xlsxwriter options are passed through
            # `engine_kwargs`; the bare `options=` keyword is deprecated.
            with pd.ExcelWriter(DATA_FOLDER + 'scraped_comments_full_psaw.xlsx', engine='xlsxwriter',
                                engine_kwargs={'options': {'strings_to_urls': False}}) as writer:
                all_posts_df.to_excel(writer)

    return all_posts_df

In [313]:
# Subreddits to search, in the order they are scraped.
subreddits = [
    'ketoscience', 'science', 'keto', 'Health', 'conspiracy',
    'todayilearned', 'ScientificNutrition', 'askscience', 'Futurology', 'news',
    'PlantBasedDiet', 'nutrition', 'Paleo', 'fasting', 'longevity',
    'Supplements', 'Coronavirus', 'COVID19', 'Cholesterol', 'Fitness',
]

# Query terms; each one is searched in every subreddit above.
search_words = [
    'statin',
    'atorvastatin', 'lipitor',
    'rosuvastatin', 'crestor',
    'pitavastatin', 'livalo', 'zypitamag',
    'simvastatin', 'zocor',
    'pravastatin', 'pravachol',
    'lovastatin', 'altoprev',
    'fluvastatin', 'lescol',
]

scrape_reddit_cmnts(subreddits, search_words)

Now searching r/ketoscience for statin
Completed search. Found 464 entries. Joining!
Now searching r/ketoscience for atorvastatin
Completed search. Found 12 entries. Joining!
Now searching r/ketoscience for lipitor
Completed search. Found 23 entries. Joining!
Now searching r/ketoscience for rosuvastatin
Completed search. Found 11 entries. Joining!
Now searching r/ketoscience for crestor
Completed search. Found 15 entries. Joining!
Now searching r/ketoscience for pitavastatin
Completed search. Found 2 entries. Joining!
Now searching r/ketoscience for livalo
Completed search. Found 0 entries. Joining!
Now searching r/ketoscience for zypitamag
Completed search. Found 0 entries. Joining!
Now searching r/ketoscience for simvastatin
Completed search. Found 8 entries. Joining!
Now searching r/ketoscience for zocor
Completed search. Found 0 entries. Joining!
Now searching r/ketoscience for pravastatin
Completed search. Found 4 entries. Joining!
Now searching r/ketoscience for pravachol
Complet

In [303]:
def scrape_author_posts(authors, search_word=None, subreddit=None):
    """Scrape the comments written by each of the given authors.

    NOTE(review): despite the name, this queries comments (search_comments) and
    stores comment fields; confirm whether submissions were intended instead.

    ARGS
    ----
    authors : iterable of Reddit user names.
    search_word : optional query string restricting the comments (default: no restriction).
    subreddit : optional subreddit name restricting the comments (default: no restriction).

    RETURNS
    -------
    DataFrame indexed by comment 'id', one block of rows per author, stacked in
    the order of `authors`. The same frame is written to
    'scraped_comments_full_psaw.xlsx' in DATA_FOLDER.
    """

    POST_INFO = ['query', 'subreddit', 'author', 'title', 'body', 'number_of_comments', 'upvotes', 'upvote_ratio',
                 'link_id', 'parent_id']

    all_posts_df = pd.DataFrame(columns=POST_INFO)
    all_posts_df.index.name = 'id'

    for author in authors:
        print("Now searching comments by u/{0}".format(author))

        # Only send the filters that were actually given.
        filters = {'author': author}
        if search_word is not None:
            filters['q'] = search_word
        if subreddit is not None:
            filters['subreddit'] = subreddit

        # Rows keyed by comment id, so a repeated id for this author is stored once.
        rows = {}

        for post in api.search_comments(**filters):
            rows[post.id] = {
                'query': search_word,
                # presumably results carry a `subreddit` attribute - TODO confirm
                'subreddit': subreddit if subreddit is not None else getattr(post, 'subreddit', None),
                'author': post.author,
                # Only a missing body is tolerated; other errors propagate.
                'body': getattr(post, 'body', None),
                'link_id': post.link_id,
                'parent_id': post.parent_id,
                'upvotes': post.score,
            }

        post_info = pd.DataFrame(list(rows.values()),
                                 index=pd.Index(list(rows), name='id'),
                                 columns=POST_INFO)

        print("Completed search. Found {0} entries. Joining!".format(post_info.shape[0]))

        all_posts_df = pd.concat([all_posts_df, post_info])

        # Rewritten once per author (not once per comment), so the workbook
        # always holds the results gathered so far.
        # NOTE(review): scrape_reddit_cmnts writes to this same file name, so
        # running both overwrites one result with the other.
        with pd.ExcelWriter(DATA_FOLDER + 'scraped_comments_full_psaw.xlsx', engine='xlsxwriter',
                            engine_kwargs={'options': {'strings_to_urls': False}}) as writer:
            all_posts_df.to_excel(writer)

    return all_posts_df

Unnamed: 0                                              icnalcx
query                                                    statin
subreddit                                           ketoscience
author                                            dirtyloophole
body          I completely agree that there is a lot of BS Y...
parent_id                                            t1_i9t0ktq
upvotes                                                       1
Name: t3_rnqlnj, dtype: object