# Importing Data

In [1]:
import requests
import time
from tqdm import tqdm
import pandas as pd

# Obtain the Data from Reddit

## Option 1 - Importing Data using json

In [2]:
# create function to access the api and receive the requested data
def get_json(url, max_pages=40, delay=1):
    """Page through a Reddit JSON listing and collect the raw post entries.

    Parameters
    ----------
    url : str
        Listing endpoint, e.g. ``https://www.reddit.com/r/<name>/new.json``.
    max_pages : int, optional
        Upper bound on the number of requests (default 40).
    delay : int or float, optional
        Seconds to sleep between consecutive requests (default 1).

    Returns
    -------
    list
        The ``data.children`` entries of every page fetched, in fetch order.

    Notes
    -----
    A non-200 response prints the status code and stops paging; whatever
    was collected up to that point is still returned.
    """
    posts = []
    header = {'User-agent': 'hello world 0.1'}
    after = None
    for _ in tqdm(range(max_pages)):
        params = {} if after is None else {'after': after}
        # The cursor has to be sent with the request; without `params`
        # every iteration re-downloads the first page.
        res = requests.get(url, headers=header, params=params)
        if res.status_code != 200:
            print(res.status_code)
            break
        the_json = res.json()
        posts.extend(the_json['data']['children'])
        after = the_json['data']['after']
        if after is None:
            # No cursor means the listing is exhausted; another request
            # would start over from page one and append duplicates.
            break
        time.sleep(delay)
    return posts

# Make a data frame with the received data
def to_df(posts):
    """Flatten raw listing entries into a three-column DataFrame.

    Each element of ``posts`` is expected to carry a ``'data'`` dict; its
    ``'subreddit'``, ``'title'`` and ``'selftext'`` values become the
    ``label``, ``title`` and ``text`` columns respectively.
    """
    payloads = [entry['data'] for entry in posts]
    columns = {
        'label': [item['subreddit'] for item in payloads],
        'title': [item['title'] for item in payloads],
        'text': [item['selftext'] for item in payloads],
    }
    return pd.DataFrame(columns)

In [3]:
# Get the republican subreddit: page through the r/Republican "new" listing
# with get_json, then flatten the raw entries into a DataFrame with to_df.
url = "https://www.reddit.com/r/Republican/new.json"
post_rep = get_json(url)
rep = to_df(post_rep)
# Preview the first rows as the cell output
rep.head()

100%|██████████| 40/40 [00:53<00:00,  1.31s/it]


Unnamed: 0,label,title,text
0,Republican,Notorious Late-Term Abortionist Loses Medical ...,
1,Republican,Can anyone find the ironic flaw in this article?,
2,Republican,"Trump Threatens To Close Southern Border, End ...",
3,Republican,$10 Billion to Central America,
4,Republican,Bump Stock Ban Broken Down: Unconstitutional A...,


In [4]:
# Get the democrat subreddit: page through the r/democrats "new" listing
# with get_json, then flatten the raw entries into a DataFrame with to_df.
# Note: this rebinds the notebook-level `url` variable.
url = "https://www.reddit.com/r/democrats/new.json"
post_dem = get_json(url)
dem = to_df(post_dem)
# Preview the first rows as the cell output
dem.head()

100%|██████████| 40/40 [00:54<00:00,  1.35s/it]


Unnamed: 0,label,title,text
0,democrats,"""artistically designed steel slats"" sounds a l...",
1,democrats,Trump’s Gentrification Scheme to Enrich Real E...,
2,democrats,"Once again, Democrats have to clean up the GOP...",
3,democrats,Dem-led House must lead cleanup of Trump-made ...,
4,democrats,"For First Time, Majority Of Americans Want Tru...",


##  Option 2 - Importing Data using PRAW (Python Reddit API Wrapper)

- Install PRAW
- Obtain API keys from [Reddit Developed Applications](https://www.reddit.com/prefs/apps/)
- Save the API keys into a JSON file

In [5]:
import praw
import json

In [6]:
# Load the API keys from the JSON file into `keys`
with open('../api_key.json', 'r') as f:
    keys = json.load(f)

In [7]:
# Using the credentials from the API key file, request and receive the data
def get_data(subreddit):
    """Download the "hot" listing of a subreddit through PRAW.

    Credentials are read from the notebook-level ``keys`` dict
    (``client_id``, ``client_secret``, ``password``, ``user_agent``,
    ``username``).

    Parameters
    ----------
    subreddit : str
        Name of the subreddit to read, e.g. ``'Republican'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``label`` (subreddit name), ``title`` and ``text``
        (self-text), one row per submission.

    Raises
    ------
    RuntimeError
        If the authenticated account does not match ``keys['username']``.
        Previously this path printed a message and then failed with an
        ``UnboundLocalError`` because no DataFrame had been built.
    """
    reddit = praw.Reddit(client_id=keys['client_id'],
                         client_secret=keys['client_secret'],
                         password=keys['password'],
                         user_agent=keys['user_agent'],
                         username=keys['username'])
    if reddit.user.me() == keys['username']:
        records = []
        for submission in reddit.subreddit(subreddit).hot(limit=None):
            records.append({
                # Store the subreddit's name rather than the PRAW object so
                # the column holds plain strings, as to_df's output does.
                'label': submission.subreddit.display_name,
                'title': submission.title,
                'text': submission.selftext,
            })
        # Explicit columns keep the frame's shape stable for an empty listing.
        return pd.DataFrame(records, columns=['label', 'title', 'text'])
    raise RuntimeError("Invalid API key")

In [8]:
# Get the republican subreddit through PRAW; get_data reads the credentials
# from `keys`, so the key-loading cell must have been run first.
df_rep = get_data('Republican')
df_rep.head()

Unnamed: 0,label,title,text
0,Republican,"Finally, US hospitals will have to post their ...",
1,Republican,$10 Billion to Central America,
2,Republican,"Trump Threatens To Close Southern Border, End ...",
3,Republican,Signs of the Times,
4,Republican,Notorious Late-Term Abortionist Loses Medical ...,


In [9]:
# Get the democrat subreddit through PRAW; get_data reads the credentials
# from `keys`, so the key-loading cell must have been run first.
df_dem = get_data('democrats')
df_dem.head()

Unnamed: 0,label,title,text
0,democrats,Mueller getting a shout out on Jeopardy today,
1,democrats,RT and R/The_D.....: Pro-Trump subreddit upvot...,
2,democrats,Donald Trump’s approval rating equals his all-...,
3,democrats,Lol,
4,democrats,Dem-led House must lead cleanup of Trump-made ...,


In [10]:
# Check the shape (rows, columns) of the dataframes to see how many
# submissions each PRAW pull returned
print(df_rep.shape)
print(df_dem.shape)

(633, 3)
(997, 3)


In [11]:
# Export the PRAW dataframes to csv files; index=False leaves the row index
# out of the files.
# NOTE(review): assumes the ../dataset directory already exists — confirm.
df_rep.to_csv('../dataset/reddit_rep.csv', index=False)
df_dem.to_csv('../dataset/reddit_dem.csv', index=False)