# Creating a Stress Detection Tool using Data From Mental Health Subreddits: Data Wrangling

#### Import necessary libraries

In [1]:
# standard library
import os           # read API credentials from the environment
import pickle       # for saving
import string       # preprocessing
import unicodedata  # preprocessing (Unicode punctuation detection)

# third-party
import numpy as np
import pandas as pd

#reddit crawler
import praw
from praw.models import MoreComments

#### Set up client

In [2]:
# Build the PRAW client used for every API call below.
# Credentials are read from environment variables so they are never saved in
# the notebook or its outputs. A missing variable raises KeyError naming it.
#   REDDIT_USER_AGENT, REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET
r = praw.Reddit(user_agent=os.environ['REDDIT_USER_AGENT'],
                client_id=os.environ['REDDIT_CLIENT_ID'],
                client_secret=os.environ['REDDIT_CLIENT_SECRET'],
                check_for_async=False)

#### Create the subreddit list that I want to get data from
* I have mostly chosen mental-health-related subreddits
* I also added a few positivity-based subreddits to balance things out

In [3]:
# Names of the subreddits to crawl, one per line for easy editing.
sr_list = [
    'adhd',
    'affirmations',
    'anger',
    'anxiety',
    'depression',
    'mentalillness',
    'mindfulness',
    'socialanxiety',
]

#### Get comments from the hot posts of each subreddit
* The first for loop gets the current hot posts from each subreddit through the API
* The second for loop gets the comments from each of those posts
* Each comment is appended to a list, which is then converted to a dataframe

In [4]:
# Collect one [subreddit name, comment text] row per comment.
posts = []

for sr in sr_list:
    subreddit = r.subreddit(sr)

    # `.hot` is a live listing, so the collected rows differ from run to run.
    for post in subreddit.hot(limit=5):
        # Store the subreddit name as a plain string. `post.subreddit` itself is
        # a PRAW Subreddit object, which would otherwise end up inside the
        # DataFrame and the pickle written at the end of the notebook.
        subreddit_name = post.subreddit.display_name

        # Expand at most 5 "load more comments" placeholders before flattening
        # the comment tree.
        post.comments.replace_more(limit=5)
        for comment in post.comments.list():
            posts.append([subreddit_name, comment.body])

In [5]:
# Turn the list of [subreddit, comment] rows into a two-column DataFrame.
posts = pd.DataFrame(data=posts)

#### Checking out the data
* .shape to make sure I have enough data
* .head and .tail to make sure everything looks right

In [6]:
# (rows, columns): one row was appended per scraped comment.
posts.shape

(2457, 2)

In [7]:
# Peek at the first five rows.
posts.head(n=5)

Unnamed: 0,0,1
0,ADHD,"I finally cleaned my entire living space, and ..."
1,ADHD,I found this subreddit
2,ADHD,Made it through the first week of spring term!...
3,ADHD,I finally got my Adderall prescription yesterd...
4,ADHD,I finally got my info together to send to my a...


In [8]:
# Peek at the last five rows.
posts.tail(n=5)

Unnamed: 0,0,1
2452,socialanxiety,Find another job ASAP. Companies are still hir...
2453,socialanxiety,Imm so sorry that had happened to you hopefull...
2454,socialanxiety,Dont worry! theres always another job who will...
2455,socialanxiety,"Yep, mushrooms."
2456,socialanxiety,I always wanted to try shrooms to ‘fix my brai...


#### Fixing the column names

In [9]:
# Replace the default integer column labels (0, 1) with descriptive names.
# This assigns on the existing DataFrame rather than creating a new one.
posts.columns = ['subreddit', 'text']

In [10]:
# Confirm the new column names on the first five rows.
posts.head(n=5)

Unnamed: 0,subreddit,text
0,ADHD,"I finally cleaned my entire living space, and ..."
1,ADHD,I found this subreddit
2,ADHD,Made it through the first week of spring term!...
3,ADHD,I finally got my Adderall prescription yesterd...
4,ADHD,I finally got my info together to send to my a...


#### Some preprocessing
* I know I want two versions of my text data available: one with capitalization and punctuation, and one without. So I am adding the preprocessed text to a new column.

In [11]:
def remove_punctuations(text):
    """Return `text` with all punctuation characters removed.

    A character is dropped when it is either
      * one of the ASCII characters in `string.punctuation`, or
      * a Unicode punctuation character (general category starting with 'P'),
        e.g. curly quotes, en/em dashes or the ellipsis character.

    The Unicode check matters for Reddit text, which often contains
    typographic quotes and apostrophes that `string.punctuation` does not
    cover. Everything else, including letters, digits, whitespace, emoji and
    non-ASCII symbols such as currency signs, is kept unchanged.

    Parameters
    ----------
    text : str
        The raw text to clean.

    Returns
    -------
    str
        The text without punctuation; an empty string stays empty.
    """
    # Single pass over the text instead of one full `replace` per punctuation mark.
    return ''.join(
        ch for ch in text
        if ch not in string.punctuation
        and not unicodedata.category(ch).startswith('P')
    )

# Keep the raw `text` column and store the punctuation-free version next to it.
posts['text_preproc'] = posts['text'].map(remove_punctuations)

In [12]:
# Lower-case the punctuation-stripped text. The original casing stays
# available in the `text` column.
posts['text_preproc'] = posts['text_preproc'].str.lower()

#### Checking things out one more time

In [13]:
# Compare raw and preprocessed text on the first five rows.
posts.head(n=5)

Unnamed: 0,subreddit,text,text_preproc
0,ADHD,"I finally cleaned my entire living space, and ...",i finally cleaned my entire living space and d...
1,ADHD,I found this subreddit,i found this subreddit
2,ADHD,Made it through the first week of spring term!...,made it through the first week of spring term ...
3,ADHD,I finally got my Adderall prescription yesterd...,i finally got my adderall prescription yesterd...
4,ADHD,I finally got my info together to send to my a...,i finally got my info together to send to my a...


In [14]:
# Compare raw and preprocessed text on the last five rows.
posts.tail(n=5)

Unnamed: 0,subreddit,text,text_preproc
2452,socialanxiety,Find another job ASAP. Companies are still hir...,find another job asap companies are still hiri...
2453,socialanxiety,Imm so sorry that had happened to you hopefull...,imm so sorry that had happened to you hopefull...
2454,socialanxiety,Dont worry! theres always another job who will...,dont worry theres always another job who will ...
2455,socialanxiety,"Yep, mushrooms.",yep mushrooms
2456,socialanxiety,I always wanted to try shrooms to ‘fix my brai...,i always wanted to try shrooms to ‘fix my brai...


#### Rename 'posts' to 'df'

In [15]:
# `df` is a second name for the same DataFrame object; no copy is made, so a
# change made through either name is visible through both.
df = posts

#### Save to pickle

In [16]:
# Persist the wrangled DataFrame to the working directory.
df.to_pickle(path='df.pickle')