In [1]:
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import datetime
import matplotlib.pyplot as plt

In [2]:
df_depression = pd.read_csv('suicidewatch_posts.csv')
print(df_depression.shape)
df_depression.head(0)

(4447, 13)


Unnamed: 0,title,score,p_id,subreddit,url,num_comments,body,p_timestamp,c_id,comment,c_timestamp,Post_Reply,Time_to_Comment


In [3]:
df_depression_post = df_depression[['p_id', 'score', 'num_comments', 'p_timestamp']].drop_duplicates(keep='first')
df_depression_post.head()

Unnamed: 0,p_id,score,num_comments,p_timestamp
0,cz6nfd,596,157,2019-09-03 18:49:51
132,d2370x,388,42,2019-09-10 08:26:30
172,dzokcl,103,23,2019-11-21 21:36:29
193,dzjrh6,201,28,2019-11-21 16:04:58
220,dzl7x4,144,28,2019-11-21 17:52:33


In [4]:
df_depression_post_direct_reply = df_depression[df_depression['Post_Reply']=='Y'].groupby('p_id')['num_comments'].count()
df_depression_post_direct_reply = df_depression_post_direct_reply.to_frame().rename(columns={'num_comments': 'direct_reply_comments'})
df_depression_post = pd.merge(df_depression_post, df_depression_post_direct_reply, how = 'left', on = 'p_id')

df_depression_post['direct_comments_proportion'] = df_depression_post['direct_reply_comments']/df_depression_post['num_comments']
df_depression_post['direct_comments_proportion'] = df_depression_post['direct_comments_proportion'].fillna(0)

df_depression_post.head()

Unnamed: 0,p_id,score,num_comments,p_timestamp,direct_reply_comments,direct_comments_proportion
0,cz6nfd,596,157,2019-09-03 18:49:51,32.0,0.203822
1,d2370x,388,42,2019-09-10 08:26:30,6.0,0.142857
2,dzokcl,103,23,2019-11-21 21:36:29,12.0,0.521739
3,dzjrh6,201,28,2019-11-21 16:04:58,15.0,0.535714
4,dzl7x4,144,28,2019-11-21 17:52:33,17.0,0.607143


In [5]:
def calc_minutes(s):
    try:
        s = s.lstrip()
        days = int(s.split(' ', 1)[0])
        s = s.split(' ', 1)[1].lstrip().split(' ', 1)[1] + ' '
        hours = int(s[0:2])
        minutes = int(s[3:5])
        seconds = int(s[6:8])
    
        return days*1440 + hours*60 + minutes + seconds/60
    except:
        return np.NaN


In [6]:
df_depression_post_direct_reply_time = df_depression[df_depression['Post_Reply']=='Y']
df_depression_post_direct_reply_time.head()

df_depression_post_direct_reply_time['time_to_reply'] = 0

for i in range(len(df_depression_post_direct_reply_time)):
    df_depression_post_direct_reply_time['time_to_reply'].iloc[i] = calc_minutes(df_depression_post_direct_reply_time['Time_to_Comment'].iloc[i])


                                                                                      
df_depression_post_direct_reply_time = df_depression_post_direct_reply_time.groupby('p_id')['time_to_reply'].aggregate(['median', 'min'])

df_depression_post = pd.merge(df_depression_post, df_depression_post_direct_reply_time, how = 'left', on = 'p_id')
df_depression_post = df_depression_post.rename(columns={'median': 'median_direct_reply_time', 'min': 'min_reply_time'})

df_depression_post.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Unnamed: 0,p_id,score,num_comments,p_timestamp,direct_reply_comments,direct_comments_proportion,median_direct_reply_time,min_reply_time
0,cz6nfd,596,157,2019-09-03 18:49:51,32.0,0.203822,34043.033333,284.233333
1,d2370x,388,42,2019-09-10 08:26:30,6.0,0.142857,46355.975,1058.45
2,dzokcl,103,23,2019-11-21 21:36:29,12.0,0.521739,114.891667,1.75
3,dzjrh6,201,28,2019-11-21 16:04:58,15.0,0.535714,369.183333,67.6
4,dzl7x4,144,28,2019-11-21 17:52:33,17.0,0.607143,225.516667,26.5


In [7]:
sw = stopwords.words('english')

df_depression['body'] = df_depression['body'].astype(str).str.replace('\n', '').str.replace('\\', '')

df_depression['body_sw'] = df_depression['body'].apply(lambda x: ' '.join([word for word in x.split() if word not in (sw)])).astype(str)

#remove special characters, make all characters lowercase
df_depression['body_sw_p'] = df_depression['body_sw'].str.replace('\w\s]', '').str.lower()

df_depression['body_length'] = 0

for i in range(len(df_depression)):
    df_depression['body_length'].iloc[i] = len(df_depression['body'].iloc[i].split(' '))



In [8]:
analyser = SentimentIntensityAnalyzer()

df_depression['Sentiment Scores'] = df_depression['body_sw_p'].apply(analyser.polarity_scores)
df_depression['P_Sent_Neg'] = np.NaN
df_depression['P_Sent_Neu'] = np.NaN
df_depression['P_Sent_Pos'] = np.NaN
df_depression['P_Sent_Com'] = np.NaN


for i in range(len(df_depression)):
    df_depression['P_Sent_Neg'].iloc[i] = df_depression['Sentiment Scores'].iloc[i]['neg']
    df_depression['P_Sent_Neu'].iloc[i] = df_depression['Sentiment Scores'].iloc[i]['neu']
    df_depression['P_Sent_Pos'].iloc[i] = df_depression['Sentiment Scores'].iloc[i]['pos']
    df_depression['P_Sent_Com'].iloc[i] = df_depression['Sentiment Scores'].iloc[i]['compound']

    

df_depression_post_sentiments = df_depression.groupby('p_id')[['P_Sent_Neg', 'P_Sent_Neu', 'P_Sent_Pos', 'P_Sent_Com']].mean()

df_depression_post = pd.merge(df_depression_post, df_depression_post_sentiments, how = 'left', on = 'p_id')
df_depression_post.head()

Unnamed: 0,p_id,score,num_comments,p_timestamp,direct_reply_comments,direct_comments_proportion,median_direct_reply_time,min_reply_time,P_Sent_Neg,P_Sent_Neu,P_Sent_Pos,P_Sent_Com
0,cz6nfd,596,157,2019-09-03 18:49:51,32.0,0.203822,34043.033333,284.233333,0.281,0.502,0.217,-0.9993
1,d2370x,388,42,2019-09-10 08:26:30,6.0,0.142857,46355.975,1058.45,0.108,0.747,0.145,-0.3612
2,dzokcl,103,23,2019-11-21 21:36:29,12.0,0.521739,114.891667,1.75,0.0,1.0,0.0,0.0
3,dzjrh6,201,28,2019-11-21 16:04:58,15.0,0.535714,369.183333,67.6,0.125,0.755,0.12,-0.4892
4,dzl7x4,144,28,2019-11-21 17:52:33,17.0,0.607143,225.516667,26.5,0.203,0.588,0.209,-0.1779


In [9]:
df_depression['comment'] = df_depression['comment'].astype(str).str.replace('\n', '').str.replace('\\', '')

df_depression['comment_sw'] = df_depression['comment'].apply(lambda x: ' '.join([word for word in x.split() if word not in (sw)])).astype(str)

#remove special characters, make all characters lowercase
df_depression['comment_sw_p'] = df_depression['comment_sw'].str.replace('[^\w\s]', '').str.lower()


#Create boolean for deleted comments
df_depression['deleted'] = np.where(df_depression['comment']=='[deleted]', 'Y', 'N')

df_depression['comment_length'] = 0

for i in range(len(df_depression)):
    df_depression['comment_length'].iloc[i] = len(df_depression['comment'].iloc[i].split(' '))

df_depression['comment_length'] = np.where(df_depression['deleted']=='Y', np.NaN, df_depression['comment_length'])

In [10]:
df_depression_comment_length = df_depression.groupby('p_id')['comment_length'].median()
df_depression_comment_length = df_depression_comment_length.to_frame().rename(columns={'comment_length': 'median_comment_length'})

df_depression_post = pd.merge(df_depression_post, df_depression_comment_length, how = 'left', on = 'p_id')
df_depression_post.head()

Unnamed: 0,p_id,score,num_comments,p_timestamp,direct_reply_comments,direct_comments_proportion,median_direct_reply_time,min_reply_time,P_Sent_Neg,P_Sent_Neu,P_Sent_Pos,P_Sent_Com,median_comment_length
0,cz6nfd,596,157,2019-09-03 18:49:51,32.0,0.203822,34043.033333,284.233333,0.281,0.502,0.217,-0.9993,25.0
1,d2370x,388,42,2019-09-10 08:26:30,6.0,0.142857,46355.975,1058.45,0.108,0.747,0.145,-0.3612,24.0
2,dzokcl,103,23,2019-11-21 21:36:29,12.0,0.521739,114.891667,1.75,0.0,1.0,0.0,0.0,5.0
3,dzjrh6,201,28,2019-11-21 16:04:58,15.0,0.535714,369.183333,67.6,0.125,0.755,0.12,-0.4892,31.0
4,dzl7x4,144,28,2019-11-21 17:52:33,17.0,0.607143,225.516667,26.5,0.203,0.588,0.209,-0.1779,34.5


In [11]:
df_depression['Sentiment Scores'] = df_depression['comment_sw_p'].apply(analyser.polarity_scores)
df_depression['C_Sent_Neg'] = np.NaN
df_depression['C_Sent_Neu'] = np.NaN
df_depression['C_Sent_Pos'] = np.NaN
df_depression['C_Sent_Com'] = np.NaN


for i in range(len(df_depression)):
    df_depression['C_Sent_Neg'].iloc[i] = df_depression['Sentiment Scores'].iloc[i]['neg']
    df_depression['C_Sent_Neu'].iloc[i] = df_depression['Sentiment Scores'].iloc[i]['neu']
    df_depression['C_Sent_Pos'].iloc[i] = df_depression['Sentiment Scores'].iloc[i]['pos']
    df_depression['C_Sent_Com'].iloc[i] = df_depression['Sentiment Scores'].iloc[i]['compound']

df_depression['C_Sent_Neg'] = np.where(df_depression['comment']=='[deleted]', np.NaN, df_depression['C_Sent_Neg'])
df_depression['C_Sent_Neu'] = np.where(df_depression['comment']=='[deleted]', np.NaN, df_depression['C_Sent_Neu'])
df_depression['C_Sent_Pos'] = np.where(df_depression['comment']=='[deleted]', np.NaN, df_depression['C_Sent_Pos'])
df_depression['C_Sent_Com'] = np.where(df_depression['comment']=='[deleted]', np.NaN, df_depression['C_Sent_Com'])
    
    
df_depression_comment_sentiments = df_depression.groupby('p_id')[['C_Sent_Neg', 'C_Sent_Neu', 'C_Sent_Pos', 'C_Sent_Com']].median()

df_depression_post = pd.merge(df_depression_post, df_depression_comment_sentiments, how = 'left', on = 'p_id')
df_depression_post.head()

Unnamed: 0,p_id,score,num_comments,p_timestamp,direct_reply_comments,direct_comments_proportion,median_direct_reply_time,min_reply_time,P_Sent_Neg,P_Sent_Neu,P_Sent_Pos,P_Sent_Com,median_comment_length,C_Sent_Neg,C_Sent_Neu,C_Sent_Pos,C_Sent_Com
0,cz6nfd,596,157,2019-09-03 18:49:51,32.0,0.203822,34043.033333,284.233333,0.281,0.502,0.217,-0.9993,25.0,0.1275,0.5975,0.2045,0.0
1,d2370x,388,42,2019-09-10 08:26:30,6.0,0.142857,46355.975,1058.45,0.108,0.747,0.145,-0.3612,24.0,0.0965,0.6985,0.087,0.0
2,dzokcl,103,23,2019-11-21 21:36:29,12.0,0.521739,114.891667,1.75,0.0,1.0,0.0,0.0,5.0,0.0,1.0,0.0,0.0
3,dzjrh6,201,28,2019-11-21 16:04:58,15.0,0.535714,369.183333,67.6,0.125,0.755,0.12,-0.4892,31.0,0.284,0.526,0.137,-0.2732
4,dzl7x4,144,28,2019-11-21 17:52:33,17.0,0.607143,225.516667,26.5,0.203,0.588,0.209,-0.1779,34.5,0.2545,0.584,0.1715,-0.3273


In [12]:
df_depression_post['Diff_Sent_Neg'] = df_depression_post['C_Sent_Neg'] - df_depression_post['P_Sent_Neg']
df_depression_post['Diff_Sent_Neu'] = df_depression_post['C_Sent_Neu'] - df_depression_post['P_Sent_Neu']
df_depression_post['Diff_Sent_Pos'] = df_depression_post['C_Sent_Pos'] - df_depression_post['P_Sent_Pos']
df_depression_post['Diff_Sent_Com'] = df_depression_post['C_Sent_Com'] - df_depression_post['P_Sent_Com']

df_depression_post.head()

Unnamed: 0,p_id,score,num_comments,p_timestamp,direct_reply_comments,direct_comments_proportion,median_direct_reply_time,min_reply_time,P_Sent_Neg,P_Sent_Neu,...,P_Sent_Com,median_comment_length,C_Sent_Neg,C_Sent_Neu,C_Sent_Pos,C_Sent_Com,Diff_Sent_Neg,Diff_Sent_Neu,Diff_Sent_Pos,Diff_Sent_Com
0,cz6nfd,596,157,2019-09-03 18:49:51,32.0,0.203822,34043.033333,284.233333,0.281,0.502,...,-0.9993,25.0,0.1275,0.5975,0.2045,0.0,-0.1535,0.0955,-0.0125,0.9993
1,d2370x,388,42,2019-09-10 08:26:30,6.0,0.142857,46355.975,1058.45,0.108,0.747,...,-0.3612,24.0,0.0965,0.6985,0.087,0.0,-0.0115,-0.0485,-0.058,0.3612
2,dzokcl,103,23,2019-11-21 21:36:29,12.0,0.521739,114.891667,1.75,0.0,1.0,...,0.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,dzjrh6,201,28,2019-11-21 16:04:58,15.0,0.535714,369.183333,67.6,0.125,0.755,...,-0.4892,31.0,0.284,0.526,0.137,-0.2732,0.159,-0.229,0.017,0.216
4,dzl7x4,144,28,2019-11-21 17:52:33,17.0,0.607143,225.516667,26.5,0.203,0.588,...,-0.1779,34.5,0.2545,0.584,0.1715,-0.3273,0.0515,-0.004,-0.0375,-0.1494


In [14]:
w_tokenizer = nltk.tokenize.WhitespaceTokenizer()
lemmatizer = nltk.stem.WordNetLemmatizer()

def lemmatize_text(text):
    return [lemmatizer.lemmatize(w) for w in w_tokenizer.tokenize(text)]

In [15]:
df_depression['body_lemmatized'] = ''
df_depression['comment_lemmatized'] = ''

for i in range(len(df_depression)):
    df_depression['body_lemmatized'].iloc[i] = lemmatize_text(df_depression['body_sw_p'].iloc[i])
    df_depression['comment_lemmatized'].iloc[i] = lemmatize_text(df_depression['comment_sw_p'].iloc[i])

In [16]:
neg_feelings = ['sad', 'bitter', 'regret', 'hate', 'hopeless', 'exhausted', 'numb', 'tired',
                'depressed', 'alone', 'lonely', 'isolate', 'shitty', 'failure', 'cry',
                'worthless', 'empty', 'toxic', 'prison', 'torture', 'boring', 'monotonous',
                'sucks', 'pointless', 'nothing', 'unhappy', 'meaningless', 'anxiety', 'invisibile',
                'abusive', 'struggle']

suicide_act = ['kill', 'die', 'rope', 'knife', 'pills', 'hang', 'cut', 'suicide']

goal = ['motivation', 'care', 'achieve', 'happy', 'strong', 'proud', 'socialize', 'hope',
        'excited', 'bath', 'shower', 'family', 'help', 'improvement', 'flush']

medical = ['antidepressant', 'therapy', 'therapy', 'therapist', 'psychiatrist', 'medicate', 'medicine']

fps = ['i', "i'm", 'im', 'me', 'myself', 'my']



neg_feelings = " ".join(neg_feelings)
suicide_act = " ".join(suicide_act)
goal = " ".join(goal)
medical = " ".join(medical)
fps = " ".join(fps)

In [17]:
df_depression['C_Neg_Feelings'] = 0 
df_depression['C_Suicide_Act'] = 0
df_depression['C_Goal'] = 0
df_depression['C_Medical'] = 0
df_depression['C_FPS'] = 0

df_depression['P_Neg_Feelings'] = 0 
df_depression['P_Suicide_Act'] = 0
df_depression['P_Goal'] = 0
df_depression['P_Medical'] = 0
df_depression['P_FPS'] = 0

In [18]:
cols_c = ['C_Neg_Feelings', 'C_Suicide_Act', 'C_Goal', 'C_Medical', 'C_FPS']
cols_p = ['P_Neg_Feelings', 'P_Suicide_Act', 'P_Goal', 'P_Medical', 'P_FPS']

lists = [neg_feelings, suicide_act, goal, medical, fps]


for i in range(len(cols_c)):
    for j in range(len(df_depression['comment_lemmatized'])):
        
        count = 0
        for k in range(len(df_depression['comment_lemmatized'].iloc[j])):
            if(df_depression['comment_lemmatized'].iloc[j][k].lower() in lists[i]):
                count += 1
        if(len(df_depression['comment_lemmatized'].iloc[j])==0):
            df_depression[cols_c[i]].iloc[j] = 0
        else:
            df_depression[cols_c[i]].iloc[j] = count/len(df_depression['comment_lemmatized'].iloc[j])

            
for i in range(len(cols_p)):
    for j in range(len(df_depression['body_lemmatized'])):
        
        count = 0
        for k in range(len(df_depression['body_lemmatized'].iloc[j])):
            if(df_depression['body_lemmatized'].iloc[j][k].lower() in lists[i]):
                count += 1
        if(len(df_depression['body_lemmatized'].iloc[j])==0):
            df_depression[cols_p[i]].iloc[j] = 0
        else:
            df_depression[cols_p[i]].iloc[j] = count/len(df_depression['body_lemmatized'].iloc[j])

In [19]:
df_depression_frequencies = df_depression.groupby('p_id')['C_Neg_Feelings', 'C_Suicide_Act', 'C_Goal', 
                                                          'C_Medical', 'C_FPS', 'P_Neg_Feelings', 'P_Suicide_Act', 
                                                          'P_Goal', 'P_Medical', 'P_FPS'].median()

df_depression_post = pd.merge(df_depression_post, df_depression_frequencies, how = 'left', on = 'p_id')

In [20]:
df_depression_post.head(10)

Unnamed: 0,p_id,score,num_comments,p_timestamp,direct_reply_comments,direct_comments_proportion,median_direct_reply_time,min_reply_time,P_Sent_Neg,P_Sent_Neu,...,C_Neg_Feelings,C_Suicide_Act,C_Goal,C_Medical,C_FPS,P_Neg_Feelings,P_Suicide_Act,P_Goal,P_Medical,P_FPS
0,cz6nfd,596,157,2019-09-03 18:49:51,32.0,0.203822,34043.033333,284.233333,0.281,0.502,...,0.067521,0.039608,0.075499,0.020442,0.0,0.03838,0.022388,0.037313,0.007463,0.001066
1,d2370x,388,42,2019-09-10 08:26:30,6.0,0.142857,46355.975,1058.45,0.108,0.747,...,0.083478,0.066667,0.066667,0.028006,0.006667,0.016949,0.016949,0.016949,0.016949,0.016949
2,dzokcl,103,23,2019-11-21 21:36:29,12.0,0.521739,114.891667,1.75,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,dzjrh6,201,28,2019-11-21 16:04:58,15.0,0.535714,369.183333,67.6,0.125,0.755,...,0.09375,0.052632,0.051282,0.034483,0.0,0.182927,0.162602,0.162602,0.154472,0.146341
4,dzl7x4,144,28,2019-11-21 17:52:33,17.0,0.607143,225.516667,26.5,0.203,0.588,...,0.119336,0.06754,0.112374,0.074752,0.070773,0.083333,0.055556,0.055556,0.055556,0.055556
5,dzkdgh,97,11,2019-11-21 16:52:36,10.0,0.909091,201.2,19.783333,0.127,0.567,...,0.076923,0.0,0.305556,0.0,0.0,0.137615,0.100917,0.119266,0.12844,0.12844
6,dzh09p,238,48,2019-11-21 11:48:50,26.0,0.541667,475.591667,167.033333,0.304,0.535,...,0.088496,0.0,0.021739,0.0,0.0,0.084592,0.033233,0.060423,0.0,0.012085
7,dzlv8e,34,22,2019-11-21 18:35:24,7.0,0.318182,124.516667,6.783333,0.217,0.334,...,0.055053,0.0,0.020833,0.0,0.0,0.2,0.2,0.2,0.2,0.2
8,dzjknq,54,8,2019-11-21 15:49:51,4.0,0.5,313.808333,231.916667,0.12,0.88,...,0.0,0.0,0.125,0.0,0.083333,0.047619,0.047619,0.047619,0.047619,0.142857
9,dzmgt9,21,9,2019-11-21 19:15:56,6.0,0.666667,77.808333,5.783333,0.211,0.607,...,0.120773,0.069412,0.071096,0.032384,0.032384,0.136691,0.100719,0.136691,0.107914,0.122302


In [21]:
df_depression_post.shape

(980, 31)

In [22]:
df_depression.head(10)

Unnamed: 0,title,score,p_id,subreddit,url,num_comments,body,p_timestamp,c_id,comment,...,C_Neg_Feelings,C_Suicide_Act,C_Goal,C_Medical,C_FPS,P_Neg_Feelings,P_Suicide_Act,P_Goal,P_Medical,P_FPS
0,New wiki on how to avoid accidentally encourag...,596,cz6nfd,SuicideWatch,https://www.reddit.com/r/SuicideWatch/comments...,157,We've been seeing a worrying increase in pro-s...,2019-09-03 18:49:51,eyx1qcc,I'm sorry did you say suicide fetishists? As i...,...,0.076923,0.230769,0.153846,0.076923,0.076923,0.03838,0.022388,0.037313,0.007463,0.001066
1,New wiki on how to avoid accidentally encourag...,596,cz6nfd,SuicideWatch,https://www.reddit.com/r/SuicideWatch/comments...,157,We've been seeing a worrying increase in pro-s...,2019-09-03 18:49:51,ezsdkca,"I didn’t necessarily see this addressed, perha...",...,0.233333,0.144444,0.211111,0.111111,0.133333,0.03838,0.022388,0.037313,0.007463,0.001066
2,New wiki on how to avoid accidentally encourag...,596,cz6nfd,SuicideWatch,https://www.reddit.com/r/SuicideWatch/comments...,157,We've been seeing a worrying increase in pro-s...,2019-09-03 18:49:51,ez8vhv7,"I read so much about what NOT to do, what not ...",...,0.084906,0.066038,0.09434,0.066038,0.04717,0.03838,0.022388,0.037313,0.007463,0.001066
3,New wiki on how to avoid accidentally encourag...,596,cz6nfd,SuicideWatch,https://www.reddit.com/r/SuicideWatch/comments...,157,We've been seeing a worrying increase in pro-s...,2019-09-03 18:49:51,eyy28y9,"For the longest time, I thought this was a sub...",...,0.04,0.04,0.04,0.04,0.04,0.03838,0.022388,0.037313,0.007463,0.001066
4,New wiki on how to avoid accidentally encourag...,596,cz6nfd,SuicideWatch,https://www.reddit.com/r/SuicideWatch/comments...,157,We've been seeing a worrying increase in pro-s...,2019-09-03 18:49:51,ezsco41,Everyone in the place is just incredibly conde...,...,0.0,0.0,0.0,0.0,0.0,0.03838,0.022388,0.037313,0.007463,0.001066
5,New wiki on how to avoid accidentally encourag...,596,cz6nfd,SuicideWatch,https://www.reddit.com/r/SuicideWatch/comments...,157,We've been seeing a worrying increase in pro-s...,2019-09-03 18:49:51,eyx6dwg,Oh my gosh thank you for this. Sometimes I fe...,...,0.125,0.0625,0.1875,0.125,0.1875,0.03838,0.022388,0.037313,0.007463,0.001066
6,New wiki on how to avoid accidentally encourag...,596,cz6nfd,SuicideWatch,https://www.reddit.com/r/SuicideWatch/comments...,157,We've been seeing a worrying increase in pro-s...,2019-09-03 18:49:51,f0pvj8i,Bruuuh if a sane person wants to die it’s thei...,...,0.142857,0.142857,0.142857,0.0,0.0,0.03838,0.022388,0.037313,0.007463,0.001066
7,New wiki on how to avoid accidentally encourag...,596,cz6nfd,SuicideWatch,https://www.reddit.com/r/SuicideWatch/comments...,157,We've been seeing a worrying increase in pro-s...,2019-09-03 18:49:51,ezifalg,Some subs let you state which sub rule is brok...,...,0.05,0.0,0.05,0.05,0.0,0.03838,0.022388,0.037313,0.007463,0.001066
8,New wiki on how to avoid accidentally encourag...,596,cz6nfd,SuicideWatch,https://www.reddit.com/r/SuicideWatch/comments...,157,We've been seeing a worrying increase in pro-s...,2019-09-03 18:49:51,f0qrzus,I read the wiki. And I’ve got a question.I don...,...,0.126126,0.072072,0.108108,0.063063,0.063063,0.03838,0.022388,0.037313,0.007463,0.001066
9,New wiki on how to avoid accidentally encourag...,596,cz6nfd,SuicideWatch,https://www.reddit.com/r/SuicideWatch/comments...,157,We've been seeing a worrying increase in pro-s...,2019-09-03 18:49:51,f15u8v6,>This undermines our purpose hereNot trying to...,...,0.086957,0.0,0.0,0.043478,0.0,0.03838,0.022388,0.037313,0.007463,0.001066


In [23]:
df_depression.shape

(4447, 41)

In [24]:
df_depression_post.to_csv('suicidewatch_threads.csv')
df_depression.to_csv('suicidewatch_comments.csv')