# Data Exploration on Reddit SuicideWatch Posts

Source: https://reddit.com/r/SuicideWatch/new.json?limit=100

In [None]:
import requests
import pandas as pd

In [30]:
# Function to fetch data from the API
def fetch_reddit_data(url, limit=100, timeout=10,
                      user_agent="suicidewatch-data-exploration/0.1"):
    """Fetch one page of posts from a Reddit JSON listing as a DataFrame.

    Parameters
    ----------
    url : str
        Listing URL ending in ``limit=``; ``limit`` is appended to it.
    limit : int, default 100
        Number of posts to request.
    timeout : float, default 10
        Seconds to wait for the server. Without a timeout, a stalled
        connection would block the notebook indefinitely.
    user_agent : str
        Value sent in the ``User-Agent`` header. Reddit's API rules ask
        clients to identify themselves with a descriptive string.

    Returns
    -------
    pandas.DataFrame
        One row per post, built from each child's ``data`` dict. An empty
        DataFrame is returned (and a message printed) when the response
        status code is not 200.
    """
    response = requests.get(
        f"{url}{limit}",
        headers={"User-Agent": user_agent},
        timeout=timeout,
    )

    # Keep the original best-effort contract: report the failure and hand
    # back an empty frame rather than raising.
    if response.status_code != 200:
        print(f"Failed to fetch data. Status code: {response.status_code}")
        return pd.DataFrame()

    listing = response.json()
    children_data = [child['data'] for child in listing['data']['children']]
    return pd.DataFrame(children_data)

# Listing endpoint; fetch_reddit_data appends the post limit to it
base_url = "https://reddit.com/r/SuicideWatch/new.json?limit="

# Request the listing 10 times, collecting one dataframe per request
dataframes = [fetch_reddit_data(base_url) for _ in range(10)]
df = dataframes[-1]  # `df` stays bound to the last fetched batch

# Stack every batch into one dataframe with a fresh 0..n-1 index
reddit_df_1000 = pd.concat(dataframes, ignore_index=True)

# Preview the first rows
reddit_df_1000.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,...,author_flair_text_color,permalink,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,author_cakeday
0,,SuicideWatch,"Everyone wants to pathologize, moralize, and r...",t2_pfkg1yd73,False,,0,False,"There's no real help for people, because peopl...",[],...,dark,/r/SuicideWatch/comments/1jdy0mk/theres_no_rea...,False,https://www.reddit.com/r/SuicideWatch/comments...,519429,1742276000.0,0,,False,
1,,SuicideWatch,hi everyone. i’ve tried to be GOOD for as long...,t2_1cyguorgnl,False,,0,False,i want to end things because i don’t like who ...,[],...,,/r/SuicideWatch/comments/1jdy040/i_want_to_end...,False,https://www.reddit.com/r/SuicideWatch/comments...,519429,1742276000.0,0,,False,
2,,SuicideWatch,I don't want to wake up and be me anymore. Mor...,t2_1ld9rc3s1l,False,,0,False,I don't think I can continue with who I am.,[],...,,/r/SuicideWatch/comments/1jdxyyu/i_dont_think_...,False,https://www.reddit.com/r/SuicideWatch/comments...,519429,1742276000.0,0,,False,
3,,SuicideWatch,I've attempted suicide in the past and i found...,t2_1lfbomi4g8,False,,0,False,"I'm not afraid to die, but i'm only here becau...",[],...,,/r/SuicideWatch/comments/1jdxylv/im_not_afraid...,False,https://www.reddit.com/r/SuicideWatch/comments...,519429,1742276000.0,0,,False,
4,,SuicideWatch,im constantly stressed/anxious everyday bc of ...,t2_5jhndftjx,False,,0,False,ideation is comforting,[],...,,/r/SuicideWatch/comments/1jdxqs2/ideation_is_c...,False,https://www.reddit.com/r/SuicideWatch/comments...,519429,1742275000.0,0,,False,


In [31]:
# Report the number of rows in the combined dataframe
print(f"Length of Dataset: {len(reddit_df_1000)}")

Length of Dataset: 1000


Checking for repeated posts in the dataset (duplicate posts add nothing to our analysis)

In [32]:
# Count how often each "selftext" value occurs and keep only the values
# that occur more than once
repeated_texts = (
    reddit_df_1000['selftext']
    .value_counts()
    .loc[lambda counts: counts > 1]
)

# Show the duplicated texts together with their counts
print(repeated_texts)

selftext
Everyone wants to pathologize, moralize, and reduce their liability and exposure to suicide. \nIf people want others to not feel suicidal, you have to do more than coerce, threaten, avoid, preach at, or shame those who feel this way. \n\nThe solution is really fucking simple, have some god damn empathy. Yet...there's not really any in my life. No one gives a fuck.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 

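It looks like repeatedly calling the API with the same URL returns the same page of posts rather than new data (fetching further posts would require paginating with the listing's `after` token), so we keep a single batch of 100 posts only.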

In [None]:
import pandas as pd

In [36]:
# Listing endpoint; fetch_reddit_data appends the post limit to it
base_url = "https://reddit.com/r/SuicideWatch/new.json?limit="

# A single request this time, kept in a one-element list
df = fetch_reddit_data(base_url)
dataframes = [df]

# Build the final dataframe from that list, resetting the index
reddit_df_100 = pd.concat(dataframes, ignore_index=True)

# Preview the first rows
reddit_df_100.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,...,author_patreon_flair,author_flair_text_color,permalink,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video
0,,SuicideWatch,I reached a really low point today. I am strug...,t2_xs124w9j5,False,,0,False,Help?,[],...,False,,/r/SuicideWatch/comments/1jdy3rk/help/,False,https://www.reddit.com/r/SuicideWatch/comments...,519431,1742277000.0,0,,False
1,,SuicideWatch,"Everyone wants to pathologize, moralize, and r...",t2_pfkg1yd73,False,,0,False,"There's no real help for people, because peopl...",[],...,False,dark,/r/SuicideWatch/comments/1jdy0mk/theres_no_rea...,False,https://www.reddit.com/r/SuicideWatch/comments...,519431,1742276000.0,0,,False
2,,SuicideWatch,hi everyone. i’ve tried to be GOOD for as long...,t2_1cyguorgnl,False,,0,False,i want to end things because i don’t like who ...,[],...,False,,/r/SuicideWatch/comments/1jdy040/i_want_to_end...,False,https://www.reddit.com/r/SuicideWatch/comments...,519431,1742276000.0,0,,False
3,,SuicideWatch,I don't want to wake up and be me anymore. Mor...,t2_1ld9rc3s1l,False,,0,False,I don't think I can continue with who I am.,[],...,False,,/r/SuicideWatch/comments/1jdxyyu/i_dont_think_...,False,https://www.reddit.com/r/SuicideWatch/comments...,519431,1742276000.0,0,,False
4,,SuicideWatch,I've attempted suicide in the past and i found...,t2_1lfbomi4g8,False,,0,False,"I'm not afraid to die, but i'm only here becau...",[],...,False,,/r/SuicideWatch/comments/1jdxylv/im_not_afraid...,False,https://www.reddit.com/r/SuicideWatch/comments...,519431,1742276000.0,0,,False


In [37]:
# Report the number of rows in the single-fetch dataframe
print(f"Length of Dataset: {len(reddit_df_100)}")

Length of Dataset: 100


In [39]:
# Count how often each "selftext" value occurs and keep only the values
# that occur more than once
repeated_texts = (
    reddit_df_100['selftext']
    .value_counts()
    .loc[lambda counts: counts > 1]
)

# Show the duplicated texts together with their counts
print(repeated_texts)

Series([], Name: count, dtype: int64)


No repeated texts are found, so we can proceed.

In [41]:
# List the column labels of the single-fetch dataframe
print(f"Columns in the dataset: {reddit_df_100.columns}")

Columns in the dataset: Index(['approved_at_utc', 'subreddit', 'selftext', 'author_fullname', 'saved',
       'mod_reason_title', 'gilded', 'clicked', 'title', 'link_flair_richtext',
       ...
       'author_patreon_flair', 'author_flair_text_color', 'permalink',
       'stickied', 'url', 'subreddit_subscribers', 'created_utc',
       'num_crossposts', 'media', 'is_video'],
      dtype='object', length=102)


In [None]:
# Save the single-fetch dataframe to a CSV file at a relative path;
# index=False leaves the row index out of the file
reddit_df_100.to_csv('reddit_suicidewatch.csv', index=False)