In [1]:
import pandas as pd
import sqlite3

# Input locations, given relative to the directory the notebook runs from.
# SQLite database that is queried for the `subreddit_submission_metadata` table.
SQLITE_DATA = (
    "../../data_collection/reddit_ed_0.6.0.sqlite3"
)
# CSV file that is loaded into `positive_df`.
POSITIVE_DATA = (
    "../../narrative_detection/narrative_posts_by_trained_classification.csv"
)

## All posts

In [2]:
# Read in the sqlite data ----
# Load every row of the `subreddit_submission_metadata` table into a DataFrame.
# NOTE: sqlite3.connect() creates an empty database file when the path does not
# exist, so a wrong SQLITE_DATA path surfaces as a "no such table" query error.
con = sqlite3.connect(SQLITE_DATA)
try:
    sqlite_df = pd.read_sql_query("SELECT * from subreddit_submission_metadata", con)
finally:
    # Release the database handle even when the query raises; otherwise a failed
    # read leaves the connection open for the lifetime of the kernel.
    con.close()
# Preview the first five rows.
sqlite_df.head(5)

Unnamed: 0,subreddit,selftext,author_fullname,title,score,link_flair_css_class,link_flair_text,author_flair_type,over_18,author_flair_text,subreddit_id,id,is_self,author,author_flair_css_class,permalink,url,created_utc
0,EatingDisorders,23 F. I just started treatment for an eating d...,,I need moral support,1,,Seeking Support,,,,t5_2qpcj,10poqxu,1,GullibleCup27,,/r/EatingDisorders/comments/10poqxu/i_need_mor...,https://www.reddit.com/r/EatingDisorders/comme...,1675140001
1,EatingDisorders,I 26 f told the truth for the first time about...,,Opening up about my ed,1,,Seeking Support,,,,t5_2qpcj,10pnoxo,1,mistinthrowaway,,/r/EatingDisorders/comments/10pnoxo/opening_up...,https://www.reddit.com/r/EatingDisorders/comme...,1675137504
2,EatingDisorders,[removed],,My Long Battle With Binge Eating Coming To An End,1,one,Recovery Story,,,,t5_2qpcj,10plv3v,1,LetMeTuckYouInHomie,,/r/EatingDisorders/comments/10plv3v/my_long_ba...,https://www.reddit.com/r/EatingDisorders/comme...,1675132759
3,EatingDisorders,"Greetings,\n\nA research team from Carnegie Va...",,[Mod Approved] Are you a man who has experienc...,1,,,,,,t5_2qpcj,10pkd8e,1,Acceptable_Ad_3206,,/r/EatingDisorders/comments/10pkd8e/mod_approv...,https://www.reddit.com/r/EatingDisorders/comme...,1675128549
4,EatingDisorders,Idk who to talk to about this so I resort to r...,,Just ranting ig,1,,,,,,t5_2qpcj,10pjinj,1,tomato_bean876,,/r/EatingDisorders/comments/10pjinj/just_ranti...,https://www.reddit.com/r/EatingDisorders/comme...,1675126245


In [3]:
# Keep only posts whose body has more than 5 whitespace-separated tokens
# (intended to screen out deleted posts), then renumber the rows from 0.
sqlite_df = (
    sqlite_df
    .loc[lambda posts: posts["selftext"].str.split().str.len() > 5]
    .reset_index(drop=True)
)

# One row per (author, subreddit_id) pair; each remaining column holds the list
# of that pair's post bodies, timestamps, flairs and titles.
user_sqlite = (
    sqlite_df
    .groupby(["author", "subreddit_id"])
    .agg(
        {
            "selftext": list,
            "created_utc": list,
            "link_flair_text": list,
            "title": list,
        }
    )
    .reset_index()
)

# Persist the aggregated frame without the index column.
user_sqlite.to_csv("../data/user_sqlite.csv", index=False)

In [4]:
# Display the (author, subreddit_id) groups that hold more than 2 posts;
# the row count of the displayed frame answers "how many".
user_sqlite.loc[lambda groups: groups["selftext"].str.len().gt(2)]

Unnamed: 0,author,subreddit_id,selftext,created_utc,link_flair_text,title
2,-CreamyPie-,t5_o3plh,[Hello!\n\nIm 11 and Im a boy self recovering ...,"[1588343070, 1588190127, 1587836033, 1587775535]","[Recovery Win, None, None, None]","[Fear Food Friday!! So far its successful, Fea..."
23,0463517228539110,t5_s5o7i,"[Good afternoon, all!\n\nApologies &amp; mods ...","[1577568439, 1575391492, 1574082654, 1573661542]","[None, None, None, None]","[Daily Food Log 12/28/2019, Daily Food Log 12/..."
49,18weepingwillow83,t5_s5o7i,[Does anypne feel like their spouse actually p...,"[1606201272, 1598569670, 1589324864, 158888174...","[None, None, None, None, None, None, None]","[Spouses opinion, Silver Lining, Got this, Cha..."
50,194569324,t5_s5o7i,"[Underweight (44 kg), struggling with AN for a...","[1588083202, 1587130632, 1586178189]","[None, None, None]","[TW, Numbers: I'm afraid I'm about to have a h..."
51,197326743251b,t5_2tmc8,[im usually fine. i just repress everything. b...,"[1632579750, 1631492734, 1631058338, 163425481...","[:snoo_feelsbadman: Just venting, :snoo_though...",[i fear what will happen in future breakdowns....
...,...,...,...,...,...,...
9156,yoyoyoyoyo_,t5_o3plh,[It's been 5 months of recovery (went fully al...,"[1621067863, 1620035920, 1610795881, 1610534661]","[Recovery Win, Question, Support Needed, Quest...","[Not cold all the time anymore!, Should I go o..."
9165,yunghazey,t5_2zzq8,[i didn’t purge for 16 days and then my streak...,"[1607399817, 1605310668, 1604713939, 1604453718]","[Trigger Warning, None, None, TW: Numbers]","[i just broke my streak, TW: weight scales, fe..."
9187,ziyal79,t5_s5o7i,"[So earlier today, I weighed myself, and found...","[1624622194, 1621665385, 1613787373, 1572833486]","[None, None, None, None]",[I need someone to talk me off the proverbial ...
9194,zoqna,t5_o3plh,[This post mentions s/icide and relapsing.\n\n...,"[1622945156, 1612712720, 1612405803, 1609465265]","[None, None, None, Support Needed]","[TW VENTING, Rephrase your post, Is it necessa..."


## Narrative-positive posts

In [5]:
# Load the narrative-classified posts from the CSV at POSITIVE_DATA ----
positive_df = pd.read_csv(filepath_or_buffer=POSITIVE_DATA)
# Preview the first rows (head() defaults to five).
positive_df.head()

Unnamed: 0.1,Unnamed: 0,subreddit,selftext,author_fullname,title,score,link_flair_css_class,link_flair_text,author_flair_type,over_18,...,author_flair_css_class,permalink,url,created_utc,num_words,narrative_positive,narrative_negative,narrative_uncertain,narrative_label,predicted_narrative_label
0,0,EatingDisorders,23 F. I just started treatment for an eating d...,,I need moral support,1,,seeking support,,,...,,/r/EatingDisorders/comments/10poqxu/i_need_mor...,https://www.reddit.com/r/EatingDisorders/comme...,1675140001,191,0,0,0,-1,1
1,1,EatingDisorders,I 26 f told the truth for the first time about...,,Opening up about my ed,1,,seeking support,,,...,,/r/EatingDisorders/comments/10pnoxo/opening_up...,https://www.reddit.com/r/EatingDisorders/comme...,1675137504,273,0,0,0,-1,1
2,4,EatingDisorders,Idk who to talk to about this so I resort to r...,,Just ranting ig,1,,,,,...,,/r/EatingDisorders/comments/10pjinj/just_ranti...,https://www.reddit.com/r/EatingDisorders/comme...,1675126245,133,0,0,0,-1,1
3,5,EatingDisorders,I don’t know where to start.. Due to my upbrin...,,Is true recovery possible?,1,,seeking support,,,...,,/r/EatingDisorders/comments/10pj90n/is_true_re...,https://www.reddit.com/r/EatingDisorders/comme...,1675125516,132,0,0,0,-1,1
4,8,EatingDisorders,i abused laxatives/purging/starving and now ev...,,i get “chronic” hiccups due to bulimia (take t...,1,,,,,...,,/r/EatingDisorders/comments/10perl4/i_get_chro...,https://www.reddit.com/r/EatingDisorders/comme...,1675114462,171,0,0,0,-1,1


In [6]:
# Collapse the posts to one row per (author, subreddit_id) pair, gathering each
# pair's post bodies, timestamps, flairs and titles into lists.
positive_user = (
    positive_df
    .groupby(["author", "subreddit_id"])
    .agg(
        {
            "selftext": list,
            "created_utc": list,
            "link_flair_text": list,
            "title": list,
        }
    )
    .reset_index()
)
positive_user

Unnamed: 0,author,subreddit_id,selftext,created_utc,link_flair_text,title
0,--UsernameNotFound--,t5_2zzq8,[I struggling whit body image and every time I...,[1609984721],[nan],[Can this be an Ed or just life]
1,-CreamyPie-,t5_o3plh,[Hello!\n\nIm 11 and Im a boy self recovering ...,"[1588343070, 1588190127, 1587836033]","[recovery win, nan, nan]","[Fear Food Friday!! So far its successful, Fea..."
2,-Hawkeye,t5_o3plh,[My ex has disclosed to me that she goes throu...,[1612196819],[nan],[Need Advice with Ex's Disorder]
3,-Perchta-,t5_2zzq8,"[So I think I may have an eating disorder, but...",[1569294268],[nan],[I need help with ways to keep food down]
4,-buppy-,t5_2tmc8,[I (F 18) have always been chubbier. While I h...,[1670794694],[nan],"[I know I need help, but I don’t want it.]"
...,...,...,...,...,...,...
5156,zoqna,t5_o3plh,[This post mentions s/icide and relapsing.\n\n...,[1622945156],[nan],[TW VENTING]
5157,zorosantoryuu,t5_2zzq8,[the other day my mom pointed out that i'm too...,[1614353148],[nan],[rant]
5158,zotheslasher,t5_o3plh,[i’ve been in recovery for anorexia for a year...,[1633066448],[support needed],[underweight to overweight]
5159,zuzuRAL,t5_2zzq8,[I'm in late 20s and some time ago I realized ...,[1627387166],[nan],[Don't know who to talk about it]


In [7]:
# Display the (author, subreddit_id) groups with more than 1 post ----
# Rows are renumbered from 0 for display; `positive_user` itself is unchanged.
(
    positive_user
    .loc[lambda groups: groups["selftext"].map(len).gt(1)]
    .reset_index(drop=True)
)

Unnamed: 0,author,subreddit_id,selftext,created_utc,link_flair_text,title
0,-CreamyPie-,t5_o3plh,[Hello!\n\nIm 11 and Im a boy self recovering ...,"[1588343070, 1588190127, 1587836033]","[recovery win, nan, nan]","[Fear Food Friday!! So far its successful, Fea..."
1,194569324,t5_s5o7i,[Just want to get this off my chest because I ...,"[1587130632, 1586178189]","[nan, nan]","[Hardcore restricting, close to being discharg..."
2,197326743251b,t5_2tmc8,[i can eat without rules \n\nim ok with not pu...,"[1631492734, 1631058338, 1628161076]","[:snoo_thoughtful: help? :snoo_biblethump:, :o...","[body image is the last thing to go, sensory o..."
3,40sareinteresting,t5_2tmc8,[I have been off and on bp for 20 years. It’s ...,"[1672395862, 1668588759]","[:snoo_thoughtful: help? :snoo_biblethump:, nan]","[Binging and gaining weight fast, How to truly..."
4,50gayrats,t5_rbmui,[Today my dad called go make sure I ate I said...,"[1673426889, 1672307211]","[tw, family vent]","[Eating disorder Health scare, My brother is s..."
...,...,...,...,...,...,...
899,yellmoe,t5_rbmui,"[My dad lives continents away from me, and usu...","[1674673942, 1674452520, 1673537087]","[recovery wins!, tw, story time]","[I cooked with my dad today!!, Freaked out by ..."
900,you_enjoy_my_yoga,t5_2tmc8,[I can’t take this anymore. I need to free mys...,"[1672414828, 1668125294]","[nan, nan]","[Recovery as a New Years resolution, Addicted ..."
901,yourbaepuppy,t5_2ydy9,[Ever since I wad young I've always been on th...,"[1675009767, 1674955710]","[vent, vent]",[I feel like my parents are the reason why I'm...
902,yuyuanmi,t5_2zzq8,"[The transition to college, plus some other li...","[1580011342, 1578964660]","[nan, nan]",[Can exercise be a healthy coping mechanism? O...


In [8]:
# Write the per-(author, subreddit_id) aggregate to disk, omitting the index ----
# The list-valued columns are stored as text in the CSV.
positive_user.to_csv(
    path_or_buf="../data/user_positive.csv",
    index=False,
)