This notebook analyses how often certain craving-trigger categories appear in the craving set (posts grouped by author) that was created with "RedditAnalyzer".

In [None]:
from google.colab import data_table
import pandas as pd
data_table.enable_dataframe_formatter()

# Read the per-author posts from CSV, discard the 'level_0' and 'index'
# columns, and give the unnamed third column its real name, "author".
grouped = (
    pd.read_csv("grouped_posts.csv")
    .drop(columns=['level_0', 'index'])
    .rename(columns={"Unnamed: 2": "author"})
)
grouped



Unnamed: 0,author,join_posts,count_posts
0,ginger_sprout,In which I fuck everything up and then have a ...,97
1,Ohhshitfuck,I relapsed….\nIt happened Thursday night. It w...,78
2,LunaValley,Went for a drink with a friend last night and ...,75
3,SaintHomer,"The Daily Check-In for Friday, March 25th: Jus...",51
4,AlecASaurus,I Know I Can Do Better\nAnd it kind of scares ...,48
...,...,...,...
24430,Sree58,Successful Day 3 of the 30-day Abstinence prom...,1
24431,SrLOchris,Hanging with my drinking buddy of 15 years ton...,1
24432,Squooooosh,Day 5 - Challenges coming soon\nStarted on thi...,1
24433,SquishyFaceDogs10,Drank 6 bud lights last night after my gf left...,1


In [None]:
# Curated synonym table produced with the help of "similarity_analyser".
# The columns used later in this notebook hold, per trigger concept, the
# terms treated as equivalent to that concept.
synonyms_csv = "curated_synonyms.csv"
similar_terms = pd.read_csv(synonyms_csv)
similar_terms



Unnamed: 0,alone,alone_sim,alone_count,friend,friend_sim,friend_count,family,family_sim,family_count,partner,...,happy_count,proud,proud_sim,proud_count,bored,bored_sim,bored_count,weekend,smoke,smoke_count
0,alone,1.0,5788.0,friend,1.0,16991.0,family,1.0,8007.0,partner,...,6215.0,proud,1.0,3411.0,bored,1.0,1315.0,friday,smoke,1213.0
1,isolated,0.555,383.0,buddy,0.691,776.0,parent,0.627,2404.0,girlfriend,...,,accomplished,0.633,273.0,,,,saturday,cigarette,1102.0
2,lonely,0.5,1255.0,,0.655,17545.0,dad,0.614,2028.0,husband,...,,accomplishment,,,boredom,0.584,972.0,sunday,weed,1598.0
3,cooped,0.429,24.0,,0.646,1080.0,brother,0.598,1330.0,boyfriend,...,1290.0,milestone,,,boring,0.572,717.0,weekend,smoking,1914.0
4,,,,,0.562,810.0,mother,0.583,1552.0,wife,...,2591.0,,,,,,,end week,smoked,437.0
5,quarantining,0.444,14.0,meet,0.514,952.0,wife,0.572,4241.0,relationship,...,22409.0,,,,,,,,cigs,135.0
6,,,,pregaming,0.617,12.0,sister,0.57,1100.0,ex,...,955.0,,,,cooped,0.53,24.0,,smoker,180.0
7,,,,groomsman,0.596,16.0,mom,0.568,2478.0,bf,...,25188.0,,,,unmotivated,0.449,76.0,,nicotine,296.0
8,,,,friendsgiving,0.582,13.0,father,0.543,1337.0,spouse,...,748.0,,,,,0.443,209.0,,juul,16.0
9,,,,bros,0.538,13.0,sibling,0.527,181.0,fiance,...,2991.0,,,,,0.44,1183.0,,puff,30.0


## Attribute analysis

In [None]:
!pip install nltk
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

### Preprocessing

In [None]:
# Lower-cased working copy of the raw text.
# Missing posts become empty strings. The earlier
# `grouped['post'].dropna(inplace=True)` acted on a temporary Series and never
# changed `grouped`, so NaN rows were later converted by `astype(str)` into the
# literal token "nan".
grouped['post'] = grouped['join_posts'].fillna('').astype(str).str.lower()

# Tokenise on runs of word characters; punctuation is discarded as a side effect.
tokenizer = RegexpTokenizer(r'\w+')

# English stopwords as a set (constant-time membership tests). A separate name
# is used so the `stopwords` corpus reader imported from nltk.corpus is not
# shadowed by a plain list.
english_stopwords = set(nltk.corpus.stopwords.words("english"))
grouped['post_tokens'] = grouped['post'].apply(
    lambda text: [token for token in tokenizer.tokenize(text) if token not in english_stopwords]
)

# Lemmatise each remaining token (WordNetLemmatizer's default part of speech)
# and join the lemmas back into one space-separated string per author.
wordnet_lem = WordNetLemmatizer()
grouped['post_lemmas'] = grouped['post_tokens'].apply(
    lambda tokens: [wordnet_lem.lemmatize(token) for token in tokens]
)
grouped['processed_post'] = grouped['post_lemmas'].apply(' '.join)
grouped



Unnamed: 0,author,join_posts,count_posts,post,post_tokens,post_lemmas,processed_post
0,ginger_sprout,In which I fuck everything up and then have a ...,97,in which i fuck everything up and then have a ...,"[fuck, everything, white, lightning, experienc...","[fuck, everything, white, lightning, experienc...",fuck everything white lightning experience sev...
1,Ohhshitfuck,I relapsed….\nIt happened Thursday night. It w...,78,i relapsed….\nit happened thursday night. it w...,"[relapsed, happened, thursday, night, alcohol,...","[relapsed, happened, thursday, night, alcohol,...",relapsed happened thursday night alcohol actua...
2,LunaValley,Went for a drink with a friend last night and ...,75,went for a drink with a friend last night and ...,"[went, drink, friend, last, night, embarrassin...","[went, drink, friend, last, night, embarrassin...",went drink friend last night embarrassing fast...
3,SaintHomer,"The Daily Check-In for Friday, March 25th: Jus...",51,"the daily check-in for friday, march 25th: jus...","[daily, check, friday, march, 25th, today, dri...","[daily, check, friday, march, 25th, today, dri...",daily check friday march 25th today drinking m...
4,AlecASaurus,I Know I Can Do Better\nAnd it kind of scares ...,48,i know i can do better\nand it kind of scares ...,"[know, better, kind, scares, drink, sober, wak...","[know, better, kind, scare, drink, sober, wake...",know better kind scare drink sober wake see cl...
...,...,...,...,...,...,...,...
24430,Sree58,Successful Day 3 of the 30-day Abstinence prom...,1,successful day 3 of the 30-day abstinence prom...,"[successful, day, 3, 30, day, abstinence, prom...","[successful, day, 3, 30, day, abstinence, prom...",successful day 3 30 day abstinence promise day...
24431,SrLOchris,Hanging with my drinking buddy of 15 years ton...,1,hanging with my drinking buddy of 15 years ton...,"[hanging, drinking, buddy, 15, years, tonight,...","[hanging, drinking, buddy, 15, year, tonight, ...",hanging drinking buddy 15 year tonight everybo...
24432,Squooooosh,Day 5 - Challenges coming soon\nStarted on thi...,1,day 5 - challenges coming soon\nstarted on thi...,"[day, 5, challenges, coming, soon, started, ne...","[day, 5, challenge, coming, soon, started, new...",day 5 challenge coming soon started new path p...
24433,SquishyFaceDogs10,Drank 6 bud lights last night after my gf left...,1,drank 6 bud lights last night after my gf left...,"[drank, 6, bud, lights, last, night, gf, left,...","[drank, 6, bud, light, last, night, gf, left, ...",drank 6 bud light last night gf left work work...


In [None]:
# Map every trigger (concept) to the list of similar terms retrieved earlier.
# Each concept is a column of `similar_terms`; empty cells in a column are skipped.
concepts = ["alone", "friend", "family", "partner", "colleague", "home", "university", "work", "restaurant", "party", "workout", "supermarket", "airport", "anxious", "sad", "stressed", "tired", "frustrated", "happy", "proud", "bored","weekend","smoke"]
concept_dict = {
    concept: similar_terms[concept].dropna().tolist()
    for concept in concepts
}

print(concept_dict)

{'alone': ['alone', 'isolated', 'lonely', 'cooped', 'quarantining'], 'friend': ['friend', 'buddy', 'meet', 'pregaming', 'groomsman', 'friendsgiving', 'bros'], 'family': ['family', 'parent', 'dad', 'brother', 'mother', 'wife', 'sister', 'mom', 'father', 'sibling', 'cousin', 'grandma', 'aunt', 'stepmom', 'grandpa', 'grandparent', 'grandchild', 'paternal'], 'partner': ['partner', 'girlfriend', 'husband', 'boyfriend', 'wife', 'relationship', 'ex', 'bf', 'spouse', 'fiance', 'gf', 'unfaithful', 'fiancé', 'significant other', 'my so'], 'colleague': ['colleague', 'coworkers', 'ceo', 'manager', 'conference', 'networking', 'boss', 'meetups', 'supervisor'], 'home': ['home', 'house', 'bed', 'kitchen', 'living room'], 'university': ['university', 'college', 'school', 'uni', 'campus', 'dorm', 'classroom', 'professor', 'teacher'], 'work': ['work', 'working', 'wfh', 'worked', 'job', 'office', 'shift', 'job'], 'restaurant': ['restaurant', 'bar', 'cocktail', 'menu', 'server', 'waitress', 'waiter', 'bart

### Analysis

In [None]:
import re  # standard library; imported here so this cell runs on its own


def _concept_pattern(terms):
    """Build a regex that matches any of ``terms`` as whole, space-delimited tokens.

    The term must be preceded by whitespace or the start of the string and
    followed by whitespace or the end of the string, so terms that are the
    first or last token of a post are matched too. All groups are
    non-capturing, which avoids the pandas "pattern has match groups" warning,
    and every term is escaped so it is matched literally.
    """
    alternation = '|'.join(re.escape(term) for term in terms)
    return r'(?:^|\s)(?:' + alternation + r')(?:\s|$)'


def _author_share(terms):
    """Return the percentage of rows in ``grouped`` whose ``processed_post`` contains any of ``terms``.

    An empty term list yields 0.0 instead of a pattern that matches everything.
    Counting with ``sum()`` also works when no row matches (the previous
    ``value_counts()[True]`` raised ``KeyError`` in that case).
    """
    if not terms:
        return 0.0
    hits = grouped['processed_post'].str.contains(_concept_pattern(terms), regex=True, na=False)
    return hits.sum() * 100.0 / number_authors


# One row of `grouped` per author, so the denominator follows the data instead
# of being hard-coded.
number_authors = len(grouped)

# Share of authors mentioning each curated concept, rounded to two decimals.
# The trailing "\%" is printed literally (backslash + percent sign).
for c in concepts:
    occ = _author_share(concept_dict[c])
    print(c, ":", round(occ, 2), "\\%")

# Additional ad-hoc categories that are not part of the curated synonym table.
# NOTE(review): multi-word terms that contain a stopword (presumably 'after lunch')
# cannot match, because stopwords were removed from processed_post — confirm.
print("non-weekend", _author_share(['monday', 'tuesday', 'wednesday', 'thursday', 'friday']))
print("depression", _author_share(['depression', 'depressed', 'depressive']))
print("anxiety", _author_share(['anxious', 'anxiety']))
print("afternoon", _author_share(['after lunch', 'afternoon', 'lunch']))
print("evening", _author_share(['evening', 'night']))

  after removing the cwd from sys.path.


alone : 18.06 \%
friend : 33.62 \%
family : 35.83 \%
partner : 36.41 \%
colleague : 3.71 \%
home : 38.99 \%
university : 9.81 \%
work : 49.61 \%
restaurant : 16.74 \%
party : 13.0 \%
workout : 18.9 \%
supermarket : 16.07 \%
airport : 2.14 \%
anxious : 45.01 \%
sad : 30.44 \%
stressed : 16.76 \%
tired : 18.11 \%
frustrated : 24.69 \%
happy : 34.42 \%
proud : 13.53 \%
bored : 8.87 \%
weekend : 26.54 \%
smoke : 12.02 \%


  


non-weekend 14.589727849396358


  import sys


depression 14.098629015756087


  


anxiety 23.683241252302025


  if __name__ == '__main__':


afternoon 6.658481686105995


  # Remove the CWD from sys.path while we load stuff.


evening 43.981993042766526


In [None]:
import re  # standard library; imported here so this cell runs on its own

# Restructure into a one-hot encoding: one boolean column per concept that
# tells whether the author's processed text mentions any of the concept's terms.
craving_set = pd.DataFrame({
    'processed_post': grouped['processed_post'],
    'post': grouped['join_posts'],
})
for c in concepts:
    terms = concept_dict[c]
    if not terms:
        # No terms for this concept: nothing can match.
        craving_set[c] = False
        continue
    # Terms are escaped and matched as whole, space-delimited tokens. The
    # (?:^|\s) / (?:\s|$) anchors also catch a term that is the first or last
    # token of a post, and the non-capturing groups avoid the pandas
    # "pattern has match groups" warning.
    alternation = '|'.join(re.escape(term) for term in terms)
    pattern = r'(?:^|\s)(?:' + alternation + r')(?:\s|$)'
    craving_set[c] = grouped['processed_post'].str.contains(pattern, regex=True)
craving_set


  """




Unnamed: 0,processed_post,post,alone,friend,family,partner,colleague,home,university,work,...,anxious,sad,stressed,tired,frustrated,happy,proud,bored,weekend,smoke
0,fuck everything white lightning experience sev...,In which I fuck everything up and then have a ...,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
1,relapsed happened thursday night alcohol actua...,I relapsed….\nIt happened Thursday night. It w...,True,True,True,True,False,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2,went drink friend last night embarrassing fast...,Went for a drink with a friend last night and ...,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,False
3,daily check friday march 25th today drinking m...,"The Daily Check-In for Friday, March 25th: Jus...",True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,False,True,True
4,know better kind scare drink sober wake see cl...,I Know I Can Do Better\nAnd it kind of scares ...,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24430,successful day 3 30 day abstinence promise day...,Successful Day 3 of the 30-day Abstinence prom...,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
24431,hanging drinking buddy 15 year tonight everybo...,Hanging with my drinking buddy of 15 years ton...,False,True,True,True,False,False,False,False,...,False,False,False,False,False,False,False,False,True,True
24432,day 5 challenge coming soon started new path p...,Day 5 - Challenges coming soon\nStarted on thi...,True,False,True,True,False,False,False,True,...,True,False,False,False,False,False,False,False,True,False
24433,drank 6 bud light last night gf left work work...,Drank 6 bud lights last night after my gf left...,False,False,False,True,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False


In [None]:
# Persist the one-hot encoded craving set: header row written, row index omitted.
craving_set.to_csv(
    'craving_set.csv',
    index=False,
    header=True,
)