In [1]:
import os
import pymongo
import pandas as pd
from pprint import pprint
from dotenv import load_dotenv
import re
import spacy
from spacy.lang.en import English

In [6]:
project_folder = os.path.expanduser('../../')
load_dotenv(os.path.join(project_folder, '.env'))

MONGOUSER = os.getenv("MONGOUSER")
MONGOPASSWORD = os.getenv("MONGOPASSWORD")
MONGODBNAME = os.getenv("MONGODBNAME")

client = pymongo.MongoClient(f"mongodb+srv://{MONGOUSER}:{MONGOPASSWORD}@autotherapy.vfbkj.gcp.mongodb.net/{MONGODBNAME}?retryWrites=true&w=majority")
db = client.accounts # connect to my database

posts = db['posts'] # collection of posts
authors = db['authors'] # collection of authors

In [7]:
cursor = posts.find()
entries = list(cursor)

df = pd.DataFrame(entries)
df.tail()

Unnamed: 0,_id,account,post_id,likes,comments,date,content,hashtags,number_hashtags,img_text
14866,5f3873dc9ff725ae35d79ed2,yasminecheyenne,CD1O-6VJ2dm,716,30,2020-08-13 09:50:19,iâ€™m so excited to share my collaboration with ...,,0,but hereâ€™s a f Â» lee that you deserve Â« W Jenn...
14867,5f3873dd9ff725ae35d79ed3,yasminecheyenne,CD354l1pXke,2804,32,2020-08-14 10:43:41,speak it out loud loves! ðŸ—£ðŸ—£ðŸ—£\n,,0,Caleta sree vueB lela etaleyse Vibe ntoleysert...
14868,5f3873de9ff725ae35d79ed4,yasminecheyenne,CD6jGmrpRi0,909,17,2020-08-15 11:22:20,hereâ€™s to a weekend with some time for you x\n,,0,Caleta sree i deserve time for me. i donâ€™t hav...
14869,5f3889259ff725ae35d79ed5,minaa_b,CD1RoGRp9MF,6735,57,2020-08-13 10:13:25,Because self-care is community care.\nâ€”â€¢\nComm...,,0,"LIVE IN\nyouR LIGHT, AND\nALSO SHARE YyouR LIG..."
14870,5f3889269ff725ae35d79ed6,minaa_b,CD39wecpZfH,1747,24,2020-08-14 11:17:32,"#AD - During this time of collective healing, ...",ad,1,


In [20]:
df.iloc[14869, 9]

"LIVE IN\nyouR LIGHT, AND\nALSO SHARE YyouR LIGHT\nWITH SOMEONE WHOSE\nWORLD HAS GONE DARK\nAND NEEDS A REMINDER\nTHAT THEY'RE WORTHY.\nMB.\n"

In [18]:
df.iloc[14869, 6]

'Because self-care is community care.\nâ€”â€¢\nCommunity: how are you showing up for others today? (For me, being an active listener to others is how Iâ€™m showing up for my community)\n'

In [16]:
df.iloc[8555, 6]

"I hope you know that no matter what\nyou've been through, or how many\ntimes you've been let down that you\nare lovable, and so worthy of being\n\nseen, heard, and held. Your past didn't\nmake you broken, it made you wise.\n\nnt\n\x0c"

In [15]:
df.iloc[8555, 6][:165]

"I hope you know that no matter what\nyou've been through, or how many\ntimes you've been let down that you\nare lovable, and so worthy of being\n\nseen, heard, and held. "

In [9]:
nlp = English()

In [10]:
doc = nlp(df.iloc[8555, 6][:165])

print("Index:   ", [token.i for token in doc])
print("Text:    ", [token.text for token in doc])

print("is_alpha:", [token.is_alpha for token in doc])
print("is_punct:", [token.is_punct for token in doc])
print("like_num:", [token.like_num for token in doc])

Index:    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
Text:     ['I', 'hope', 'you', 'know', 'that', 'no', 'matter', 'what', '\n', 'you', "'ve", 'been', 'through', ',', 'or', 'how', 'many', '\n', 'times', 'you', "'ve", 'been', 'let', 'down', 'that', 'you', '\n', 'are', 'lovable', ',', 'and', 'so', 'worthy', 'of', 'being', '\n\n', 'seen', ',', 'heard', ',', 'and', 'held', '.']
is_alpha: [True, True, True, True, True, True, True, True, False, True, False, True, True, False, True, True, True, False, True, True, False, True, True, True, True, True, False, True, True, False, True, True, True, True, True, False, True, False, True, False, True, True, False]
is_punct: [False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, Fal

In [11]:
nlp = spacy.load("en_core_web_sm")

In [12]:
for token in doc:
    print(token.text, token.pos_)

I 
hope 
you 
know 
that 
no 
matter 
what 

 SPACE
you PRON
've AUX
been 
through 
, 
or 
how 
many 

 SPACE
times 
you PRON
've AUX
been 
let 
down 
that 
you 

 SPACE
are 
lovable 
, 
and 
so 
worthy 
of 
being 


 
seen 
, 
heard 
, 
and 
held 
. 


In [13]:
for token in doc:
    print(token.text, token.pos_, token.dep_, token.head.text)

I   I
hope   hope
you   you
know   know
that   that
no   no
matter   matter
what   what

 SPACE  

you PRON  you
've AUX  've
been   been
through   through
,   ,
or   or
how   how
many   many

 SPACE  

times   times
you PRON  you
've AUX  've
been   been
let   let
down   down
that   that
you   you

 SPACE  

are   are
lovable   lovable
,   ,
and   and
so   so
worthy   worthy
of   of
being   being


   


seen   seen
,   ,
heard   heard
,   ,
and   and
held   held
.   .


In [23]:
doc = nlp(df.iloc[14870, 6])

for ent in doc.ents:
    print(ent.text, ent.label_)

# CARDINAL
today DATE


In [24]:
df.iloc[14870, 6]

'#AD - During this time of collective healing, Iâ€™ve partnered with @BeVocal.SpeakUp, as a way to continue to amplify the voices of those who have struggled with mental health related issues and still feel shame when it comes to speaking up.\n\nMy own personal battle with depression as a child and young adult is what led me to this profession. I grew up dealing with emotional abuse from certain family members and always struggled with my self-worth and self-esteem. This is why the topic of self-care and boundaries is so important to me, because I remember the turmoil I went through when I didnâ€™t enforce them.\n\nI do not have the answers for why things happen to us the way they do, but I made a choice to use my life experiences as a catalyst for change and breaking generational dysfunction.\n\nMy story is not linear. It was hard, it was messy, it was painful and so much more that I canâ€™t even fit into words to try and share with you in a caption.\n\nWhat I do know is that being vo