**On your Terminal:**

```bash
$ echo .env >> .gitignore

# ----
# WAY_1
# ----

$ touch .env
$ echo export MONGOUSER = your_username >> .env
$ echo export MONGOPASSWORD = your_password >> .env
$ echo export MONGODBNAME = your_database_name >> .env

# ----
# WAY_2
# ----

$ nano .env

# write in .env the next lines
# export MONGOUSER = 'your_username'
# export MONGOPASSWORD = 'your_password'
# export MONGODBNAME = 'your_database_name'
```
  
  
  
  
**On your Jupyter Notebook:**

```python
from dotenv import load_dotenv
import os


project_folder = os.path.expanduser('~/') # the folder of your project
load_dotenv(os.path.join(project_folder, '.env'))

MONGOUSER = os.getenv("MONGOUSER")
MONGOPASSWORD = os.getenv("MONGOPASSWORD")
MONGODBNAME = os.getenv("MONGODBNAME")

client = pymongo.MongoClient(f"mongodb+srv://{MONGOUSER}:{MONGOPASSWORD}@something.something.gcp.mongodb.net/{MONGODBNAME}?moresomething")
db = client['your_database_name'] # connect to my database

collection1 = db['collection1_name'] # one collection
collection2 = db['collection2_name'] # another collection
```

# Imports

In [1]:
import os
import pickle
import re
import string
import unicodedata as ud
from collections import defaultdict
from pprint import pprint

import emoji
import nltk
import numpy as np
import pandas as pd
import pymongo
from dotenv import load_dotenv
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer

from helpers import (connect_to_db, decontracted, extract_emoji_terms,
                     extract_hashtags, extract_mentions, extract_url,
                     precleaning, remove_hashtags_mentions_urls,
                     restore_spaced_title)

nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/faustina/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# Connection to database

In [2]:
client = connect_to_db()
db = client.accounts # connect to my database

posts = db['posts'] # collection of posts
authors = db['authors'] # collection of authors

In [3]:
'The total amount of space allocated to collections in this database for document storage is: {:.2f} MB.\nThe database has {} posts from {} Instagram profiles.'.format(db.command("dbstats")['storageSize'] / 1e6, posts.count_documents({}), authors.count_documents({}))

'The total amount of space allocated to collections in this database for document storage is: 18.02 MB.\nThe database has 14839 posts from 18 Instagram profiles.'

In [4]:
query = {'post_id': { '$in': ['BTmK1y7D2Fh', 'BU_TvtHD9Fl', 'BVeMbpiD79i'] }}

for row in posts.find(query):
    print(pprint(row), '\n')

{'_id': ObjectId('5f3830969ff725ae35d78624'),
 'account': 'alyssamariewellness',
 'comments': 18,
 'content': "Welcome! I'm a mental health therapist providing services in the "
            'Los Angeles area. I strongly believe in the power of kindness , '
            'compassion, and healing. I am an advocate for women supporting '
            'women , humans supporting humans. While clinically trained in CBT '
            'and psychodynamic modalities, my main goal is to help you find '
            'peace, balance , and happiness within this thing called life. If '
            "you're troubled by anxiety, depression , trauma , I'm here to "
            'help ‚ú®\n'
            '‚û∞\n'
            '‚û∞\n'
            '‚û∞\n'
            '#mentalhealth #mentalhealthadvocate #counseling #endthestigma '
            '#therapy #selflove #gratitude #acceptance #peace #balance '
            '#purpose #mindfulness #love #health #wellness '
            '#mentalhealthawareness #mentalhealthawar

## Load posts to pandas

In [5]:
cursor = posts.find()
entries = list(cursor)

df = pd.DataFrame(entries)
df.head()

Unnamed: 0,_id,account,post_id,likes,comments,date,content,hashtags,number_hashtags,img_text,number_emojis,mentions,emoji_terms,pre_cleaned_text,url_email
0,5f34706c8091264adc7b3c0f,mindfulmft,ierq6xRnBL,55,1,2013-12-28 15:53:14,#therapy #positivequotes #quotes #marriage #re...,guidance challenge mentalhealth quotes positiv...,14,true humility is staying teachabl,0.0,,,,
1,5f34706d8091264adc7b3c11,mindfulmft,ietVO_RnD_,33,0,2013-12-28 16:07:45,#motivation #love #power #encourage #journey #...,journey power forward relationship encourage a...,14,YOU ARE FAR Coo. pnart- TO BE THE ONLY THING S...,0.0,,,,
2,5f34706d8091264adc7b3c12,mindfulmft,ietrkpRnEl,65,1,2013-12-28 16:10:48,#storms #accomplishments #roots #strength #liv...,storms wisdom forward relationship encourageme...,15,Storms make trees take deeper root ss - Dolly ...,0.0,,,,
3,5f34706d8091264adc7b3c13,mindfulmft,ieum0hxnF-,39,1,2013-12-28 16:18:53,#words #self #life #MINDFULMFT #mindfulness #c...,wisdom control encouragement lessons motivatio...,13,- Let anyone determine your self-worth. = Spea...,0.0,,,,
4,5f34706d8091264adc7b3c14,mindfulmft,jH1aVfxnDG,61,1,2014-01-13 15:27:13,#truth #wisdom #wise #think #act #motivation #...,think wisdom wise family counseling encourage ...,16,Most of the problems in life are because of tw...,0.0,,,,


In [6]:
entries

[{'_id': ObjectId('5f34706c8091264adc7b3c0f'),
  'account': 'mindfulmft',
  'post_id': 'ierq6xRnBL',
  'likes': 55,
  'comments': 1,
  'date': datetime.datetime(2013, 12, 28, 15, 53, 14),
  'content': '#therapy #positivequotes #quotes #marriage #relationships #couples #couplestherapy #inspiration #inspiring #selfhelp #growth #guidance #challenge #mentalhealth #mindfulMFT\n',
  'hashtags': 'guidance challenge mentalhealth quotes positivequotes inspiring relationships marriage therapy couplestherapy selfhelp inspiration couples growth',
  'number_hashtags': 14,
  'img_text': 'true humility is staying teachabl',
  'number_emojis': 0,
  'mentions': '',
  'emoji_terms': '',
  'pre_cleaned_text': '',
  'url_email': ''},
 {'_id': ObjectId('5f34706d8091264adc7b3c11'),
  'account': 'mindfulmft',
  'post_id': 'ietVO_RnD_',
  'likes': 33,
  'comments': 0,
  'date': datetime.datetime(2013, 12, 28, 16, 7, 45),
  'content': '#motivation #love #power #encourage #journey #keepmoving #forward #relation

In [7]:
#df['emoji_terms'] = df['content'].apply(lambda post: extract_emoji_terms(post))
#df['url_email'] = df['content'].apply(lambda post: extract_url(post))
#df['hashtags'] = df.apply(lambda post: extract_hashtags(post['content'], post['account'])[0], axis=1)
#df['number_hashtags'] = df.apply(lambda post: extract_hashtags(post['content'], post['account'])[1], axis=1)
df['pre_cleaned_text'] = df.apply(lambda post: precleaning(post['content'], post['account']), axis=1)

In [10]:
df.tail()

Unnamed: 0,_id,account,post_id,likes,comments,date,content,hashtags,number_hashtags,img_text,number_emojis,mentions,emoji_terms,pre_cleaned_text,url_email
14861,5f3873dc9ff725ae35d79ed2,yasminecheyenne,CD1O-6VJ2dm,716,30,2020-08-13 09:50:19,i‚Äôm so excited to share my collaboration with ...,,0,but here‚Äôs a f ¬ª lee that you deserve ¬´ W Jenn...,5.0,the_qi_ greetabl palermobody,raising hands hugging face medium dark skin to...,i am so excited to share my collaboration with...,
14862,5f3873dd9ff725ae35d79ed3,yasminecheyenne,CD354l1pXke,2804,32,2020-08-14 10:43:41,speak it out loud loves! üó£üó£üó£\n,,0,Caleta sree vueB lela etaleyse Vibe ntoleysert...,3.0,,speaking head,speak it out loud loves!,
14863,5f3873de9ff725ae35d79ed4,yasminecheyenne,CD6jGmrpRi0,909,17,2020-08-15 11:22:20,here‚Äôs to a weekend with some time for you x\n,,0,Caleta sree i deserve time for me. i don‚Äôt hav...,0.0,,,here is to a weekend with some time for you x,
14864,5f3889259ff725ae35d79ed5,minaa_b,CD1RoGRp9MF,6735,57,2020-08-13 10:13:25,Because self-care is community care.\n‚Äî‚Ä¢\nComm...,,0,"LIVE IN\nyouR LIGHT, AND\nALSO SHARE YyouR LIG...",0.0,,,Because self-care is community care. Community...,
14865,5f3889269ff725ae35d79ed6,minaa_b,CD39wecpZfH,1747,24,2020-08-14 11:17:32,"#AD - During this time of collective healing, ...",ad,1,,0.0,bevocal.speakup,,"During this time of collective healing, I have...",


In [16]:
for post in posts.aggregate(
    [{"$group" : { "_id": "$name", "count": { "$sum": 1 } } },
    {"$match": {"count" : {"$gt": 1} } }, 
    {"$sort": {"count" : -1} },
    {"$project": {"name" : "$_id", "_id" : 0} } ]    
):
    print(post)

{'name': None}


In [20]:
df[df.columns[1:]].isna().sum()

account              0
post_id              0
likes                0
comments             0
date                 0
content              0
hashtags             0
number_hashtags      0
img_text            27
number_emojis        0
mentions             0
emoji_terms          0
pre_cleaned_text     0
url_email            0
dtype: int64

In [18]:
post_to_update = df[df['pre_cleaned_text'] != df['pre_cleaned_text'].min()][['post_id', 'pre_cleaned_text']].to_dict(orient='records')
post_to_update

[{'post_id': 'jSPfFJRnO0', 'pre_cleaned_text': 'Explore your passions.'},
 {'post_id': 'jxGTWdRnBg',
  'pre_cleaned_text': 'You do not know anyone else is story. It does not mean that they are handling themselves the right way, but you are in no place to judge whatever their challenges are. Live your life in a way that assumes everyone has something . Be kind to them anyway.'},
 {'post_id': 'jzA-6RxnGR',
  'pre_cleaned_text': 'A great message. It hurt because it mattered and that is OKAY.'},
 {'post_id': 'kAZ8vXxnMD',
  'pre_cleaned_text': 'Do not let fears or self doubt get in the way. Do not just show up. Go live.'},
 {'post_id': 'kCbyGbxnEu',
  'pre_cleaned_text': 'With every obstacle you have a choice: to let it own you, or to recognize that it is serving a purpose and to learn and grow from it. How will you overcome your obstacles? How will you define them.'},
 {'post_id': 'kCnBfdxnDx',
  'pre_cleaned_text': 'Showing your emotions is a sign of strength! Do not let others discourag

In [19]:
post_to_update[0]

{'post_id': 'jSPfFJRnO0', 'pre_cleaned_text': 'Explore your passions.'}