Import required libraries

In [1]:
import pandas as pd
import praw
import pymongo
import pprint

Read in MongoDB credentials

In [2]:
with open('./db.ini', 'r') as f:
    db_username = f.readline().strip().split('=')[1]
    db_password = f.readline().strip().split('=')[1]

Establish connection to MongoDB database

In [3]:
client = pymongo.MongoClient(f"mongodb+srv://{db_username}:{db_password}@cluster0.umm6y0d.mongodb.net/?retryWrites=true&w=majority")
db = client.goodnotes
collection = db.subreddits

Print one of the documents from the collection

In [4]:
pprint.pprint(collection.find_one())

{'_id': ObjectId('63acdf87b120b0c7e6a4dd2f'),
 'body': 'Hello! New to GoodNotes, I have found all the digital templates on '
         'Etsy but I am looking for JUST an exercise tracker that does not '
         'come with meal planner and all the extra stuff. I realize I could '
         'make one but just seeing if anyone knows of or has a premade one. '
         'Thanks! :)',
 'category': 'Templates',
 'comments_list': [],
 'created_timestamp': datetime.datetime(2022, 12, 28, 21, 20, 28),
 'id': 'zxld5j',
 'num_comments': 0,
 'score': 1,
 'title': 'Looking for just an exercise tracker',
 'url': 'https://www.reddit.com/r/GoodNotes/comments/zxld5j/looking_for_just_an_exercise_tracker/'}


Show only the post title of one document from the collection

In [5]:
pprint.pprint(collection.find_one()['title'])

'Looking for just an exercise tracker'


Print the total number of documents in the collection

In [6]:
collection.count_documents({})

500

Print a list of IDs of documents with at least one comment with a missing author

In [7]:
ids = []
for post in collection.find({'comments_list.author': None}, {'_id': 1, 'comments_list.author': 1}):
    ids.append(str(post['_id']))

In [8]:
print(ids)

['63ace135b120b0c7e6a4ddb4', '63ace161b120b0c7e6a4dddf', '63ace17cb120b0c7e6a4ddfa', '63ace1a5b120b0c7e6a4de23', '63ace1abb120b0c7e6a4de28', '63ace1d0b120b0c7e6a4de4d', '63ace246b120b0c7e6a4de66', '63ace35eb120b0c7e6a4deae', '63ace362b120b0c7e6a4deb3', '63ace3c0b120b0c7e6a4df10', '63ace3d0b120b0c7e6a4df1f']


In [9]:
for post in collection.find({'num_comments': {'$gt': 0}}):
    print_flag = False
    for comment in post['comments_list']:
        if comment['author'] is None:
            print_flag = True
            break
    if print_flag:
        print(post['_id'])

63ac25af1696aa4c3cd128b2
63ac25da1696aa4c3cd128dd
63ac25f51696aa4c3cd128f8
63ac261f1696aa4c3cd12921
63ac26231696aa4c3cd12926
63ac26491696aa4c3cd1294b
63ac26751696aa4c3cd12964
63ac27d71696aa4c3cd129ac
63ac27db1696aa4c3cd129b1
63ac28391696aa4c3cd12a0e
63ac28491696aa4c3cd12a1d
63ac28521696aa4c3cd12a26


Search for all documents containing the keywords ‘pen’ or ‘pencil’ in the post title and give a list of the post titles

In [9]:
pen_pencil_titles = []
for post in collection.find({'title': {'$regex': '\\bpen(cil)?\\b', '$options': 'i'}}, {'title': 1}):
    pen_pencil_titles.append(post['title'])

In [10]:
pen_pencil_titles

['How Can I Decrease The Pen Sensitivity? (Not The Pen Pressure Sensitivity)',
 "do any of you use the MacBook app for GN? handwriting on iPad didn't work for me and I plan to use it for creativity and brainstorming etc. And im looking into typed notes, but where I can view on my iPad to highlight etc. I'm visually impaired and I don't think paper and pencil will suffice for me.",
 'Apple Pencil Pressure sensitivity not working',
 'Apple 2 pencil double tap won’t work, could it actually be goodnotes or is it my pencil?',
 'Is there a notable difference between Apple Pen Gens 1&2?',
 'Question. I’m sure this has been answered before, but too lazy to scroll down. But can one turn off the double tap on the Apple Pencil and not have automatically switch on to the eraser. It is frustratingly annoying.',
 'Any way to get Goodnotes’ pens to behave more like Apple Notes’ new fountain pen tool?',
 'Double tap to switch from pen to highlighter not working?',
 'Anyone else have this problem/knows

Store search result in a dataframe and examine dataframe

In [11]:
def get_data_with_keywords(query):
    ids = []
    titles = []
    replies = []
    num_threads = 0

    for idx, val in enumerate(query):
        # add to the lists
        ids.append(val['_id'])
        titles.append(val['title'])
        replies_arr = []
        
        num_threads += 1

        for reply in val['comments_list']: 
            text = reply['comment_body'].replace('\n','') # want to get rid of the \n\n
            replies_arr.append(text)
        replies.append(replies_arr)
    
    df = pd.DataFrame({'id': ids, 'thread_title': titles, 'reply': replies})
    df = df.explode('reply') # explode() to go from item in arr to row

    print(f"Number of Threads: {num_threads}")
    print(f"Number of Comments: {df.shape[0]}")

    return df

In [12]:
query = collection.find({'title': {'$regex': '\\bpen(cil)?\\b', '$options': 'i'}}, {'title': 1, 'comments_list': 1})
query_data = get_data_with_keywords(query)

Number of Threads: 18
Number of Comments: 141


In [13]:
query_data.id.value_counts()

63ace1ceb120b0c7e6a4de4c    29
63ace1f5b120b0c7e6a4de5e    22
63ace0efb120b0c7e6a4dd6e    16
63ace3a3b120b0c7e6a4def2     9
63ace383b120b0c7e6a4ded3     9
63ace1b1b120b0c7e6a4de2e     7
63ace3bbb120b0c7e6a4df0b     7
63ace1adb120b0c7e6a4de2a     7
63ace1d5b120b0c7e6a4de53     6
63ace0ecb120b0c7e6a4dd6c     6
63ace1c8b120b0c7e6a4de46     5
63ace15db120b0c7e6a4dddb     4
63ace32cb120b0c7e6a4de7e     4
63ace0e3b120b0c7e6a4dd63     4
63ace0d3b120b0c7e6a4dd55     2
63ace324b120b0c7e6a4de7c     2
63ace3b9b120b0c7e6a4df09     1
63acdfb0b120b0c7e6a4dd37     1
Name: id, dtype: int64

In [14]:
query_data.head()

Unnamed: 0,id,thread_title,reply
0,63acdfb0b120b0c7e6a4dd37,How Can I Decrease The Pen Sensitivity? (Not T...,
1,63ace0d3b120b0c7e6a4dd55,do any of you use the MacBook app for GN? hand...,I think OneNote would be the way to go then. I...
1,63ace0d3b120b0c7e6a4dd55,do any of you use the MacBook app for GN? hand...,"I don't care for GN's typing in text boxes, bu..."
2,63ace0e3b120b0c7e6a4dd63,Apple Pencil Pressure sensitivity not working,Delet app and redownload
2,63ace0e3b120b0c7e6a4dd63,Apple Pencil Pressure sensitivity not working,Have you tried a new tip yet?


In [15]:
query_data.thread_title.value_counts()

Apple Pencil + Paperlike Screen protector                                                                                                                                                                                                                                                                       29
New to tablet and GoodNotes. Any ideas how I can get rid of those random lines? :c Using iOS 16 on iPad Pro (6. Gen) and a 2nd Gen Pencil                                                                                                                                                                       22
Is there a notable difference between Apple Pen Gens 1&2?                                                                                                                                                                                                                                                       16
iPadOS 16.1 Out Today - Finally More Quick Access to the Toolbar with Apple Pen