In [1]:
import os

from sleep_events import connect_to_firebase

# Open the Firebase connection that the rest of the notebook reads from.
# NOTE(review): presumably a Firestore client, since it is later used via
# .collection(...).stream() — confirm against sleep_events.connect_to_firebase.
firebase = connect_to_firebase()

In [2]:
import pandas as pd

# Stream every document of the 'notesExperimental' collection.
docs = firebase.collection('notesExperimental').stream()

# One plain dict per document: its fields plus the document id under 'id'
# (the document id wins if the payload already carries an 'id' field).
note_records = [dict(snapshot.to_dict(), id=snapshot.id) for snapshot in docs]

# Tabular view of the same records for ad-hoc inspection.
note_df = pd.DataFrame(note_records)

In [14]:
from datetime import datetime, timezone
import re
from bson import ObjectId


def sanitize_document_id(doc_id: str) -> str:
    """Return ``doc_id`` with every character outside ``[a-zA-Z0-9_-]`` replaced by ``_``.

    Args:
        doc_id: The raw document id.

    Returns:
        The sanitized id; its length equals the length of the input.

    Raises:
        TypeError: If ``doc_id`` is not a string (propagated from ``re.sub``).
    """
    return re.sub(r'[^a-zA-Z0-9_-]', '_', doc_id)

def convert_to_unix_seconds(timestamp):
    """Convert an ISO 8601 timestamp string to whole UNIX seconds.

    Every ``Z`` in the string is rewritten to ``+00:00`` before it is handed
    to ``datetime.fromisoformat``; the fractional part of the resulting POSIX
    timestamp is discarded by ``int()``.
    """
    iso_text = timestamp.replace('Z', '+00:00')
    return int(datetime.fromisoformat(iso_text).timestamp())

def convert_to_bson_date(timestamp):
    """Parse an ISO 8601 timestamp string into a ``datetime`` object.

    Every ``Z`` in the string is rewritten to ``+00:00`` first; the parsed
    value keeps whatever UTC offset the string carried.
    """
    return datetime.fromisoformat(timestamp.replace('Z', '+00:00'))

def convert_date_to_epoch(date_str):
    """Convert a ``YYYY-MM-DD`` date string to UNIX seconds at UTC midnight of that day."""
    naive_midnight = datetime.strptime(date_str, '%Y-%m-%d')
    utc_midnight = naive_midnight.replace(tzinfo=timezone.utc)
    return int(utc_midnight.timestamp())


import copy

# Work on a deep copy so the raw records in note_records stay untouched
# and this cell can be re-run safely.
cloned_note_records = copy.deepcopy(note_records)

# Move the document id to '_id', drop the redundant *UTC fields and turn the
# remaining timestamp fields into datetime objects.
for note in cloned_note_records:
    if 'id' in note:
        if note['id'] is None:
            # NOTE(review): the note is only skipped by this loop; it remains in
            # cloned_note_records unconverted and is still part of any later insert.
            print("Skipping note with None ID " + str(note))
            continue
        # MongoDB keys documents by '_id', so the original id moves there.
        note['_id'] = note.pop('id')

    # 'dayAndNightOf' is left untouched; the epoch conversion via
    # convert_date_to_epoch is currently not applied.

    # Drop the duplicated UTC variants if they are present.
    for key in ('timestampWrittenUTC', 'timestampForUTC'):
        note.pop(key, None)

    for key in ('timestampWritten', 'timestampFor', 'timestampUpdated'):
        value = note.get(key)
        if value is None:
            # Field missing or explicitly null: nothing to convert.
            continue
        if isinstance(value, str):
            note[key] = convert_to_bson_date(value)
        elif isinstance(value, datetime):
            # Round-trip through ISO text so datetime subclasses end up as
            # plain datetime objects.
            note[key] = convert_to_bson_date(value.isoformat())
        else:
            print(note)
            raise ValueError(f"Unexpected type for {key}: {type(note[key])}")

cloned_note_records[0]


{'timestampFor': datetime.datetime(2024, 5, 3, 15, 47, 17, 610000, tzinfo=datetime.timezone.utc),
 'tags': ['ritalin', 'inhibition', 'autistic'],
 'dayAndNightOf': '2024-05-03',
 'note': "Interesting ritalin thing.. It suppresses my inhibition enough to make me respond quicker, and that's really making me realise that I am a bit autistic. Maybe?",
 'timestampWritten': datetime.datetime(2024, 5, 3, 15, 48, 8, 446000, tzinfo=datetime.timezone.utc),
 '_id': '2024-05-03T16:48:08.446+01:00'}

In [4]:
import json

def is_json_serializable(obj) -> bool:
    """Report whether ``json.dumps`` can serialize ``obj`` with default settings.

    Args:
        obj: Any Python object.

    Returns:
        ``True`` when ``json.dumps(obj)`` succeeds, ``False`` when it fails
        because of an unsupported type (``TypeError``), a circular reference
        (``ValueError``), an overflow (``OverflowError``) or nesting that is
        too deep to encode (``RecursionError``).
    """
    try:
        json.dumps(obj)
    except (TypeError, ValueError, OverflowError, RecursionError):
        # A failure of any of these kinds means "not serializable" rather
        # than an error the caller has to handle.
        return False
    return True

# Keep only the records that json.dumps would reject.
non_serializable_records = list(
    filter(lambda record: not is_json_serializable(record), cloned_note_records)
)

# Show the offending records (an empty list means everything is serializable).
print(non_serializable_records)

[]


In [5]:
!pip install pymongo

Collecting pymongo
  Downloading pymongo-4.11-cp310-cp310-win_amd64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.11-cp310-cp310-win_amd64.whl (781 kB)
   ---------------------------------------- 0.0/781.4 kB ? eta -:--:--
   -------------------------- ------------- 524.3/781.4 kB 5.6 MB/s eta 0:00:01
   ---------------------------------------- 781.4/781.4 kB 2.2 MB/s eta 0:00:00
Downloading dnspython-2.7.0-py3-none-any.whl (313 kB)
Installing collected packages: dnspython, pymongo
Successfully installed dnspython-2.7.0 pymongo-4.11


In [7]:
import meilisearch
import json
import json
import re
import os
import dotenv
from pymongo import MongoClient

# Load environment variables (including MONGO_URI) from the .env file.
dotenv.load_dotenv("../../.env")

# Fail fast when the URI is missing: MongoClient(None) would silently fall
# back to its default host, and the following cells modify the collection.
mongo_uri = os.getenv('MONGO_URI')
if not mongo_uri:
    raise RuntimeError("MONGO_URI is not set; check ../../.env before connecting")

client = MongoClient(mongo_uri)
db = client['examined_life']
collection = db['notes']



In [16]:
# Destructive: the empty filter matches every document, so this empties the
# whole collection before the notes are inserted again.
collection.delete_many({})

DeleteResult({'n': 3, 'ok': 1.0}, acknowledged=True)

In [17]:
# Bulk-insert the converted notes; each note's '_id' (taken from the original
# document id) becomes the key of the stored document.
collection.insert_many(cloned_note_records)

InsertManyResult(['2024-05-03T16:48:08.446+01:00', '2024-05-03T22:16:32.279+01:00', '2024-05-04T10:04:52.842+01:00', '2024-05-04T22:48:56.424+01:00', '2024-05-05T08:48:45.111+01:00', '2024-05-05T09:30:29.410+01:00', '2024-05-05T20:06:57.395+01:00', '2024-05-05T21:00:01.182+01:00', '2024-05-05T21:12:45.877+01:00', '2024-05-05T21:15:00.832+01:00', '2024-05-05T23:07:09.078+01:00', '2024-05-06T10:36:46.684+01:00', '2024-05-06T17:53:19.429+01:00', '2024-05-06T17:53:53.285+01:00', '2024-05-06T17:54:48.561+01:00', '2024-05-06T20:38:12.672+01:00', '2024-05-06T23:32:23.573+01:00', '2024-05-07T08:34:02.179+01:00', '2024-05-07T09:55:18.501+01:00', '2024-05-07T11:48:00.898+01:00', '2024-05-07T13:31:44.987+01:00', '2024-05-07T16:43:37.828+01:00', '2024-05-07T21:13:20.746+01:00', '2024-05-07T21:34:28.237+01:00', '2024-05-07T22:37:26.939+01:00', '2024-05-07T23:02:04.717+01:00', '2024-05-08T07:59:43.004+01:00', '2024-05-08T10:08:32.706+01:00', '2024-05-08T22:44:36.711+01:00', '2024-05-08T23:21:13.200+