In [1]:
import json
from pymongo import MongoClient
import os
import bson
from bson import decode_all
from bson.json_util import dumps
import shutil
from dotenv import load_dotenv

In [None]:
# === Connect to MongoDB and choose what to export ===

# Pull variables from a local .env file (if present) into the environment.
load_dotenv()
mongo_uri = os.getenv("MONGODB_URI")

# MongoClient(None) quietly falls back to its default host (localhost), so a
# missing variable would list/export from the wrong server instead of failing.
# Stop early with an actionable message.
if not mongo_uri:
    raise RuntimeError(
        "MONGODB_URI is not set; define it in the environment or in a .env file."
    )

client = MongoClient(mongo_uri)

# Sanity check: print every database visible to this connection together
# with its collections.
for db_name in client.list_database_names():
    print(f"\nDatabase: {db_name}")
    for coll_name in client[db_name].list_collection_names():
        print(f"   └── Collection: {coll_name}")

# Database and collections that the export cell reads from.
db = client["test"]

collections = ["backgroundprofiles", "eegrecordings", "responses"]

# Folder that receives one .bson dump per exported collection.
output_dir = './bson_files'
os.makedirs(output_dir, exist_ok=True)


Database: test
   └── Collection: responses
   └── Collection: questions
   └── Collection: backgroundprofiles
   └── Collection: eegrecordings

Database: admin

Database: local
   └── Collection: oplog.rs


In [None]:
# === Export each selected collection to ./bson_files/<name>.bson ===

# Use a dedicated, fixed path rather than the shared `output_dir` variable:
# the reconciliation cell rebinds `output_dir` to the OrganizedRSCA folder, so
# re-running this cell afterwards would write the dumps there, while the
# BSON-to-JSON cell always reads from ./bson_files/.
bson_output_dir = './bson_files'
os.makedirs(bson_output_dir, exist_ok=True)

for name in collections:
    # Load the whole collection into memory; the empty filter matches every
    # document.
    docs = list(db[name].find({}))

    bson_file_path = os.path.join(bson_output_dir, f"{name}.bson")

    # Each file holds ONE BSON document shaped {"documents": [...]}.
    # The reconciliation cell depends on this wrapper: it reads
    # parsed_json[0]['documents'] from the converted JSON.
    with open(bson_file_path, "wb") as f:
        f.write(bson.BSON.encode({"documents": docs}))

    print(f"Exported {len(docs)} documents from '{name}' to '{bson_file_path}'")

In [None]:
# === BSON To JSON Conversion ===

bson_dir = "./bson_files/"
json_dir = "./json_files/"

# No other visible cell creates the JSON folder; without this, the
# open(..., "w") below fails with FileNotFoundError on a fresh checkout.
os.makedirs(json_dir, exist_ok=True)

# Input dumps and their output counterparts, paired by position.
bson_file_path_names = [
    'backgroundprofiles.bson',
    'eegrecordings.bson',
    'responses.bson'
]

json_file_path_names = [
    'backgroundprofiles.json',
    'eegrecordings.json',
    'responses.json'
]

for bson_file_path, json_file_path in zip(bson_file_path_names, json_file_path_names):
    bson_full_file = os.path.join(bson_dir, bson_file_path)
    json_full_file = os.path.join(json_dir, json_file_path)

    # decode_all yields a list of the BSON documents found in the buffer, so
    # the JSON written below is a top-level array.
    with open(bson_full_file, 'rb') as f:
        data = decode_all(f.read())

    # `dumps` here is bson.json_util.dumps (imported at the top), not
    # json.dumps.
    with open(json_full_file, "w") as outfile:
        outfile.write(dumps(data, indent=2))

In [None]:
# === Reconciling EEG & JSON Into Respective Folders ===
#
# Builds OrganizedRSCA/<surveyType>/<vignetteType>/<sessionId>/ and fills each
# session folder with the matching EEG files plus that session's response
# document as <sessionId>.json.

base_dir = './'
eeg_data_dir = os.path.join(base_dir, 'RSCA Survey Collection Data')
output_dir = os.path.join(base_dir, 'OrganizedRSCA')

# Responses whose `__v` field is below this value are skipped.
# NOTE(review): presumably `__v` is a document revision counter and 10 marks a
# completed survey -- confirm against the data model.
MIN_DOC_VERSION = 10

os.makedirs(output_dir, exist_ok=True)

# Make sure the <survey>/<vignette> skeleton exists, and delete any session
# folders from a previous run so stale results do not linger. Only
# sub-directories are removed; loose files are left alone.
for survey_type in ['1', '2']:
    for vignette_type in ['control', 'growth', 'fixed']:
        vignette_path = os.path.join(output_dir, survey_type, vignette_type)
        os.makedirs(vignette_path, exist_ok=True)

        for entry in os.listdir(vignette_path):
            full_path = os.path.join(vignette_path, entry)
            if os.path.isdir(full_path):
                shutil.rmtree(full_path)

with open('./json_files/responses.json') as file:
    file_contents = file.read()
parsed_json = json.loads(file_contents)

# The JSON is an array whose first element wraps every response document
# under the "documents" key.
doc_list = parsed_json[0]['documents']

# Candidate EEG files from the original data folder. Sub-directories are
# filtered out because shutil.copy cannot copy a directory and would raise.
eeg_file_list = [
    entry
    for entry in os.listdir(eeg_data_dir)
    if os.path.isfile(os.path.join(eeg_data_dir, entry))
]

for doc in doc_list:
    # Skip documents that lack the fields needed to place them in the tree.
    if 'surveyType' not in doc or 'vignetteType' not in doc:
        continue
    # A missing `__v` counts as 0, so the document is skipped rather than
    # aborting the whole run with a KeyError.
    if doc.get('__v', 0) < MIN_DOC_VERSION:
        continue

    session_id = doc['sessionId']
    survey_type = str(doc['surveyType'])
    vignette_type = doc['vignetteType']

    # Match by substring: any file whose name contains the session id.
    # NOTE(review): this over-matches if one session id is a substring of
    # another -- confirm the EEG file naming scheme.
    eeg_recording_paths = [
        eeg_name for eeg_name in eeg_file_list if session_id in eeg_name
    ]

    dest_folder = os.path.join(output_dir, survey_type, vignette_type, session_id)
    os.makedirs(dest_folder, exist_ok=True)

    for eeg_file in eeg_recording_paths:
        src_path = os.path.join(eeg_data_dir, eeg_file)
        dest_path = os.path.join(dest_folder, eeg_file)
        shutil.copy(src_path, dest_path)

    # Store the response document next to its recordings.
    json_path = os.path.join(dest_folder, f"{session_id}.json")
    with open(json_path, 'w') as json_out:
        json.dump(doc, json_out, indent=4)

    print(f"Processed: {session_id} to {dest_folder} with {len(eeg_recording_paths)} EEG files")