In [1]:
# Cell 1: Imports and Firebase Initialization
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import datetime # For Timestamps

# --- IMPORTANT SETUP ---
# Path to your downloaded service account key JSON file.
# Update it for your machine; later cells reuse CRED_PATH.
CRED_PATH = "G:/FIRESTOREKEYS/english-language-art-helper-firebase-adminsdk-fbsvc-e5e8256254.json" # MAKE SURE TO UPDATE THIS

# Notebook cells get re-run, and initialize_app() must not be called twice for the
# default app. get_app() is the public way to ask "does the default app exist?":
# it raises ValueError when it does not.
try:
    firebase_admin.get_app()
    default_app_exists = True
except ValueError:
    default_app_exists = False

if default_app_exists:
    print("Firebase Admin SDK already initialized.")
else:
    try:
        cred = credentials.Certificate(CRED_PATH)
        firebase_admin.initialize_app(cred)
        print("Firebase Admin SDK initialized successfully.")
    except Exception as e:
        print(f"Error initializing Firebase Admin SDK: {e}")
        print("Please ensure your service account key JSON file is correctly placed and the path is updated.")
        # Stop the cell here: without an initialized app the firestore.client()
        # call below would only fail with a second, less helpful error.
        raise

# Firestore client bound to the default app; used by the rest of the notebook.
db = firestore.client()
print("Firestore client obtained.")
    

Firebase Admin SDK initialized successfully.
Firestore client obtained.


In [4]:
# Cell to define the function for getting database structure as a dictionary

import json
import datetime

# --- IMPORTANT: CONFIGURATION ---
# This path will be needed when you call the main function or initialize Firebase separately.
# SERVICE_ACCOUNT_KEY_PATH = "path/to/your/serviceAccountKey.json" # <--- UPDATE THIS WHEN INITIALIZING
# --- END CONFIGURATION ---

# Module-level Firestore client used by get_database_samples_as_dict().
# It stays None until initialize_firebase_for_schema_dump() assigns a client.
# NOTE: re-running this cell resets it to None, so initialize_firebase_for_schema_dump()
# must be called again afterwards.
db_firestore_for_schema_dump = None

def _datetime_converter_for_json(o):
    """Converts datetime objects to ISO format string for JSON serialization."""
    if isinstance(o, datetime.datetime):
        return o.isoformat()
    # Add handling for other Firestore-specific types if they cause issues with json.dumps
    # For example, GeoPoint, or if you store Blob, etc.
    # raise TypeError(f"Object of type {o.__class__.__name__} is not JSON serializable")

def _fetch_samples(collection_ref, n_samples):
    """Helper to fetch n sample documents from a collection reference."""
    docs_data = []
    if not collection_ref: # Should be db_firestore_for_schema_dump
        print(f"Error: Firestore client not available for fetching from {collection_ref.path if hasattr(collection_ref, 'path') else 'unknown path'}")
        return docs_data
        
    try:
        docs_stream = collection_ref.limit(n_samples).stream()
        for doc_snapshot in docs_stream:
            if doc_snapshot.exists:
                doc_dict = doc_snapshot.to_dict()
                doc_dict['_id'] = doc_snapshot.id # Add document ID to the dictionary
                docs_data.append(doc_dict)
    except Exception as e:
        print(f"Error fetching samples from collection path '{collection_ref.path if hasattr(collection_ref, 'path') else 'unknown'}': {e}")
    return docs_data

def get_database_samples_as_dict(n_samples_per_collection=1, schema_map=None):
    """
    Queries n sample documents from known collections and subcollections
    and returns a nested dictionary representing the database structure with these samples.

    Args:
        n_samples_per_collection (int): The number of sample documents to fetch
                                        from each collection and subcollection.
        schema_map (dict | None): Optional description of what to sample, shaped as
                                  { 'collection_name': {'subcollections': ['sub_a', ...]} or None }.
                                  When omitted, the built-in map for this project is used.

    Returns:
        dict: { collection_name: [doc_dict, ...] }. Each doc_dict carries the document
              fields plus '_id'. For collections that declare subcollections, every
              sampled document also gets '_subcollections': { sub_name: [doc_dict, ...] }.
              Collections/subcollections with no samples map to an empty list.
              Returns None if Firestore client is not initialized.
    """
    # Only read here, so no `global` statement is needed.
    client = db_firestore_for_schema_dump
    if not client:
        print("Error: Firestore client (db_firestore_for_schema_dump) is not initialized. Call initialize_firebase_for_schema_dump() first.")
        return None

    if schema_map is None:
        # Default layout: top-level collections and the subcollections to sample
        # under each sampled document.
        schema_map = {
            "levels": None,
            "vocabularyItems": None,
            "quizzes": {"subcollections": ["questions"]},
            "articles": {"subcollections": ["questions"]},
            "users": {
                "subcollections": [
                    "vocabularyProgress",
                    "quizAttempts",
                    "articleProgress",
                ]
            },
        }

    print(f"Starting database structure sampling (fetching {n_samples_per_collection} sample(s) per collection/subcollection)...")
    print("WARNING: This will include actual data. Review output carefully for sensitive information.")

    database_structure = {}

    for collection_name, structure_info in schema_map.items():
        print(f"\nProcessing collection: '{collection_name}'")
        collection_ref = client.collection(collection_name)
        top_level_docs_data = _fetch_samples(collection_ref, n_samples_per_collection)

        # Always record the collection, even when nothing was sampled.
        database_structure[collection_name] = top_level_docs_data if top_level_docs_data else []

        sub_collection_names = []
        if structure_info and 'subcollections' in structure_info:
            sub_collection_names = structure_info['subcollections']

        if sub_collection_names:
            # Subcollections can only be reached through a sampled parent document,
            # so an empty top-level sample means none are visited.
            for parent_doc_data in database_structure[collection_name]:
                parent_doc_id = parent_doc_data.get('_id')
                if not parent_doc_id:
                    print(f"  Skipping subcollections for a document in '{collection_name}' because it's missing '_id'.")
                    continue

                sub_samples = {}
                for sub_collection_name in sub_collection_names:
                    print(f"  Processing subcollection: '{collection_name}/{parent_doc_id}/{sub_collection_name}'")
                    sub_collection_ref = collection_ref.document(parent_doc_id).collection(sub_collection_name)
                    sub_docs_data = _fetch_samples(sub_collection_ref, n_samples_per_collection)
                    sub_samples[sub_collection_name] = sub_docs_data if sub_docs_data else []

                # Attached to the parent's own dict so the nesting mirrors the database.
                parent_doc_data['_subcollections'] = sub_samples

        print(f"Finished processing collection: '{collection_name}'")

    print("\nDatabase structure sampling complete.")
    print("REMINDER: The returned dictionary contains actual data. Sanitize before sharing if necessary.")
    return database_structure

def initialize_firebase_for_schema_dump(key_path):
    """Makes sure a Firebase app exists and stores a Firestore client for the schema dump.

    An existing app is reused when possible: first the default app, then a
    previously created app named 'schemaDumpApp'. Only when neither exists is a
    new app named 'schemaDumpApp' initialized from ``key_path``. The resulting
    client is stored in the module-level ``db_firestore_for_schema_dump``.

    Args:
        key_path: Path to the service account key JSON file; only used when a
            new app has to be initialized.

    Returns:
        bool: True when the Firestore client was obtained, False otherwise
        (the error is printed, not raised).
    """
    global db_firestore_for_schema_dump

    # A stable name: the app is looked up again on later calls, so the name must
    # be identical every time (a timestamp-based name never matches itself twice).
    schema_app_name = 'schemaDumpApp'

    # get_app() raises ValueError when the requested app does not exist.
    try:
        app_to_use = firebase_admin.get_app()
    except ValueError:
        try:
            app_to_use = firebase_admin.get_app(name=schema_app_name)
        except ValueError:
            app_to_use = None

    if app_to_use is not None:
        print("Firebase Admin SDK already initialized.")
    else:
        try:
            cred = credentials.Certificate(key_path)
            # Keep the App object returned by initialize_app instead of looking it up again.
            app_to_use = firebase_admin.initialize_app(cred, name=schema_app_name)
            print(f"Firebase Admin SDK initialized successfully with app name: {app_to_use.name}")
        except Exception as e:
            print(f"Error initializing new Firebase app for schema dump: {e}")
            return False

    try:
        db_firestore_for_schema_dump = firestore.client(app=app_to_use)
        print("Firestore client for schema dump obtained.")
        return True
    except Exception as e:
        print(f"Error obtaining Firestore client for schema dump: {e}")
        return False


# --- Example Usage (to be run in a separate cell or after this definition cell) ---
#
# SERVICE_ACCOUNT_KEY_PATH = "path/to/your/serviceAccountKey.json"  # <--- MUST BE SET
# N_SAMPLES = 1 # Number of samples per collection/subcollection
#
# if initialize_firebase_for_schema_dump(SERVICE_ACCOUNT_KEY_PATH):
#     database_samples_dict = get_database_samples_as_dict(n_samples_per_collection=N_SAMPLES)
#
#     if database_samples_dict:
#         # Now you can convert the dictionary to a JSON string if needed
#         try:
#             database_json_str = json.dumps(database_samples_dict, indent=2, default=_datetime_converter_for_json)
#             print("\n--- Complete Database Structure (JSON String Representation) ---")
#             print(database_json_str)
#             # At this point, you would copy 'database_json_str'
#             # IMPORTANT: Review 'database_json_str' for sensitive data before sharing!
#         except Exception as e:
#             print(f"\nError converting database samples dictionary to JSON: {e}")
#             print("You can still work with the 'database_samples_dict' Python dictionary object.")
# else:
#     print("Firebase could not be initialized for schema dump. Cannot proceed.")
#


In [8]:
from pprint import pprint
# Reset first so a failed run can never display a stale result from an earlier
# execution of this cell (or hit a NameError on a fresh kernel).
result_dict = None
if initialize_firebase_for_schema_dump(CRED_PATH):
    result_dict = get_database_samples_as_dict(n_samples_per_collection=1)
else:
    print("Firebase could not be initialized for schema dump. Cannot proceed.")

if result_dict is not None:
    print()
    pprint(result_dict)

Firebase Admin SDK already initialized.
Firestore client for schema dump obtained.
Starting database structure sampling (fetching 1 sample(s) per collection/subcollection)...

Processing collection: 'levels'
Finished processing collection: 'levels'

Processing collection: 'vocabularyItems'
Finished processing collection: 'vocabularyItems'

Processing collection: 'quizzes'
  Processing subcollection: 'quizzes/grammar_quiz_01/questions'
Finished processing collection: 'quizzes'

Processing collection: 'articles'
  Processing subcollection: 'articles/sustainability_article_01/questions'
Finished processing collection: 'articles'

Processing collection: 'users'
Finished processing collection: 'users'

Database structure sampling complete.
REMINDER: The returned dictionary contains actual data. Sanitize before sharing if necessary.

{'articles': [{'_id': 'sustainability_article_01',
               '_subcollections': {'questions': [{'_id': 'aq1',
                                             