<a href="https://colab.research.google.com/github/nirupam15oct1/GRPO-First-Code/blob/main/Store_PDFs_in_MongoDB_with_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pymongo
import os
from bson.binary import Binary

def store_pdfs_to_mongodb(folder_path, mongo_uri, database_name, collection_name):
    """
    Store every PDF file from a folder as a document in a MongoDB collection.

    Each directory entry whose name ends in ".pdf" (case-insensitive) is read
    fully into memory and inserted as one document with the keys "filename",
    "data" (the raw bytes wrapped in ``bson.binary.Binary``) and "file_path".
    Sub-folders are not traversed. A failure on one file is reported and the
    remaining files are still processed.

    NOTE: MongoDB limits a single BSON document to 16 MiB, so larger PDFs are
    expected to be rejected at insert time and reported by the per-file error
    handler. GridFS is the usual alternative for such files.

    Args:
        folder_path (str): The path to the folder containing the PDF files.
        mongo_uri (str): The MongoDB connection URI (e.g., "mongodb://localhost:27017/").
        database_name (str): The name of the MongoDB database to use.
        collection_name (str): The name of the MongoDB collection to store the files in.

    Returns:
        None.  Prints success/error messages to the console.
    """
    # Bind the name before the try block so the ``finally`` clause can test it
    # even when constructing the client itself raises (e.g. a malformed URI).
    client = None
    try:
        # 1. Validate the folder before opening any connection. ``isdir`` also
        #    rejects a path that exists but is a regular file.
        if not os.path.isdir(folder_path):
            print(f"Error: Folder not found at {folder_path}")
            return

        # 2. Connect to MongoDB. MongoClient connects lazily, so issue a cheap
        #    "ping" to surface an unreachable server once, here, instead of as
        #    a separate failure for every file in the loop below.
        client = pymongo.MongoClient(mongo_uri)
        client.admin.command("ping")
        collection = client[database_name][collection_name]

        # 3. Iterate through files in the folder
        for filename in os.listdir(folder_path):
            if not filename.lower().endswith(".pdf"):
                continue

            filepath = os.path.join(folder_path, filename)
            try:
                # 4. Read the PDF file
                with open(filepath, "rb") as f:
                    pdf_content = f.read()

                # 5. Insert a document holding the raw bytes plus the
                #    file's name and the path it was read from.
                collection.insert_one(
                    {
                        "filename": filename,
                        "data": Binary(pdf_content),
                        "file_path": filepath,
                    }
                )
                print(f"Stored {filename} in MongoDB")

            except Exception as e:
                # Best effort: report this file and carry on with the rest.
                print(f"Error processing {filename}: {e}")

        print("Finished processing files.")

    except pymongo.errors.ConnectionFailure as e:
        print(f"Error connecting to MongoDB: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    finally:
        # Close the connection only if one was actually created.
        if client is not None:
            client.close()

if __name__ == "__main__":
    # Ask for the four settings, in the order the storing function takes them.
    folder_path, mongo_uri, database_name, collection_name = [
        input(prompt)
        for prompt in (
            "Enter the path to the folder containing the PDF files: ",
            "Enter your MongoDB connection URI (e.g., mongodb://localhost:27017/): ",
            "Enter the name of the database: ",
            "Enter the name of the collection: ",
        )
    ]

    # Upload the PDFs, then tell the user the script has finished.
    store_pdfs_to_mongodb(
        folder_path=folder_path,
        mongo_uri=mongo_uri,
        database_name=database_name,
        collection_name=collection_name,
    )
    print("Program execution completed.")