In [12]:

# Install the requirements
!pip install pymongo



In [13]:
# Data Handling
import json
import os
from typing import List, Dict

import numpy as np
import pandas as pd
import seaborn as sns

from bson import ObjectId
from pymongo import MongoClient


In [5]:
def delete_collection_data(my_database: str, my_collection: str, cluster_uri: str) -> None:
    """
    Delete all records from the specified collection in the specified database.

    The MongoDB connection is closed before returning, including when the
    delete operation raises.

    Parameters:
    - my_database (str): Name of the MongoDB database.
    - my_collection (str): Name of the MongoDB collection.
    - cluster_uri (str): MongoDB connection URI.

    Returns:
    - None
    """
    # Connect to MongoDB
    client = MongoClient(cluster_uri)
    try:
        # An empty filter matches every document, so this empties the collection
        client[my_database][my_collection].delete_many({})
    finally:
        # Release the connection even if the lookup or the delete failed
        client.close()


def insert_dataframe_to_collection(
    my_database: str,
    my_collection: str,
    cluster_uri: str,
    dataframe_dict: Dict[str, pd.DataFrame],
) -> None:
    """
    Replace the contents of one collection per DataFrame in the specified database.

    For every (collection name, DataFrame) pair in `dataframe_dict`, all existing
    records of that collection are deleted and the DataFrame rows are inserted
    in their place. The MongoDB connection is closed before returning,
    including when an operation raises.

    Parameters:
    - my_database (str): Name of the MongoDB database.
    - my_collection (str): Not used by this function; the target collections
      are the keys of `dataframe_dict`. Kept so existing callers keep working.
    - cluster_uri (str): MongoDB connection URI.
    - dataframe_dict (dict): Maps a collection name to the DataFrame whose rows
      should become that collection's records.

    Returns:
    - None
    """
    # Connect to MongoDB
    client = MongoClient(cluster_uri)
    try:
        db = client[my_database]

        # Iterate over each collection and DataFrame in the dictionary
        for collection_name, dataframe in dataframe_dict.items():
            collection = db[collection_name]

            # Delete all records from the collection
            collection.delete_many({})

            # One dict per row, keyed by column name
            data = dataframe.to_dict(orient='records')

            # insert_many rejects an empty document list, so a DataFrame with
            # no rows simply leaves the (now cleared) collection empty.
            if data:
                collection.insert_many(data)
    finally:
        # Release the connection even if an operation above failed
        client.close()



def query_collection_to_dataframe(my_database: str, my_collection: str, cluster_uri: str) -> pd.DataFrame:
    """
    Query all records from the specified collection in the specified database,
    and load the data into a DataFrame.

    The MongoDB connection is closed before returning, including when the
    query raises.

    Parameters:
    - my_database (str): Name of the MongoDB database.
    - my_collection (str): Name of the MongoDB collection.
    - cluster_uri (str): MongoDB connection URI.

    Returns:
    - dataframe (DataFrame): DataFrame containing the queried records, one row
      per record (an empty DataFrame when the query yields no records).
    """
    # Connect to MongoDB
    client = MongoClient(cluster_uri)
    try:
        # An empty filter matches every record in the collection
        cursor = client[my_database][my_collection].find({})

        # Drain the cursor into a list while the connection is still open,
        # then build the DataFrame from the collected records
        return pd.DataFrame(list(cursor))
    finally:
        # Release the connection even if the query or iteration failed
        client.close()

In [6]:
# Usage example:
# Define database name, collection name, and MongoDB connection URI.
# The values are read from environment variables so that a connection URI
# (which may embed credentials) does not have to be typed into, and saved
# with, the notebook. Each setting falls back to '' when its variable is unset.
my_database = os.environ.get('MONGODB_DATABASE', '')
my_collection = os.environ.get('MONGODB_COLLECTION', '')
cluster_uri = os.environ.get('MONGODB_URI', '')

In [None]:
# Load the two CSV files into DataFrames (the paths are left blank here)
train = pd.read_csv('')
test = pd.read_csv('')

# Map each target collection name to the DataFrame that will populate it
dataframe_dict = dict(train=train, test=test)

# Empty the collection named by `my_collection`
delete_collection_data(
    my_database,
    my_collection,
    cluster_uri,
)

# Write every DataFrame in `dataframe_dict` to the collection named by its key
insert_dataframe_to_collection(
    my_database,
    my_collection,
    cluster_uri,
    dataframe_dict,
)


In [None]:
# Query the collection holidays_events and load data into a DataFrame
holidays_events = query_collection_to_dataframe(my_database, 'holidays_events', cluster_uri)

# Preview the first rows instead of rendering the whole collection as cell
# output; `holidays_events` itself still holds every queried record.
holidays_events.head()