In [31]:
# import libraries
import pymongo
import json
import pandas as pd
from urllib.parse import quote_plus
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
import sys

def get_secrets():
    """Load the notebook's connection settings from ``secrets.json``.

    The file name is relative, so it is resolved against the current
    working directory of the kernel.

    Returns:
        The parsed JSON content of ``secrets.json``.
    """
    with open('secrets.json') as handle:
        return json.load(handle)

# Load the settings once at module level; connect_to_mongodb() reads this
# global rather than taking the settings as an argument.
secrets = get_secrets()


def connect_to_mongodb():
    """Connect to MongoDB and return the client together with the target collection.

    Reads ``USERNAME``, ``PASSWORD``, ``CLUSTER_URL``, ``DATABASE_NAME`` and
    ``COLLECTION_NAME`` from the module-level ``secrets`` mapping, builds a
    ``mongodb+srv://`` URI from them, and sends a ``ping`` command to confirm
    the deployment is reachable before handing the client back.

    Returns:
        tuple: ``(client, collection)`` where ``collection`` is
        ``client[DATABASE_NAME][COLLECTION_NAME]``.

    Exits:
        Prints a diagnostic and calls ``sys.exit(1)`` when a required setting
        is missing or when creating/pinging the client fails.
    """
    # Fail early with a readable message: passing None to quote_plus() raises
    # a TypeError that does not mention which setting is absent.
    required_keys = ("USERNAME", "PASSWORD", "CLUSTER_URL", "DATABASE_NAME", "COLLECTION_NAME")
    missing_keys = [key for key in required_keys if not secrets.get(key)]
    if missing_keys:
        print(f"An error occurred during MongoDB connection: missing settings in secrets: {', '.join(missing_keys)}")
        sys.exit(1)

    # Escape username and password so reserved characters survive in the URI
    escaped_username = quote_plus(secrets.get("USERNAME"))
    escaped_password = quote_plus(secrets.get("PASSWORD"))
    cluster_url = secrets.get("CLUSTER_URL")

    # Build MongoDB URI
    mongo_uri = f"mongodb+srv://{escaped_username}:{escaped_password}@{cluster_url}/?retryWrites=true&w=majority"

    client = None
    try:
        # Create a new client and connect to the server
        client = MongoClient(mongo_uri, server_api=ServerApi('1'))

        # Send a ping to confirm a successful connection
        client.admin.command('ping')
        print("Pinged your deployment. You successfully connected to MongoDB!")

        return client, client[secrets.get("DATABASE_NAME")][secrets.get("COLLECTION_NAME")]

    except pymongo.errors.ConfigurationError:
        failure_message = "An Invalid URI host error was received. Is your Atlas host name correct in your connection string?"
    except Exception as e:
        failure_message = f"An error occurred during MongoDB connection: {e}"

    # Only reached on failure: release the client if it was created before the
    # error (e.g. the ping failed) so it is not left open.
    if client is not None:
        client.close()
    print(failure_message)
    sys.exit(1)

# Connect to MongoDB
client, my_collection = connect_to_mongodb()


# Find all documents in the collection
cursor = my_collection.find()

# Materialise the cursor into a list of dictionaries so the result can be
# loaded into pandas in a single step
documents = list(cursor)

# Always bind `df`, even when the collection is empty, so that later cells
# referencing it do not fail with a NameError.
df = pd.DataFrame(documents)

# Check if documents are found
if not documents:
    print("No documents found.")
else:
    # Show the last row as a quick sanity check of the load
    print(df.tail(1))

Pinged your deployment. You successfully connected to MongoDB!
                         _id user_input  prediction     county  \
24  656a98ef847e3ab6806fa4e6        NaN         NaN  Manhattan   

                     facility_name age_group gender        race  \
24  Albany Medical Center Hospital  50 to 69      M  Other Race   

            ethnicity ccsr_procedure_desc  length_of_stay    year  \
24  Not Span/Hispanic                  No            24.0  2023.0   

   severity_of_illness_code  estimation  
24                        4  184028.625  


In [32]:
import pandas as pd


# Preview the first rows of the loaded frame (rendered as the cell output).
df.head()

Unnamed: 0,_id,user_input,prediction,county,facility_name,age_group,gender,race,ethnicity,ccsr_procedure_desc,length_of_stay,year,severity_of_illness_code,estimation
0,655fd08428029b25228bf016,"[Manhattan, Albany Medical Center Hospital, 0 ...",17614.660156,,,,,,,,,,,
1,655fd0c228029b25228bf017,"[Manhattan, Albany Medical Center Hospital, 0 ...",175188.375,,,,,,,,,,,
2,655fd3812f4322d545c5b0cf,"[Manhattan, Albany Medical Center Hospital, 0 ...",17614.660156,,,,,,,,,,,
3,655fd3a52f4322d545c5b0d0,"[Manhattan, Albany Medical Center Hospital, 0 ...",24586.767578,,,,,,,,,,,
4,655fd44b2f4322d545c5b0d1,"[Manhattan, Albany Medical Center Hospital, 0 ...",295287.875,,,,,,,,,,,


In [33]:
# Fill missing `estimation` values from the `prediction` column.
# The result is assigned back rather than calling fillna(inplace=True) on the
# column selection: that form is a chained in-place operation, which pandas
# warns about and which leaves `df` unchanged when Copy-on-Write is enabled.
df["estimation"] = df["estimation"].fillna(df["prediction"])

In [42]:
# Preview the first rows after the fill.
# NOTE(review): this cell's execution count (42) is higher than that of the
# drop cell below (35), so the recorded output already lacks `prediction`.
# On a fresh top-to-bottom run the column would still be present here.
df.head()

Unnamed: 0,_id,user_input,county,facility_name,age_group,gender,race,ethnicity,ccsr_procedure_desc,length_of_stay,year,severity_of_illness_code,estimation
0,655fd08428029b25228bf016,"[Manhattan, Albany Medical Center Hospital, 0 to 17, F, Other Race, Not Span/Hispanic, No, 1, 2023, 1]",,,,,,,,,,,17614.660156
1,655fd0c228029b25228bf017,"[Manhattan, Albany Medical Center Hospital, 0 to 17, M, Other Race, Not Span/Hispanic, No, 21, 2023, 1]",,,,,,,,,,,175188.375
2,655fd3812f4322d545c5b0cf,"[Manhattan, Albany Medical Center Hospital, 0 to 17, M, Other Race, Not Span/Hispanic, No, 1, 2023, 1]",,,,,,,,,,,17614.660156
3,655fd3a52f4322d545c5b0d0,"[Manhattan, Albany Medical Center Hospital, 0 to 17, M, Other Race, Not Span/Hispanic, CARDIAC CHEST COMPRESSION, 1, 2023, 1]",,,,,,,,,,,24586.767578
4,655fd44b2f4322d545c5b0d1,"[Manhattan, Albany Medical Center Hospital, 0 to 17, M, Other Race, Not Span/Hispanic, CARDIAC CHEST COMPRESSION, 22, 2023, 1]",,,,,,,,,,,295287.875


In [35]:
# Remove the `prediction` column; its values were copied into `estimation`
# (where that was missing) by the earlier fillna cell.
# errors="ignore" keeps this cell safe to re-run: the original in-place drop
# raised KeyError on a second execution because the column was already gone.
df = df.drop(columns=["prediction"], errors="ignore")

In [43]:
# Preview the last rows of the frame (rendered as the cell output).
df.tail()

Unnamed: 0,_id,user_input,county,facility_name,age_group,gender,race,ethnicity,ccsr_procedure_desc,length_of_stay,year,severity_of_illness_code,estimation
20,6564b4cd3bd2c1177bf8df12,,Westchester,Arnot Ogden Medical Center,70 or Older,F,Multi-racial,Spanish/Hispanic,No,14.0,2024.0,4,122512.109375
21,6564badb3bd2c1177bf8df13,,Westchester,Arnot Ogden Medical Center,70 or Older,M,Multi-racial,Spanish/Hispanic,No,30.0,2024.0,4,238805.515625
22,6564bc863bd2c1177bf8df14,,Westchester,Arnot Ogden Medical Center,70 or Older,M,Multi-racial,Spanish/Hispanic,No,30.0,2024.0,4,238805.515625
23,6564bd113bd2c1177bf8df17,,Westchester,Arnot Ogden Medical Center,70 or Older,M,Multi-racial,Spanish/Hispanic,No,25.0,2024.0,4,218594.203125
24,656a98ef847e3ab6806fa4e6,,Manhattan,Albany Medical Center Hospital,50 to 69,M,Other Race,Not Span/Hispanic,No,24.0,2023.0,4,184028.625
