In [9]:
# Step1-build DB and create collections

#Import libraries
import pymongo
import json
from flask import jsonify
#import csv bring in if we need to pull in csv
#import pandas as pd if we use any data frames bring in

# Name of the database this notebook (re)builds; kept in one place so the
# drop and the handle below always refer to the same database.
DB_NAME = 'healthbycountydata'

# Connect to the MongoDB server on this machine.
client = pymongo.MongoClient('mongodb://localhost:27017/')

# WARNING: destructive. Any existing copy of the database is removed so that
# every run of the notebook rebuilds it from the JSON files.
client.drop_database(DB_NAME)

# Create or get the database handle used by all later cells.
db = client[DB_NAME]


In [None]:
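# Step 2 - Loading data sets. All data sets must be available in a folder on the local machine.
# The data to load is in JSON format.
# NOTE: the file paths in the next cell read from the Cleaned_Data/ folder; this note previously named the folder Project_3_Data - confirm which name is current.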

In [10]:
# JSON data file paths (relative to the notebook's working directory).
# To add another data source, define its path on a new line below.
jsonfile_cancer = 'Cleaned_Data/cancer(2015-2019).json'
jsonfile_asthma = 'Cleaned_Data/asthma(2018-2020).json'
jsonfile_epa = 'Cleaned_Data/modified_final_epa_frs_clean.json'
jsonfile_healthOutcomes = 'Cleaned_Data/healthoutcome(2023).json'
#jsonfile_sig_vio ='Cleaned_Data/ECHO_Database_MN_SigViolations.geojson'

# List of JSON sources to loop through when building collections.
# If a data source is added above, add its variable name to this list.
sources = [jsonfile_cancer, jsonfile_asthma, jsonfile_epa, jsonfile_healthOutcomes]

# Build one MongoDB collection per source file.
for json_file in sources:
    # Collection name = file name without its directory and without anything
    # after the first '.', e.g. 'Cleaned_Data/asthma(2018-2020).json'
    # becomes 'asthma(2018-2020)'.
    collection_name = json_file.split('/')[-1].split('.')[0]

    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, and close the file before talking to the database.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # insert_many() needs a non-empty list of documents: wrap a single
    # top-level JSON object, and skip files that contain no documents.
    if isinstance(data, dict):
        data = [data]
    if not data:
        print(f"Skipping '{json_file}': no documents to insert")
        continue

    collection = db[collection_name]
    # Drop first so that re-running this cell replaces the collection
    # instead of appending a second copy of every document.
    collection.drop()
    collection.insert_many(data)

# Check to verify the collections were built.
print(db.list_collection_names())

['asthma(2018-2020)', 'cancer(2015-2019)', 'healthoutcome(2023)', 'modified_final_epa_frs_clean']


In [11]:
# Step 3 - Metadata for the API developer: print one example document from
# every collection in the database so the available fields are visible.
collection_names = db.list_collection_names()

for collection_name in collection_names:
    # find_one() with no filter returns a single document from the collection
    # (or None when the collection has no documents).
    sample = db[collection_name].find_one()
    # \033[1m ... \033[0m are ANSI escapes that render the heading in bold.
    print(f"\033[1mExample document from collection '{collection_name}'\n\n: \033[0m {sample}\n\n")

[1mExample document from collection 'asthma(2018-2020)'

: [0m {'_id': ObjectId('64c30956e9ab7fbdc0b2db33'), 'county': 'Aitkin', 'asthma_rate_per_100k': 2.0, 'count_of_cases': 9.0}


[1mExample document from collection 'cancer(2015-2019)'

: [0m {'_id': ObjectId('64c30956e9ab7fbdc0b2dadb'), 'county': 'Aitkin', 'county_population': 15834.0, 'cancer_rate_per_100k': 449.5}


[1mExample document from collection 'healthoutcome(2023)'

: [0m {'_id': ObjectId('64c30957e9ab7fbdc0b2ead6'), 'FIPS': 27001.0, 'county': 'Aitkin', 'hlt_outcome_z': 0.4314502056, 'hlt_outcome_rank': 69.0, 'hlt_factor_z': 0.7610145124, 'hlt_factor_rank': 82.0}


[1mExample document from collection 'modified_final_epa_frs_clean'

: [0m {'_id': ObjectId('64c30956e9ab7fbdc0b2db8b'), 'REGISTRY_ID': 110040710905, 'SITE PRIMARY NAME': '13869 FORMER GAS STATION', 'SITE_ADDRESS': '402 W THORPE AVE', 'COUNTY_NAME': 'NORMAN', 'SITE_TYPE_NAME': 'STATIONARY', 'INTEREST_TYPES': 'LEAKING UNDERGROUND STORAGE TANK - ARRA', 'LA

In [12]:
# Step 4 - Merging the data so the API can grab everything for a county
# from a single collection.

# Handles to the three source collections that carry a 'county' field,
# in the order they are merged below.
collection1, collection2, collection3 = (
    db[name]
    for name in ('cancer(2015-2019)', 'asthma(2018-2020)', 'healthoutcome(2023)')
)

def merge_collections(source_collection, target_collection):
    """Fold every document of ``source_collection`` into ``target_collection``.

    Documents are matched on their ``'county'`` value. For each source
    document, all of its fields except ``'_id'`` are written with ``$set``
    onto the target document for the same county; if no such target document
    exists yet, one is created (``upsert=True``).

    Args:
        source_collection: object whose ``find()`` yields dict-like documents
            that each contain a ``'county'`` key.
        target_collection: object exposing ``update_one(filter, update, upsert=...)``.

    Returns:
        None. The target collection is modified in place.

    Raises:
        KeyError: if a source document has no ``'county'`` key.
    """
    for record in source_collection.find():
        # Leave out the source '_id' so the target document keeps its own.
        fields = {key: value for key, value in record.items() if key != '_id'}
        match_on = {'county': fields['county']}
        target_collection.update_one(match_on, {'$set': fields}, upsert=True)

# Merge the documents of each source collection into a single collection
# named 'merged_collection', one document per county.
merged_collection = db['merged_collection']

# Same order as before: cancer, then asthma, then health outcomes.
for source in (collection1, collection2, collection3):
    merge_collections(source, merged_collection)

print("Merged collections successfully!")
# Show an example merged document for the API developer.
merged_collection.find_one()

Merged collections successfully!


{'_id': ObjectId('64c3096031be63eb8bf4d88c'),
 'county': 'Aitkin',
 'cancer_rate_per_100k': 449.5,
 'county_population': 15834.0,
 'asthma_rate_per_100k': 2.0,
 'count_of_cases': 9.0,
 'FIPS': 27001.0,
 'hlt_factor_rank': 82.0,
 'hlt_factor_z': 0.7610145124,
 'hlt_outcome_rank': 69.0,
 'hlt_outcome_z': 0.4314502056}

In [13]:

# Close the MongoDB connection opened at the top of the notebook now that
# the collections have been built and merged.
client.close()