In [1]:
# Step1-build DB and create collections

#Import libraries
import pymongo
import json
from flask import jsonify
#import csv  # uncomment if we need to read CSV files
#import pandas as pd  # uncomment if we use any data frames

# Connection settings for the local MongoDB server
MONGO_URI = 'mongodb://localhost:27017/'
DB_NAME = 'healthbycountydata'

# Open the client, then drop any existing copy of the database so every
# run of the notebook rebuilds the collections from scratch.
client = pymongo.MongoClient(MONGO_URI)
client.drop_database(DB_NAME)

# Handle to the 'healthbycountydata' database used by the cells below
db = client[DB_NAME]


In [2]:
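# Step 2 - Load the data sets. All data sets must be in a folder on the local machine.
# The data is in JSON format.
# NOTE: the JSON folder is titled Project_3_Data here, but the file paths in this notebook read from Cleaned_Data/ - confirm which is current.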

In [3]:
# JSON data file paths.
# To add another data source, define its path on a new line below.
jsonfile_cancer = 'Cleaned_Data/cancer(2015-2019).json'
jsonfile_asthma = 'Cleaned_Data/asthma(2018-2020).json'
jsonfile_epa = 'Cleaned_Data/modified_final_epa_frs_clean.json'
jsonfile_healthOutcomes = 'Cleaned_Data/healthoutcome(2023).json'
# The significant-violations GeoJSON is not part of this list; it is loaded
# separately because only its 'features' array is inserted.
#jsonfile_sig_vio ='Cleaned_Data/ECHO_Database_MN_SigViolations.geojson'

# List of JSON sources to loop through when building the collections.
# If a data source is added above, add its variable name to this list.
sources = [jsonfile_cancer, jsonfile_asthma, jsonfile_epa, jsonfile_healthOutcomes]

# Loop through the sources and create one MongoDB collection per file.
for json_file in sources:
    # Explicit encoding so the load does not depend on the platform default;
    # the file is closed before talking to the database.
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Collection name = file name without its directory and extension,
    # e.g. 'Cleaned_Data/cancer(2015-2019).json' -> 'cancer(2015-2019)'.
    collection = db[(json_file.split('/')[-1].split('.')[0])]

    # insert_many rejects an empty document list, so skip empty files
    # instead of aborting the whole build.
    if data:
        collection.insert_many(data)
    else:
        print(f"Skipped '{json_file}': no documents to insert")

# Check to verify the collections were built
print(db.list_collection_names())

['asthma(2018-2020)', 'healthoutcome(2023)', 'modified_final_epa_frs_clean', 'cancer(2015-2019)']


In [4]:
# Step 3 - Metadata on the collections, for the API developer
collection_names = db.list_collection_names()

# Print one sample document per collection so its field names are visible.
for collection_name in collection_names:
    collection = db[collection_name]
    document = collection.find_one()
    print(
        f"\033[1mExample document from collection '{collection_name}'\n\n: \033[0m {document}\n\n"
    )

[1mExample document from collection 'asthma(2018-2020)'

: [0m {'_id': ObjectId('64c30bf28b9cc1ef8a5b93f0'), 'County': 'Aitkin', 'Age-adjusted rate per 10,000': 2.0, 'Count of cases': 9.0}


[1mExample document from collection 'healthoutcome(2023)'

: [0m {'_id': ObjectId('64c30bf28b9cc1ef8a5c1ce7'), 'FIPS': 27001.0, 'County': 'Aitkin', 'HealthOutcome(Z-Score)': 0.4314502056, 'HealthOutcome(Rank)': 69.0, 'HealthFactor(Z-Score)': 0.7610145124, 'HealthFactor(Rank)': 82.0}


[1mExample document from collection 'modified_final_epa_frs_clean'

: [0m {'_id': ObjectId('64c30bf28b9cc1ef8a5b9448'), 'REGISTRY_ID': 110028032572, 'PRIMARY_NAME': 'ADAMS WIND FARM', 'LOCATION_ADDRESS': '12933 660 AVE', 'COUNTY_NAME': 'MOWER', 'SITE_TYPE_NAME': 'STATIONARY', 'INTEREST_TYPES': 'ELECTRIC GENERATOR, ELECTRIC POWER GENERATOR (WIND BASED)', 'LATITUDE': 43.54253, 'LONGITUDE': -92.728713}


[1mExample document from collection 'cancer(2015-2019)'

: [0m {'_id': ObjectId('64c30bf28b9cc1ef8a5b9398'), '

In [5]:
# Step 4 - Merge the data so the API can grab it from a single collection

# Handles to the three source collections (cancer, asthma, health outcomes)
collection1, collection2, collection3 = (
    db[name]
    for name in ('cancer(2015-2019)', 'asthma(2018-2020)', 'healthoutcome(2023)')
)

# Function to merge documents from multiple collections
def merge_collections(source_collection, target_collection, key_field='County'):
    """Upsert every document of ``source_collection`` into ``target_collection``.

    Documents are matched on ``key_field``: the fields of each source
    document are ``$set`` onto the target document holding the same key
    value, and a new target document is created when none exists yet
    (``upsert=True``).

    Args:
        source_collection: collection-like object whose ``find()`` yields
            the documents to merge.
        target_collection: collection-like object receiving the
            ``update_one`` upserts.
        key_field: name of the field used to match documents. Defaults to
            ``'County'``, the key the original implementation hard-coded.

    Returns:
        int: number of source documents merged. Documents that do not
        contain ``key_field`` are skipped rather than raising ``KeyError``
        part-way through and leaving the target half-merged.
    """
    merged_count = 0
    for document in source_collection.find():
        if key_field not in document:
            # Nothing to match on; skip instead of aborting the whole merge.
            continue
        key_value = document[key_field]
        # Drop the source '_id' so it is not written onto the target
        # document, whose own '_id' must stay unchanged.
        document.pop('_id', None)
        target_collection.update_one(
            {key_field: key_value}, {'$set': document}, upsert=True
        )
        merged_count += 1
    return merged_count

# Destination collection, named 'merged_collection', that receives the merged documents
merged_collection = db['merged_collection']

# Merge the sources one after another: collection1, collection2, collection3
for source in (collection1, collection2, collection3):
    merge_collections(source, merged_collection)

print("Merged collections successfully!")
# Show one merged document as an example for the API developer.
merged_collection.find_one()

Merged collections successfully!


{'_id': ObjectId('64c30bf2f0cd20f757eab4ba'),
 'County': 'Aitkin',
 'County population': 15834.0,
 'Rate per 100,000': 449.5,
 'Age-adjusted rate per 10,000': 2.0,
 'Count of cases': 9.0,
 'FIPS': 27001.0,
 'HealthFactor(Rank)': 82.0,
 'HealthFactor(Z-Score)': 0.7610145124,
 'HealthOutcome(Rank)': 69.0,
 'HealthOutcome(Z-Score)': 0.4314502056}

In [6]:
# Load the ECHO significant-violations GeoJSON into its own collection.
# The collection is named "jsonfile_sig_vio"; keep the name in sync with
# the find_one() lookup at the end of this cell.
collection = db["jsonfile_sig_vio"]

# Read the GeoJSON file (explicit encoding so the load does not depend on
# the platform default).
with open("Cleaned_Data/ECHO_Database_MN_SigViolations.geojson", "r", encoding="utf-8") as file:
    data = json.load(file)

# Extract the 'features' array from the GeoJSON data
features = data.get("features", [])

# Insert all features in a single batch instead of one call per feature.
# insert_many rejects an empty list, so only call it when there is data.
if features:
    collection.insert_many(features)

# Show one inserted feature as an example
db['jsonfile_sig_vio'].find_one()

{'_id': ObjectId('64c30bf28b9cc1ef8a5c1d3e'),
 'type': 'Feature',
 'geometry': {'type': 'Point', 'coordinates': [-92.908333, 44.789444]},
 'properties': {'FacName': '3M COTTAGE GROVE CENTER',
  'FacStreet': '10746 INNOVATION ROAD',
  'FacCity': 'COTTAGE GROVE',
  'FacState': 'MN',
  'RegistryID': '110000423667',
  'FacSNCFlg': 'Y',
  'FacQtrsWithNC': 12,
  'FacInspectionCount': 22,
  'FacFormalActionCount': 3,
  'FacMapFlg': 'Y',
  'SupOver80CountUsDisp': '0'}}

In [7]:

# Close the MongoDB connection held by `client` now that all cells above have run
client.close()