## MongoDB - API Processing

In [477]:
import json
import pymongo
import pandas as pd
import pprint
import os
from pymongo import MongoClient, UpdateOne
from bson.objectid import ObjectId
from datetime import datetime
import requests
import json

# Load the MongoDB connection string from the JSON file
PATH_TO_SECRET_JSON = '/home/jovyan/keys/mongodb_key.json'
with open(PATH_TO_SECRET_JSON) as f:
    MONGODB_URI = json.load(f)['connection_string']

# Validate the connection string BEFORE using it. Previously the emptiness
# check ran after the string had already been split, and its failure branch
# only printed a message, leaving `client` undefined for the lines below.
if not MONGODB_URI:
    raise ValueError(
        "Failed to initialize the MongoDB client: "
        f"'connection_string' in {PATH_TO_SECRET_JSON} is empty."
    )

# Extract the database name from the connection string: the text after the
# last '/' with any '?options' suffix removed.
db_name = MONGODB_URI.split('/')[-1].split('?')[0]
if not db_name:
    # e.g. a URI ending in "/?retryWrites=true" yields an empty name here.
    raise ValueError(
        "The MongoDB connection string does not include a database name."
    )

# Create a MongoDB client using the connection string
client = MongoClient(MONGODB_URI)
print("The MongoDB client has been initialized.")

# Print the version of the pymongo package
pymongo_version = pymongo.__version__
print(f"The version of the pymongo package is {pymongo_version}")

# Connect to the specified MongoDB database and collection
db = client[db_name]
collection = db.attractions

The MongoDB client has been initialized.
The version of the pymongo package is 4.4.1


In [482]:
# Stage 1: keep only documents that have a landmark.lpNumber field.
match_stage = {"$match": {"landmark.lpNumber": {"$exists": True}}}

# Stage 2: reduce each document to its id, title and the nested lpNumber,
# which is lifted to a top-level field.
project_stage = {
    "$project": {"_id": 1, "title": 1, "lpNumber": "$landmark.lpNumber"}
}

# Stage 3: order the output by lpNumber, ascending.
sort_stage = {"$sort": {"lpNumber": 1}}

pipeline = [match_stage, project_stage, sort_stage]

# Run the aggregation and materialise every returned document into a DataFrame.
cursor = collection.aggregate(pipeline)
records = list(cursor)
df = pd.DataFrame(records)
df


Unnamed: 0,_id,title,lpNumber
0,6444d14ee7abb0665474a208,Pieter Claesen Wyckoff House,LP-00001
1,6455b7e44673c82a34f4ecca,Commandant's House,LP-00002
2,64bc67c954ab1c604a34d1bc,U.S. Naval Hospital,LP-00003
3,54fa99fa5c95965f78def7bf,Audubon Center at the Boathouse,LP-00004
4,560f2c12f89701aedbaebc2c,Kingsland Homestead,LP-00005
...,...,...,...
1369,64a2f99afc95c50bc98dcf7c,Gowanus Canal Flushing Tunnel Pumping Station ...,LP-02638
1370,64a30238fc95c50bc98dcf82,Brooklyn Rapid Transit Company (BRT) Central P...,LP-02639
1371,64a300b5fc95c50bc98dcf81,Somers Brothers Tinware Factory (later America...,LP-02640
1372,64a2fd9dfc95c50bc98dcf7e,Montauk Paint Manufacturing Company Building,LP-02641


In [475]:
# Export folder; it is created here when it does not already exist.
output_directory = "../downloads/csv/"
os.makedirs(output_directory, exist_ok=True)

# Build a file name stamped with the current date and time.
current_datetime = f"{datetime.now():%Y_%m_%d_%H_%M_%S}"
csv_filename = "landmarks_" + current_datetime + ".csv"
csv_filepath = os.path.join(output_directory, csv_filename)

# Write the DataFrame without its index column, then report the destination.
df.to_csv(csv_filepath, index=False)
print("CSV file saved to: " + csv_filepath)


CSV file saved to: ../downloads/csv/landmarks_2023_09_30_23_56_21.csv


In [481]:
import requests
import json

# Base API endpoint and headers
url = 'https://microservice-api-w6zlqlyoma-uk.a.run.app/api/v1/FeatureService/attributelookup'
headers = {
    'accept': 'application/json',
    'Content-Type': 'application/json'
}

# Seconds to wait on a single API call before giving up on that lpNumber.
# Without a timeout, one stalled connection would hang the whole loop.
REQUEST_TIMEOUT_SECONDS = 30

# Create an empty list to store the results
results = []

# Look up each distinct lpNumber once. Repeated values would otherwise cause
# redundant API calls and duplicate rows in `results`, which in turn would
# multiply rows in the left merge below.
lp_numbers = df['lpNumber'].dropna().unique()

# A single session reuses the HTTP connection across the many sequential calls.
with requests.Session() as session:
    session.headers.update(headers)

    for lp_number in lp_numbers:
        # Payload for the API call
        payload = {
            "key": "IndividualLandmarkHistoricDistricts",
            "attributes": [
                {
                    "key": "LPNumber",
                    "value": lp_number
                }
            ]
        }

        # Make the API call; a network error or timeout skips this lpNumber
        # instead of aborting the run and discarding everything collected so far.
        try:
            response = session.post(url, json=payload, timeout=REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException as exc:
            print(f"API call for {lp_number} failed: {exc}")
            continue

        # Handle the API response
        if response.status_code != 200:
            print(f"API call for {lp_number} failed with status code {response.status_code}.")
            continue

        print(f"API call for {lp_number} was successful!")

        try:
            api_data = response.json()
        except ValueError:
            # A 200 response whose body is not valid JSON.
            print(f"API call for {lp_number} returned a body that is not valid JSON.")
            continue

        # Only the first item is used, and only when the response really is a
        # non-empty list whose first element is a dict.
        # NOTE(review): the list-of-dicts response shape is an assumption carried
        # over from the original code - confirm against the API contract.
        if isinstance(api_data, list) and api_data and isinstance(api_data[0], dict):
            results.append({
                "lpNumber": lp_number,
                "areaName": api_data[0].get("areaName", "")
            })

# Convert results to a DataFrame. Naming the columns explicitly keeps the merge
# key present even when no lookup succeeded (an empty list would otherwise
# produce a frame with no 'lpNumber' column and the merge would raise KeyError).
results_df = pd.DataFrame(results, columns=["lpNumber", "areaName"])

# Merge the two DataFrames on 'lpNumber'
merged_df = pd.merge(df, results_df, on='lpNumber', how='left')

# results_df holds at most one row per lpNumber, so the left merge must keep
# exactly one output row per input row.
assert len(merged_df) == len(df), "merge changed the number of rows"

# Save the merged DataFrame to a CSV file, creating the folder if needed so
# this cell does not depend on another cell having created it.
merged_csv_path = "../downloads/csv/output_filename.csv"
os.makedirs(os.path.dirname(merged_csv_path), exist_ok=True)
merged_df.to_csv(merged_csv_path, index=False)



API call for LP-00001 was successful!
API call for LP-00002 was successful!
API call for LP-00003 was successful!
API call for LP-00004 was successful!
API call for LP-00005 failed with status code 500.
API call for LP-00006 was successful!
API call for LP-00007 was successful!
API call for LP-00008 was successful!
API call for LP-00009 was successful!
API call for LP-00010 was successful!
API call for LP-00011 was successful!
API call for LP-00012 was successful!
API call for LP-00013 was successful!
API call for LP-00016 was successful!
API call for LP-00017 was successful!
API call for LP-00018 was successful!
API call for LP-00020 was successful!
API call for LP-00022 was successful!
API call for LP-00023 was successful!
API call for LP-00024 was successful!
API call for LP-00025 was successful!
API call for LP-00026 was successful!
API call for LP-00027 was successful!
API call for LP-00029 was successful!
API call for LP-00030 was successful!
API call for LP-00036 was successful!