# Directory Historical Signs - Import Data

Data source (NYC Open Data): https://data.cityofnewyork.us/Recreation/Directory-Historical-Signs/xdkk-pvdv

## Import Required Libraries

In [344]:
import json
import os

import pandas as pd
import pymongo
from bson.objectid import ObjectId
from pymongo import MongoClient

# Load the MongoDB connection string from a JSON secrets file.
# The file location can be overridden with the MONGODB_KEY_PATH environment
# variable; when it is unset, the original default path is used.
PATH_TO_SECRET_JSON = os.environ.get(
    'MONGODB_KEY_PATH', '/home/jovyan/keys/mongodb_key.json'
)
with open(PATH_TO_SECRET_JSON, encoding='utf-8') as f:
    # The secrets file is a JSON object with a 'connection_string' key
    MONGODB_URI = json.load(f)['connection_string']

# Relative data paths used later in the notebook resolve against this directory
current_working_directory = os.getcwd()

print("Current Working Directory:", current_working_directory)


Current Working Directory: /home/jovyan/work


## Load Data into DataFrame

In [324]:
# Read the entire JSON file and load it into a DataFrame
with open('../data/reference/DPR_HistoricalSigns_001.json', 'r', encoding='utf-8') as f:
    entire_data = json.load(f)

# Convert to DataFrame
df_entire = pd.DataFrame(entire_data)

# Convert the DataFrame to a list of dictionaries (one per row)
records_entire = df_entire.to_dict(orient='records')

# Create the collection handle here instead of relying on a cell that appears
# later in the notebook; otherwise this cell raises NameError on a fresh kernel.
# Assumes the connection string ends with "/<database>[?options]".
db_name = MONGODB_URI.split('/')[-1].split('?')[0]
client = MongoClient(MONGODB_URI)
collection = client[db_name].historicalSigns

# Insert the records into the MongoDB collection.
# NOTE: re-running this cell inserts the same records again (not idempotent).
if records_entire:
    insert_result = collection.insert_many(records_entire)
    inserted_count = len(insert_result.inserted_ids)
else:
    # insert_many() rejects an empty list, so skip the call entirely
    inserted_count = 0

# Confirm how many records the server actually accepted
print(f"{inserted_count} records inserted into 'historicalSigns' collection.")


2249 records inserted into 'historicalSigns' collection.


In [325]:
# Preview the first two rows of the working DataFrame.
# NOTE(review): `df` is not created by any cell visible in this notebook
# (only `df_entire` is) -- confirm where it is loaded before Restart & Run All.
df.head(n=2)

Unnamed: 0,name,location,borough,content,propID
0,Bartow-Pell Mansion,Pelham Bay Park,Bronx,<p>This historic house stands on a tract of la...,X039
1,E.M.T. Christopher J. Prescott Playground,1.156 Acres,Staten_Island,<p><strong>What was here before?</strong></p>\...,R085


In [312]:
# Column names of the working DataFrame, as a plain Python list
column_list = df.columns.to_list()
print("column_list:", column_list)

# Row count of the working DataFrame
total_records = df.shape[0]
print("Total Number of Records:", total_records)

column_list: ['name', 'location', 'borough', 'content', 'propID']
Total Number of Records: 20


## Connect to MongoDB

In [345]:
# Validate the connection string before using it, so a missing value produces
# a clear error instead of a NameError on `client` further down.
if not MONGODB_URI:
    raise ValueError("Failed to initialize the MongoDB client: connection string is empty.")

# Extract the database name from the connection string: the last path segment,
# with any "?options" suffix removed.
# Assumes the URI has the form ".../<database>[?options]".
db_name = MONGODB_URI.split('/')[-1].split('?')[0]
if not db_name:
    raise ValueError("The MongoDB connection string does not include a database name.")

# Create a MongoDB client using the connection string
client = MongoClient(MONGODB_URI)
print("The MongoDB client has been initialized.")

# Print the version of the pymongo package
# (requires the `pymongo` module itself to be imported, not only MongoClient)
pymongo_version = pymongo.__version__
print(f"The version of the pymongo package is {pymongo_version}")


# Connect to the specified MongoDB database and collection
db = client[db_name]
collection = db.historicalSigns

# An empty filter counts every document in the collection
total_records_in_collection = collection.count_documents({})
print(f"Total Number of Records in 'historicalSigns' Collection: {total_records_in_collection}")

The MongoDB client has been initialized.
The version of the pymongo package is 4.4.1
Total Number of Records in 'historicalSigns' Collection: 2269


## Insert Data into MongoDB

In [333]:
# Convert the DataFrame to a list of dictionaries
#records = df.to_dict(orient='records')

# Insert the records into the MongoDB collection
#collection.insert_many(records)

# Print a message indicating successful insertion
#print(f"{len(records)} records inserted into 'historicalSigns' collection.")


#### Total Records 

In [397]:
# Delete All -- the destructive call is intentionally left commented out;
# uncomment it to empty the collection before counting.
#collection.delete_many({})
remaining_count = collection.count_documents({})
print(f"Total records after deletion: {remaining_count}")

Total records after deletion: 2269


In [404]:
# Machine-tag assignments: each entry pairs a document `_id` (as a hex string)
# with a one-element list of tags, built from compact (id, tag) pairs.
data = [
    {'_id': doc_id, 'tags': [machine_tag]}
    for doc_id, machine_tag in (
        ('64f688c2a9b9517c24c7e7ce', 'nycparks:b010=206'),
        ('64f688c2a9b9517c24c7e656', 'nycparks:m071=266'),
        ('64f688c2a9b9517c24c7e6fd', 'nycparks:b047=722'),
        ('64f688c2a9b9517c24c7ebe7', 'nycparks:b035=397'),
        ('64f688c2a9b9517c24c7eb4c', 'nycparks:b073=1927'),
        ('64f688c2a9b9517c24c7e40f', 'nycparks:x040=1194'),
        ('64f688c2a9b9517c24c7e626', 'nycparks:q099=1893'),
        ('64f688c2a9b9517c24c7e91a', 'nycparks:q099=519'),
        ('64f688c2a9b9517c24c7e91b', 'nycwayfinding:lp=00826'),
        ('64f688c2a9b9517c24c7e67c', 'nycparks:x092=1212'),
        ('64f688c2a9b9517c24c7eb29', 'nycparks:m098=1682'),
        ('64f688c2a9b9517c24c7e925', 'nycparks:q075=589'),
        ('64f688c2a9b9517c24c7e9a6', 'nycparks:m071=666'),
        ('64f688c2a9b9517c24c7e83f', 'nycparks:b115=1892'),
        ('64f688c2a9b9517c24c7e7f2', 'nycparks:x080=751'),
        ('64f688dda9b9517c24c7ec74', 'nycwayfinding:lp=02234'),
        ('64f688c2a9b9517c24c7ebf3', 'nycwayfinding:lp=01924'),
        ('64f688c2a9b9517c24c7e746', 'nycparks:b114=1049'),
        ('64f688c2a9b9517c24c7e418', 'nycparks:x038=530'),
        ('64f688c2a9b9517c24c7e64f', 'nyccentralpark:landscapes=73554950'),
        ('64f688c2a9b9517c24c7eb6d', 'nycparks:m088=1453'),
        ('64f688c2a9b9517c24c7ebae', 'nycparks:q099=1908'),
        ('64f688c2a9b9517c24c7e889', 'nycwayfinding:lp=00835'),
        ('64f688c2a9b9517c24c7e45d', 'nyccentralpark:recreation=73554088'),
        ('64f688c2a9b9517c24c7e9b5', 'nycparks:m089=797'),
        ('64f688c2a9b9517c24c7e427', 'nycwayfinding:lp=00127'),
        ('64f688c2a9b9517c24c7e483', 'nycparks:m046=1633'),
        ('64f688c2a9b9517c24c7ec65', 'nycparks:m098=1657'),
        ('64f688c2a9b9517c24c7eb31', 'nycopendata:qz43=675'),
    )
]

In [405]:

from bson import ObjectId

# Add machine tags to existing documents, one update per entry in `data`
for item in data:
    # `_id` values in `data` are hex strings; convert them to ObjectId for the query
    query = {'_id': ObjectId(item['_id'])}

    # Each element of item['tags'] may hold several comma-separated tags:
    # flatten them into [{'tag': ...}, ...] and drop empty fragments
    # (e.g. from a trailing comma) so no {'tag': ''} entry is ever written
    new_tags = [
        {'tag': tag.strip()}
        for tag_list in item['tags']
        for tag in tag_list.split(",")
        if tag.strip()
    ]

    if not new_tags:
        print(f"Document with _id: {item['_id']} skipped: no tags to add.")
        continue

    # Debug: Show the tags that are about to be added
    print(f"Adding tags: {new_tags}")

    # Use "$addToSet" with "$each" to add all new tags if they don't already exist
    update_result = collection.update_one(
        query, {'$addToSet': {'machineTags': {'$each': new_tags}}}
    )

    # matched_count tells a missing document apart from one that already has
    # every tag; modified_count alone cannot distinguish the two cases
    if update_result.matched_count == 0:
        print(f"Document with _id: {item['_id']} not found.")
    elif update_result.modified_count > 0:
        print(f"Document with _id: {item['_id']} updated.")
    else:
        print(f"Document with _id: {item['_id']} not updated or tag(s) already exist.")



Adding tags: [{'tag': 'nycparks:b010=206'}]
Document with _id: 64f688c2a9b9517c24c7e7ce updated.
Adding tags: [{'tag': 'nycparks:m071=266'}]
Document with _id: 64f688c2a9b9517c24c7e656 updated.
Adding tags: [{'tag': 'nycparks:b047=722'}]
Document with _id: 64f688c2a9b9517c24c7e6fd updated.
Adding tags: [{'tag': 'nycparks:b035=397'}]
Document with _id: 64f688c2a9b9517c24c7ebe7 updated.
Adding tags: [{'tag': 'nycparks:b073=1927'}]
Document with _id: 64f688c2a9b9517c24c7eb4c updated.
Adding tags: [{'tag': 'nycparks:x040=1194'}]
Document with _id: 64f688c2a9b9517c24c7e40f updated.
Adding tags: [{'tag': 'nycparks:q099=1893'}]
Document with _id: 64f688c2a9b9517c24c7e626 updated.
Adding tags: [{'tag': 'nycparks:q099=519'}]
Document with _id: 64f688c2a9b9517c24c7e91a updated.
Adding tags: [{'tag': 'nycwayfinding:lp=00826'}]
Document with _id: 64f688c2a9b9517c24c7e91b updated.
Adding tags: [{'tag': 'nycparks:x092=1212'}]
Document with _id: 64f688c2a9b9517c24c7e67c updated.
Adding tags: [{'tag':