# Directory Historical Signs - Import Data

Source dataset (NYC Open Data): https://data.cityofnewyork.us/Recreation/Directory-Historical-Signs/xdkk-pvdv

## Import Required Libraries

In [314]:
import json
import os

import pandas as pd
import pymongo
from pymongo import MongoClient

# The MongoDB connection string lives in a JSON key file outside the notebook,
# under the 'connection_string' key, so the secret never appears in this file.
PATH_TO_SECRET_JSON = '/home/jovyan/keys/mongodb_key.json'
with open(PATH_TO_SECRET_JSON) as key_file:
    MONGODB_URI = json.load(key_file)['connection_string']

# Show where the kernel is running; relative data paths resolve against this.
current_working_directory = os.getcwd()

print("Current Working Directory:", current_working_directory)


Current Working Directory: /home/jovyan/work


## Load Data into DataFrame

In [301]:
# Read the historical-signs JSON export. JSON text is UTF-8, so the encoding is
# given explicitly rather than left to the platform's default locale encoding.
with open('../data/reference/DPR_HistoricalSigns_001.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Get first 20 records (the slice assumes the top-level JSON value is a list)
first_20_records = data[:20]

# Convert to DataFrame
df = pd.DataFrame(first_20_records)

In [313]:
# Preview the first two rows to sanity-check the loaded columns.
df.iloc[:2]

Unnamed: 0,name,location,borough,content,propID
0,Bartow-Pell Mansion,Pelham Bay Park,Bronx,<p>This historic house stands on a tract of la...,X039
1,E.M.T. Christopher J. Prescott Playground,1.156 Acres,Staten_Island,<p><strong>What was here before?</strong></p>\...,R085


In [312]:
# Summarise the frame: the column names and how many rows were loaded.
column_list = df.columns.to_list()
print("column_list:", column_list)

# The first element of the shape tuple is the row count.
total_records = df.shape[0]
print("Total Number of Records:", total_records)

column_list: ['name', 'location', 'borough', 'content', 'propID']
Total Number of Records: 20


## Connect to MongoDB

In [320]:
# Connect to MongoDB and open the 'historicalSigns' collection.
#
# Fail fast when the connection string is missing: without a client nothing
# below in this cell can run, so a clear error is better than a later NameError.
if not MONGODB_URI:
    raise ValueError("Failed to initialize the MongoDB client: the connection string is empty.")

# Create a MongoDB client using the connection string
client = MongoClient(MONGODB_URI)
print("The MongoDB client has been initialized.")

# Print the version of the pymongo package
pymongo_version = pymongo.__version__
print(f"The version of the pymongo package is {pymongo_version}")

# Use the database named in the connection string. Letting the driver parse the
# URI avoids picking up the host (or an empty string) as the database name when
# the URI has no path component; pymongo raises ConfigurationError in that case.
db = client.get_default_database()
db_name = db.name
collection = db.historicalSigns

# Report how many documents the collection already holds before any insert.
total_records_in_collection = collection.count_documents({})
print(f"Total Number of Records in 'historicalSigns' Collection: {total_records_in_collection}")

The MongoDB client has been initialized.
The version of the pymongo package is 4.4.1
Total Number of Records in 'historicalSigns' Collection: 0


## Insert Data into MongoDB

In [None]:
# Convert the DataFrame to a list of dictionaries and insert them into MongoDB
# Convert the DataFrame to a list of dictionaries (one per row) and insert them
# into MongoDB.
# NOTE: every run builds fresh documents without an '_id', so re-running this
# cell inserts the same rows again as new documents.
records = df.to_dict(orient='records')

# insert_many rejects an empty list, so skip the call when there is nothing to load.
if records:
    insert_result = collection.insert_many(records)
    print(f"Inserted {len(insert_result.inserted_ids)} records into '{collection.name}'.")
else:
    print("No records to insert: the DataFrame is empty.")