# NYC Restaurants — PyMongo CRUD
Run cells top-to-bottom.

In [2]:
# Install the MongoDB driver (pymongo) and pandas into this kernel's environment; -q keeps pip quiet.
# NOTE(review): versions are unpinned, and pandas is not imported by any cell visible here — confirm it is needed.
%pip -q install pymongo pandas

Note: you may need to restart the kernel to use updated packages.


In [3]:
from pymongo import MongoClient
import json, os
from datetime import datetime
from pprint import pprint

# Connection settings shared by every cell below.
MONGO_URI = "mongodb://mongo:27017"
DB_NAME = "nyc"
COLL_NAME = "restaurants"

client = MongoClient(MONGO_URI)
# MongoClient connects lazily: constructing it succeeds even if no server is
# reachable. A cheap "ping" forces a round trip, so this cell fails here
# (instead of in a later cell) when MongoDB cannot be reached.
client.admin.command("ping")

db = client[DB_NAME]
coll = db[COLL_NAME]
print("Connected to", MONGO_URI)


Connected to mongodb://mongo:27017


In [5]:
import pathlib, json
from datetime import datetime

# NOTE(review): absolute path — adjust if the dataset lives elsewhere in your environment.
data_path = pathlib.Path("/home/jovyan/work/data/restaurants.json")  # dataset to import: a JSON array or an NDJSON file

def load_docs_flex(path: pathlib.Path):
    """
    Load documents from a JSON file stored in either of two layouts.

    Supported layouts:
      * a JSON array:         [ {...}, {...} ]
      * NDJSON / JSON Lines:  one JSON document per line (blank lines are skipped)

    Args:
        path: Location of the file. It is decoded as UTF-8; a leading
            byte-order mark is tolerated.

    Returns:
        The decoded documents as a list. An empty list is returned when the
        file is empty or contains only whitespace.

    Raises:
        ValueError: If an NDJSON line is not valid JSON; the message carries
            the 1-based line number. A malformed JSON array raises
            json.JSONDecodeError (a ValueError subclass) unchanged.
    """
    # "utf-8-sig" drops a leading BOM, which would otherwise hide the "[" that
    # identifies a JSON array and make the first NDJSON line unparsable.
    with open(path, "r", encoding="utf-8-sig") as f:
        txt = f.read().strip()
    if not txt:
        return []

    # JSON array?
    if txt[0] == "[":
        return json.loads(txt)

    # NDJSON: one line = one document.
    # Split on the newline character only: str.splitlines() also breaks on
    # characters such as U+2028, U+2029 and U+0085, which may appear unescaped
    # inside a JSON string and would cut a valid document in half.
    docs = []
    for i, line in enumerate(txt.split("\n"), 1):
        line = line.strip()
        if not line:
            continue
        try:
            docs.append(json.loads(line))
        except json.JSONDecodeError as e:
            # Chain explicitly so the original decode error stays attached.
            raise ValueError(f"Ligne {i} invalide (pas un JSON): {e}") from e
    return docs

# Seed the collection only when it is empty, so re-running this cell does not
# insert the dataset a second time.
if coll.estimated_document_count() == 0:
    docs = load_docs_flex(data_path)

    # Normalise grade dates when present (ISO-8601 string -> datetime).
    # NOTE(review): if the file is a mongoexport-style dump, dates may arrive as
    # Extended JSON {"$date": ...} objects rather than strings; those are left
    # untouched here — confirm against the data file.
    for d in docs:
        for g in d.get("grades") or []:  # `or []` tolerates "grades": null
            raw_date = g.get("date")
            if not isinstance(raw_date, str):
                continue
            # datetime.fromisoformat() only accepts a trailing "Z" from
            # Python 3.11 on; "+00:00" is the equivalent UTC offset and parses
            # on older versions too.
            iso_text = raw_date[:-1] + "+00:00" if raw_date.endswith("Z") else raw_date
            try:
                g["date"] = datetime.fromisoformat(iso_text)
            except ValueError:
                # Not ISO-8601: deliberately keep the original string, which
                # is inserted as-is.
                pass

    if not docs:
        print("Aucun document chargé (fichier vide ?)")
    else:
        res = coll.insert_many(docs)
        print(f"✅ Inserted {len(res.inserted_ids)} documents from {data_path.name}")
else:
    print("Collection already has data:", coll.estimated_document_count())



JSONDecodeError: Extra data: line 2 column 1 (char 544)

## CREATE

In [None]:
# Sample document inserted to demonstrate CREATE.
new_restaurant = {
    "name": "Rayen Test Kitchen",
    "borough": "Brooklyn",
    "cuisine": "Bakery",
    "grades": [
        {"date": datetime(2024, 1, 5), "grade": "A"},
    ],
    "address": {
        "building": "777",
        "street": "Flatbush Ave",
        "zipcode": "11226",
    },
}

# insert_one() returns a result object whose inserted_id is the stored document's _id.
insert_result = coll.insert_one(new_restaurant)
print("Inserted _id:", insert_result.inserted_id)


## READ

In [None]:
# Upper bound on the documents printed by each "(preview)" query below, so a
# large collection does not flood the cell output.
PREVIEW_LIMIT = 20

total = coll.count_documents({})
print("Total restaurants:", total)

print("\nBronx restaurants (preview):")
bronx_cursor = coll.find(
    {"borough": "Bronx"},
    {"_id": 0, "name": 1, "borough": 1},
).limit(PREVIEW_LIMIT)
for r in bronx_cursor:
    print("-", r)

print("\nBakery with grade A (preview):")
# "grades.$" is MongoDB's positional projection: only the first grade that
# matched the filter is returned for each restaurant.
bakery_cursor = coll.find(
    {"cuisine": "Bakery", "grades.grade": "A"},
    {"_id": 0, "name": 1, "grades.$": 1},
).limit(PREVIEW_LIMIT)
for r in bakery_cursor:
    print("-", r)


## UPDATE

In [None]:
# Select the shop by name, requiring at least one grade so that "grades.0" exists.
shop_with_grades = {"name": "Morris Park Bake Shop", "grades.0": {"$exists": True}}
# Overwrite the grade of the first entry in the grades array.
set_first_grade = {"$set": {"grades.0.grade": "A+"}}

update_res = coll.update_one(shop_with_grades, set_first_grade)
print("Matched:", update_res.matched_count, "| Modified:", update_res.modified_count)

# Read the document back to show the stored grades.
updated_shop = coll.find_one(
    {"name": "Morris Park Bake Shop"},
    {"_id": 0, "name": 1, "grades": 1},
)
pprint(updated_shop)


## DELETE

In [None]:
# Matches every restaurant whose grades array contains at least one "C" grade.
has_grade_c = {"grades.grade": "C"}

delete_res = coll.delete_many(has_grade_c)
print("Deleted grade C:", delete_res.deleted_count)
# Count again with the same filter to confirm nothing matching is left.
print("Remaining grade C:", coll.count_documents(has_grade_c))
