In [1]:
import pandas as pd
from pymongo import MongoClient

# Connect to MongoDB
client = MongoClient("mongodb://localhost:27017/")
db = client["yelp_dataset"]

businesses_collection = db["businesses"]
reviews_collection = db["reviews"]
users_collection = db["users"]

# make sure to add run the code to do this in setup.ipynb before running this
business_file = "sampled_businesses.csv"
review_file = "filtered_reviews.csv"
user_file = "filtered_users.csv"

# Load and insert data into MongoDB
def load_csv_to_mongo(file_path, collection):
    data = pd.read_csv(file_path)
    records = data.to_dict(orient="records")  # Convert DataFrame to list of dicts
    collection.insert_many(records)
    print(f"Inserted {len(records)} records into {collection.name} collection.")

# Load data into collections
load_csv_to_mongo(business_file, businesses_collection)
load_csv_to_mongo(review_file, reviews_collection)
load_csv_to_mongo(user_file, users_collection)

Inserted 10000 records into businesses collection.
Inserted 445853 records into reviews collection.
Inserted 295709 records into users collection.


In [2]:
print("Number of businesses:", businesses_collection.count_documents({}))
print("Number of reviews:", reviews_collection.count_documents({}))
print("Number of users:", users_collection.count_documents({}))

Number of businesses: 30000
Number of reviews: 1337559
Number of users: 887127


In [3]:
# sample easy query
five_star_businesses = businesses_collection.find({"stars": 5})
for business in five_star_businesses:
    print(business)

pipeline = [
    {"$sort": {"review_count": -1}},
    {"$limit": 10}
]
top_users = users_collection.aggregate(pipeline)
for user in top_users:
    print(user)

{'_id': ObjectId('67524b88ccf0cdec735bd0ec'), 'business_id': 'An2JUt207oyqhI4mkTZGWA', 'name': 'Dentopia Dental', 'city': 'Tucson', 'stars': 5.0, 'review_count': 6, 'categories': 'Cosmetic Dentists, Health & Medical, Pediatric Dentists, Dentists, General Dentistry'}
{'_id': ObjectId('67524b88ccf0cdec735bd100'), 'business_id': 'Z8_5Hu2QqZux95GbPRMKpA', 'name': 'NOLA Doc', 'city': 'New Orleans', 'stars': 5.0, 'review_count': 18, 'categories': 'Health & Medical, Hospice, Nutritionists, Family Practice, IV Hydration, Urgent Care, Home Health Care, Doctors'}
{'_id': ObjectId('67524b88ccf0cdec735bd103'), 'business_id': 'nrinDIKjrb3E5HKMUCCUKg', 'name': 'Michill Designs', 'city': 'Reno', 'stars': 5.0, 'review_count': 6, 'categories': 'Videographers, Graphic Design, Real Estate Photography, Professional Services, Event Planning & Services, Home Services, Web Design, Real Estate, Real Estate Services, Video/Film Production, Event Photography, Photographers'}
{'_id': ObjectId('67524b88ccf0cdec73