# Yelp Data Analysis with Neo4j

In [1]:
from neo4jUtils import *
import os
import time

### Configuration

In [2]:
# Connection settings are read from environment variables so that real
# credentials do not have to be stored in the notebook. The literals are the
# previous hardcoded values, kept only as local-development fallbacks.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
# SECURITY: set NEO4J_PASSWORD in the environment for any non-local database;
# the fallback below is a placeholder and must not be a real secret.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "123456789")

# Input files handed to load_businesses / load_users / load_tips respectively.
BUSINESS_FILE = "reduced_businesses.json"
USER_FILE = "reduced_users.json"
TIP_FILE = "reduced_tips.json"

### Load Data

In [3]:
# Reset the database and (re)create the schema constraints/indexes.
# WARNING: clear_database() is destructive; the log output reports
# "Database cleared" when it runs.
loader = Neo4jLoader(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
try:
    # Clear existing data (optional - comment out if you want to keep existing data)
    loader.clear_database()

    # Create constraints and indexes
    loader.create_constraints()
finally:
    # Call close() even when clearing or constraint creation raises, so the
    # loader is not left open after a failed run.
    loader.close()

2024-11-22 23:28:08,217 - INFO - Database cleared
2024-11-22 23:28:08,300 - INFO - Created constraint/index: CREATE CONSTRAINT business_id IF NOT EXISTS FOR (b:Business) REQUIRE b.business_id IS UNIQUE
2024-11-22 23:28:08,352 - INFO - Created constraint/index: CREATE CONSTRAINT user_id IF NOT EXISTS FOR (u:User) REQUIRE u.user_id IS UNIQUE
2024-11-22 23:28:08,407 - INFO - Created constraint/index: CREATE CONSTRAINT category_name IF NOT EXISTS FOR (c:Category) REQUIRE c.name IS UNIQUE
2024-11-22 23:28:08,452 - INFO - Created constraint/index: CREATE CONSTRAINT tip_composite IF NOT EXISTS FOR (t:Tip) REQUIRE (t.user_id, t.business_id, t.date) IS UNIQUE
2024-11-22 23:28:08,475 - INFO - Created constraint/index: CREATE INDEX business_name IF NOT EXISTS FOR (b:Business) ON (b.name)
2024-11-22 23:28:08,532 - INFO - Created constraint/index: CREATE INDEX business_city IF NOT EXISTS FOR (b:Business) ON (b.city)
2024-11-22 23:28:08,568 - INFO - Created constraint/index: CREATE INDEX user_name I

In [4]:
# Import the business file and log the wall-clock duration of the import.
# Errors are logged through the loader's logger rather than re-raised, and the
# loader is closed in every case.
loader = Neo4jLoader(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

try:
    import_started = time.time()
    loader.load_businesses(BUSINESS_FILE)
    elapsed_seconds = time.time() - import_started
    loader.logger.info(f"Total import time: {elapsed_seconds:.2f} seconds")
except Exception as import_error:
    loader.logger.error(f"Error during import: {str(import_error)}")
finally:
    loader.close()

2024-11-22 23:28:12,297 - INFO - Starting business import
2024-11-22 23:28:15,895 - INFO - Processed 901 businesses
2024-11-22 23:28:16,535 - INFO - Processed 1701 businesses
2024-11-22 23:28:17,188 - INFO - Processed 2444 businesses
2024-11-22 23:28:17,755 - INFO - Processed 3171 businesses
2024-11-22 23:28:18,265 - INFO - Processed 3891 businesses
2024-11-22 23:28:18,704 - INFO - Processed 4564 businesses
2024-11-22 23:28:19,123 - INFO - Processed 5268 businesses
2024-11-22 23:28:19,537 - INFO - Processed 5955 businesses
2024-11-22 23:28:19,918 - INFO - Processed 6614 businesses
2024-11-22 23:28:20,278 - INFO - Processed 7272 businesses
2024-11-22 23:28:20,278 - INFO - Completed business import. Total businesses created: 7272
2024-11-22 23:28:20,281 - INFO - Total import time: 7.98 seconds


In [5]:
# Load user data
# The loader is constructed *before* the try block: if construction fails, the
# except/finally handlers below would otherwise reference an undefined
# `loader` (fresh kernel) or the already-closed loader left by an earlier cell.
loader = Neo4jLoader(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
try:
    start_time = time.time()
    loader.load_users(USER_FILE)
    end_time = time.time()
    loader.logger.info(f"Total import time: {end_time - start_time:.2f} seconds")
except Exception as e:
    # Import errors are logged rather than re-raised, as in the other load cells.
    loader.logger.error(f"Error during import: {str(e)}")
finally:
    loader.close()
    
    

2024-11-22 23:28:23,082 - INFO - Starting user import
2024-11-22 23:28:25,481 - INFO - Processed 1000 users
2024-11-22 23:28:25,724 - INFO - Processed 2000 users
2024-11-22 23:28:25,954 - INFO - Processed 3000 users
2024-11-22 23:28:26,128 - INFO - Processed 4000 users
2024-11-22 23:28:26,301 - INFO - Processed 5000 users
2024-11-22 23:28:26,466 - INFO - Processed 6000 users
2024-11-22 23:28:26,642 - INFO - Processed 7000 users
2024-11-22 23:28:26,814 - INFO - Processed 8000 users
2024-11-22 23:28:26,995 - INFO - Processed 9000 users
2024-11-22 23:28:27,169 - INFO - Processed 10000 users
2024-11-22 23:28:27,382 - INFO - Processed 11000 users
2024-11-22 23:28:27,567 - INFO - Processed 12000 users
2024-11-22 23:28:27,746 - INFO - Processed 13000 users
2024-11-22 23:28:27,921 - INFO - Processed 14000 users
2024-11-22 23:28:28,103 - INFO - Processed 15000 users
2024-11-22 23:28:28,279 - INFO - Processed 16000 users
2024-11-22 23:28:28,447 - INFO - Processed 17000 users
2024-11-22 23:28:28,

In [6]:
# Load tip data, this one has to be run after the user and business data is loaded
# The loader is constructed *before* the try block: if construction fails, the
# except/finally handlers below would otherwise reference an undefined
# `loader` (fresh kernel) or the already-closed loader left by an earlier cell.
loader = Neo4jLoader(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
try:
    start_time = time.time()
    loader.load_tips(TIP_FILE)
    end_time = time.time()
    loader.logger.info(f"Total import time: {end_time - start_time:.2f} seconds")
except Exception as e:
    # Import errors are logged rather than re-raised, as in the other load cells.
    loader.logger.error(f"Error during import: {str(e)}")
finally:
    loader.close()

2024-11-22 23:32:35,233 - INFO - Starting tips import
2024-11-22 23:32:37,627 - INFO - Processed 1000 tips
2024-11-22 23:32:37,812 - INFO - Processed 2000 tips
2024-11-22 23:32:37,969 - INFO - Processed 3000 tips
2024-11-22 23:32:38,138 - INFO - Processed 4000 tips
2024-11-22 23:32:38,244 - INFO - Processed 5000 tips
2024-11-22 23:32:38,370 - INFO - Processed 6000 tips
2024-11-22 23:32:38,498 - INFO - Processed 7000 tips
2024-11-22 23:32:38,601 - INFO - Processed 8000 tips
2024-11-22 23:32:38,685 - INFO - Processed 9000 tips
2024-11-22 23:32:38,791 - INFO - Processed 10000 tips
2024-11-22 23:32:38,934 - INFO - Processed 11000 tips
2024-11-22 23:32:39,039 - INFO - Processed 12000 tips
2024-11-22 23:32:39,143 - INFO - Processed 13000 tips
2024-11-22 23:32:39,238 - INFO - Processed 14000 tips
2024-11-22 23:32:39,332 - INFO - Processed 15000 tips
2024-11-22 23:32:39,436 - INFO - Processed 16000 tips
2024-11-22 23:32:39,534 - INFO - Processed 17000 tips
2024-11-22 23:32:39,635 - INFO - Proc

In [3]:
# Query the database: the ten Category nodes with the most businesses attached
# through IN_CATEGORY relationships, most populated first.
# NOTE(review): the captured output lists comma-joined strings such as
# 'Restaurants, Pizza' and 'Pizza, Restaurants' as separate categories, which
# suggests the category string is not split per category at load time —
# confirm against the loader before relying on these counts.
loader = Neo4jLoader(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
query = """
MATCH (b:Business)-[:IN_CATEGORY]->(c:Category)
RETURN c.name AS category, count(b) AS businessCount
ORDER BY businessCount DESC
LIMIT 10
"""
try:
    result = loader.run_query(query)
    # Records are consumed inside the try block, i.e. before close() runs.
    for record in result:
        print(record)
finally:
    # Previously the loader was never closed in this cell; call close() whether
    # or not the query succeeds.
    loader.close()

{'category': 'Beauty & Spas, Nail Salons', 'businessCount': 73}
{'category': 'Restaurants, Pizza', 'businessCount': 72}
{'category': 'Nail Salons, Beauty & Spas', 'businessCount': 54}
{'category': 'Mexican, Restaurants', 'businessCount': 50}
{'category': 'Chinese, Restaurants', 'businessCount': 49}
{'category': 'Pizza, Restaurants', 'businessCount': 47}
{'category': 'Restaurants, Mexican', 'businessCount': 47}
{'category': 'Restaurants, Chinese', 'businessCount': 46}
{'category': 'Food, Coffee & Tea', 'businessCount': 37}
{'category': 'Coffee & Tea, Food', 'businessCount': 34}
