In [1]:
from py2neo import Graph

import json
import numpy as np
import pandas as pd

## Connect the jupyter notebook to the local Neo4j database

In [181]:
# Connect the neo4j to local database
graph = Graph("bolt://localhost:7687", auth=("neo4j", "00"))

## Prepare the data for the Neo4j database

In [78]:
df = pd.read_json('~/desktop/dsc104/assignments/final_project/data/test.json')

In [79]:
df.head()

Unnamed: 0,name,state,categories,hours,city,stars,address,review_count
0,Rubio's,AZ,"Restaurants, Seafood, Event Planning & Service...","{'Friday': '10:30-21:30', 'Monday': '10:30-21:...",Phoenix,3.5,"4747 E Bell Rd, Ste 17",100
1,Wrights at The Arizona Biltmore,AZ,"Restaurants, American (New), Breakfast & Brunch","{'Friday': '17:30-21:30', 'Monday': '17:30-21:...",Phoenix,4.0,2400 E Missouri Ave,139
2,Famous Ray's Pizza,AZ,"Pizza, Restaurants","{'Friday': '10:0-22:30', 'Monday': '10:0-21:30...",Phoenix,3.0,2501 W Happy Valley Rd,94
3,Cafe Rio,AZ,"Food, Mexican, Restaurants","{'Friday': '10:30-23:0', 'Monday': '10:30-22:0...",Phoenix,3.0,12005 N Tatum Blvd,89
4,Crazy Jim's,AZ,"Mediterranean, Pizza, Greek, Breakfast & Brunc...","{'Friday': '6:30-14:30', 'Monday': '0:0-0:0', ...",Phoenix,3.5,"305 W Washington St, Ste 104",97


In [81]:
data = df[['name', 'city', 'state', 'stars', 'review_count', 'categories']]
data = data.dropna()
data = data.reset_index()
data.columns = ['id','name', 'city', 'state', 'stars', 'review_count', 'categories']
data.head()

Unnamed: 0,id,name,city,state,stars,review_count,categories
0,0,Rubio's,Phoenix,AZ,3.5,100,"Restaurants, Seafood, Event Planning & Service..."
1,1,Wrights at The Arizona Biltmore,Phoenix,AZ,4.0,139,"Restaurants, American (New), Breakfast & Brunch"
2,2,Famous Ray's Pizza,Phoenix,AZ,3.0,94,"Pizza, Restaurants"
3,3,Cafe Rio,Phoenix,AZ,3.0,89,"Food, Mexican, Restaurants"
4,4,Crazy Jim's,Phoenix,AZ,3.5,97,"Mediterranean, Pizza, Greek, Breakfast & Brunc..."


In [83]:
# Export businesses data
business_data = data[['id', 'name', 'city', 'state', 'stars', 'review_count']]
business_data.to_csv('data/business.csv')

In [85]:
# Clean and export categories-business relationship data
categories = data[['id', 'categories']]
categories['categories'] = categories['categories'].apply(lambda x: x.split(","))

business_categories = categories.explode('categories')
business_categories = business_categories.reset_index(drop = True)

cat = pd.DataFrame(business_categories['categories'].drop_duplicates()).reset_index(drop = True).reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [89]:
business_categories['categories'] = business_categories['categories'].apply(lambda x: x.lstrip())
business_categories['categories'] = business_categories['categories'].apply(lambda x: x.rstrip())
business_categories.head()
business_categories.to_csv('bus_cat.csv')

In [165]:
business_categories.head()

Unnamed: 0,id,categories
0,0,Restaurants
1,0,Seafood
2,0,Event Planning & Services
3,0,Mexican
4,0,Caterers


In [90]:
# Export categories data
cat.columns = ['id', 'name']
cat.to_csv('data/categories.csv')

In [164]:
cat.head()

Unnamed: 0,id,name
0,0,Restaurants
1,1,Seafood
2,2,Event Planning & Services
3,3,Mexican
4,4,Caterers


## Use Cypher to load all data into neo4j database

In [182]:
# Move all three exported files to the created neo4j database import folder and follow the next steps
# Add the business dataset to neo4j database
query = """
USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM "file:///business.csv" AS row 
CREATE (:Business {name: row.name, id: row.id, city: row.city, state: row.state, review_count: row.review_count, stars:row.stars})
"""

graph.run(query).evaluate()

In [183]:
# Add the category dataset to neo4j database
query = """
USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM "file:///categories.csv" AS row 
MERGE (:Category {name: row.name, id: row.id})
"""

graph.run(query).evaluate()

In [184]:
# Add the business category relationship to neo4j database
query = """
LOAD CSV WITH HEADERS FROM "file:///bus_cat.csv" AS row 
MATCH (bus:Business{id:row.id})
MATCH (cat:Category{name:row.categories})
CREATE (bus)-[:BelongsToCategory]->(cat)
"""

graph.run(query).evaluate()

## One Query Example

In [203]:
query = """
MATCH (b:Business {city: $city})-[r:BelongsToCategory]->(c:Category {name: $cat})
RETURN b, r, c
"""

def return_restaurants_city_cat(city, category):
    return graph.run(query, city = city, cat = category)

In [204]:
return_restaurants_city_cat('Phoenix', 'Pizza')

 b                                                                                                                           | r                                                       | c                                          
-----------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------|--------------------------------------------
 (_35840:Business {city: 'Phoenix', id: '82', name: "Domino's Pizza", review_count: '12', stars: '2.0', state: 'AZ'})        | (Domino's Pizza)-[:BelongsToCategory {}]->(Pizza)       | (_35938:Category {id: '9', name: 'Pizza'}) 
 (_35844:Business {city: 'Phoenix', id: '86', name: "OG Original Geno's", review_count: '53', stars: '3.5', state: 'AZ'})    | (OG Original Geno's)-[:BelongsToCategory {}]->(Pizza)   | (_35938:Category {id: '9', name: 'Pizza'}) 
 (_35912:Business {city: 'Phoenix', id: '154', name: 'Little Caesars Pizza', review_