# Graph Storage of Venues
Now that we have extracted the keywords from our venue list, it is time to store the venues and keywords in a graph database. To do this, we will need to parse the extracted keyword-venue JSON objects to create Cypher statements for writing the entities and relationships to our Neo4j database.

This notebook will walk us through a few principal steps:
1. Creating Cyphers from the extracted JSON data
2. Using Cyphers to write to Neo4J
3. Basic querying and exploration of the Graph



In [69]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (python-dotenv) before reading keys.
load_dotenv()

# Yelp API key taken from the environment; os.getenv yields None when unset.
YELP_API_KEY = os.getenv("YELP_API_KEY")

In [90]:
import requests

# Yelp Fusion API URL
API_URL = "https://api.yelp.com/v3"
# Path template for one business; the placeholder is filled with a business ID.
BUSINESS_SEARCH_ENDPOINT = "/businesses/{}"

# Search Params For API Request
# (no search parameters are defined in this cell)

# Authorization
# NOTE: the concatenation below raises TypeError when YELP_API_KEY is None,
# i.e. when the environment variable was not set.
HEADERS = {
    "Authorization": "Bearer " + YELP_API_KEY,
}

def request_city_data(biz_id: str, timeout: float = 10.0) -> dict | None:
    """Fetch the name and rating of a single Yelp business.

    Despite the function's name, the request targets one business (by ID),
    not a city.

    Args:
        biz_id: Yelp business ID substituted into the endpoint path.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        ``{'name': ..., 'rating': ...}`` taken from the JSON response, or
        ``None`` when the response does not contain both fields (for
        example, an error payload).

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    url = API_URL + BUSINESS_SEARCH_ENDPOINT.format(biz_id)
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    results = response.json()
    try:
        return {'name': results['name'], 'rating': results['rating']}
    except KeyError:
        # Error payloads lack these keys; signal "no data" to the caller.
        return None

# Smoke-test the lookup with one business ID; the cell output shows the result.
request_city_data("C6ohbrxuiGBuk7_a4oqy4Q")

{'name': 'Guest - New York', 'rating': 5.0}

In [92]:
# Function for extracting the data from the JSON file into the Cypher Text
import json
import re

from tqdm import tqdm

def title_case_string(val: str, split_delim: str) -> str:
    """Build a concatenated, title-cased label from a delimited string.

    ``val`` is split on ``split_delim``; every piece is title-cased and the
    pieces are joined with no separator. Afterwards each run of digits, and
    then each listed punctuation or space character, is wrapped in backticks.
    """

    def _backtick(match) -> str:
        # Wrap whatever the pattern matched in a pair of backticks.
        return f"`{match.group()}`"

    label = "".join(map(str.title, val.split(split_delim)))

    # Order matters only for readability: digit runs first, then the
    # individual special characters. Neither pattern matches a backtick, so
    # the second pass never re-wraps output of the first.
    for pattern in (r"\d+", r"[\'\(\),\.\:\;\!\/\&\-\ ]"):
        label = re.sub(pattern, _backtick, label)

    return label

# Index the trimmed location records by Yelp ID.
# NOTE(review): location_data is not read again in the cells visible here; the
# loop below queries the Yelp API instead. Confirm whether it is still needed.
with open("../data/trimmed_location_data.json", 'r', encoding="utf-8") as locations:
    location_data = json.load(locations)
    location_data = {loc['id']: {'name': loc['name'], 'rating': loc['rating']} for loc in location_data}

# Only the read needs the file handle; the API calls happen after it is closed.
with open("../data/normalized_keywords.json", "r", encoding="utf-8") as venues:
    venue_data = json.load(venues)

# Attach Yelp name/rating to each venue. Venues whose lookup fails are reported
# and left untouched (they keep their 'id' and get no 'properties' key), so a
# failure never leaves a half-built record behind.
joined_data = []
for venue in tqdm(venue_data):
    try:
        loc_data = request_city_data(venue['id'])
    except Exception as e:
        # Best-effort batch: report the problem and move on to the next venue.
        print("Error: ", e)
        continue

    if loc_data is None:
        print("Error: ", f"no name/rating returned for venue {venue['id']}")
        continue

    # Key order (name, rating, id) matches the documented record format.
    venue['properties'] = {
        'name': loc_data['name'],
        'rating': loc_data['rating'],
        'id': venue.pop('id'),
    }
    joined_data.append(venue)

with open("../data/venue_keywords_joined.json", "w", encoding="utf-8") as f:
    json.dump(joined_data, f, indent=4)

100%|██████████| 5/5 [00:01<00:00,  4.33it/s]


Now we have the `venue_data` list, which is a list of dictionaries with the format:
```python
{
    "keywords": [
        "BakingPassion",
        "SweetTreats"
    ],
    "label": "Mimi`&`CocoBagelsAndBakery",
    "properties": {
        "name": "Mimi & Coco Bagels And Bakery",
        "rating": 5.0,
        "id": "DZDc1dCf8Xa-e3X76vYTJQ"
    }
}
```

The strings in the keyword list are the labels of the keyword nodes. The label of the venue node is stored in the `label` field, and the Venue's properties are stored in the `properties` field.

In [63]:
from typing import Dict, List, Union

def make_safe(val: Union[str, float]) -> Union[str, float]:
    """Escape a value for use inside a single-quoted Cypher string literal.

    Args:
        val: The value to embed. Only strings are modified.

    Returns:
        For strings, a copy with backslashes and single quotes
        backslash-escaped; any other value is returned unchanged.
    """
    if isinstance(val, str):
        # Escape backslashes first so the backslashes added for quotes are not
        # doubled, and so a trailing "\" cannot swallow the closing quote.
        return val.replace("\\", "\\\\").replace("'", "\\'")
    return val
    
def generate_cypher(
    venue: Dict[str, Union[str, List[str], Dict[str, Union[str, float]]]]
) -> tuple[List[str], List[str]]:
    """Build the Cypher MERGE statements for one venue and its keywords.

    Args:
        venue: A venue record. ``venue['keywords']`` is required; the
            optional ``venue['properties']`` mapping becomes the property
            map of the ``Venue`` node.

    Returns:
        A pair ``(entity_statements, relationship_statements)``. The first
        list holds the ``Venue`` MERGE followed by one ``Keyword`` MERGE per
        keyword (bound to ``k1``, ``k2``, ...); the second holds one
        ``HAS_KEYWORD`` MERGE per keyword.
    """
    e_statements = []
    r_statements = []

    # NOTE: every property value is emitted as a quoted string literal, so a
    # numeric rating such as 5.0 is written as the string '5.0'.
    properties = venue.get('properties', {})
    venue_properties = ", ".join(
        f"{key}: '{make_safe(value)}'" for key, value in properties.items()
    )
    e_statements.append(f"MERGE (v:Venue {{ {venue_properties} }})")

    for index, keyword in enumerate(venue['keywords'], start=1):
        # Keywords are escaped like property values so that an apostrophe in
        # a keyword cannot terminate the string literal early.
        e_statements.append(
            f"MERGE (k{index}:Keyword {{ value: '{make_safe(keyword)}' }})"
        )
        r_statements.append(f"MERGE (v)-[:HAS_KEYWORD]->(k{index})")

    return e_statements, r_statements

# One multi-line Cypher statement per venue: entity MERGEs first, then the
# relationship MERGEs that refer to the variables they bind.
cypher_statements = []

for venue in venue_data:
    # Venues whose Yelp lookup failed carry no (or an empty) 'properties'
    # mapping. A Venue MERGE without properties does not identify a specific
    # venue, so such records are skipped instead of being written.
    if not venue.get('properties'):
        continue
    e_statements, r_statements = generate_cypher(venue)
    cypher_statements.append("\n".join(e_statements + r_statements))

with open("../data/cypher_statements.json", "w") as f:
    json.dump(cypher_statements, f, indent=4)

In [64]:
from neo4j import GraphDatabase

# Neo4j connection settings read from the environment (None when unset).
DB_USER = os.getenv("NEO4J_DATABASE_USERNAME")
DB_URL = os.getenv("NEO4J_DATABASE_URL")
DB_PASSWORD = os.getenv("NEO4J_DATABASE_PASSWORD")

# Module-level driver used by the statements below; closed at the end of the cell.
driver = GraphDatabase.driver(DB_URL, auth=(DB_USER, DB_PASSWORD))
def execute_query(driver, query):
    """Run ``query`` in a fresh session obtained from ``driver``.

    The session is opened for this single call and closed when the ``with``
    block exits; whatever ``session.run`` returned is handed back.

    NOTE(review): the returned result outlives the session that produced it.
    Presumably it can no longer be iterated by the caller - confirm against
    the Neo4j driver documentation before relying on the return value.
    """
    with driver.session() as db_session:
        outcome = db_session.run(query)
        return outcome

# Write every generated statement to the database. The try/finally guarantees
# the driver is closed even when one of the statements raises.
try:
    for statement in cypher_statements:
        execute_query(driver, statement)
finally:
    driver.close()
