In [79]:
#connects to mysql and creates database and tables
import config
import mysql.connector
from mysql.connector import errorcode
#database string
endpoint = config.DatabaseEndpoint
name = config.DatabaseName
password = config.DatabasePassword
port = config.DatabasePort
dbName = 'YelpPizza'

cnx = mysql.connector.connect(
    host = endpoint,
    user = name,
    passwd = password,
)


#create database given cursor and name of database
def create_database(cursor, database):
    try:
        cursor.execute(
        "CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(database))
    except mysql.connector.Error as err:
        print('Failed creating database: {}'.format(err))
        exit(1)

#create tables from dictionary
def create_tables(tables, cursor):
    for table_name in tables:
        table_description = tables[table_name]
        try:
            print("Creating table {}: ".format(table_name), end='')
            cursor.execute(table_description)
        except mysql.connector.Error as err:
            if err.errno == errorcode.ER_TABLE_EXISTS_ERROR:
                print('already exists')
            else:
                print(err.msg)
        else:
            print('OK')





cur = cnx.cursor()
#CREATE DATABASE AND TABLE SETUP

try:
    cur.execute("USE {}".format(dbName))
except mysql.connector.Error as err:
    print("Database {} does not exist.".format(dbName))
    if err.errno == errorcode.ER_BAD_DB_ERROR:
        create_database(cursor, dbName)
        print("Database {} created successfully.".format(dbName))
        cnx.database = dbName
    else:
        print(err)
        exit(1)     
        


#TABLE SETUP
TABLES = {}
TABLES['businesses'] ="""
CREATE TABLE businesses (
  businesses_id VARCHAR(255) PRIMARY KEY,
  businesses_name VARCHAR(255) NOT NULL,
  businesses_review_count INT,
  businesses_rating DECIMAL(10,2),
  businesses_price INT, 
  businesses_address VARCHAR(255)
)
"""
TABLES['reviews'] ="""
CREATE TABLE YelpPizza.reviews (
  reviews_id VARCHAR(255) PRIMARY KEY,
  reviews_text VARCHAR(5000),
  reviews_rating DECIMAL(10, 2),
  reviews_time_created DATE,
  reviews_user VARCHAR(255)
)
"""

create_tables(TABLES, cur)

Creating table businesses: OK
Creating table reviews: already exists


In [100]:
#connects to yelp api and pulls businesses data
import requests
import json
import config

API_KEY=config.APIKey
SEARCH_LIMIT = 50
currentOffset = 0


headers = {'Authorization': 'bearer %s'% API_KEY}

#businesses keys
businessKeys = ['id', 'name', 'review_count', 'rating', 'price', 'location']
#review keys
reviewKeys = []
#insert statements
add_business = ("INSERT INTO YelpPizza.businesses"
                "(businesses_id, businesses_name, businesses_review_count,"
                "businesses_rating, businesses_price, businesses_address) "
                "VALUES (%s, %s, %s, %s ,%s, %s)")
('stuff, sss', 'data')

def businessCallGET(args):
    businessEndpoint = 'https://api.yelp.com/v3/businesses/search'
    return requests.get(businessEndpoint, params=args, headers=headers)
while currentOffset < 1:
    #update params to pass
    params={'term':'pizza place','location':'Brooklyn NY',
        'limit': SEARCH_LIMIT, 'offset':currentOffset}
    #pass and get data from api
    data = businessCallGET(params).json()
    print(data)
    #clean data
    cleanBusinessAndInsert(data, businessKeys, add_business)
    #increment offset for new data
    currentOffset += 50

{'businesses': [{'id': 'sUvoBG8t3eKC7WhJbnro2w', 'alias': 'the-pizza-place-hewlett', 'name': 'The Pizza Place', 'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/19ybffZ3cvRMqzZpSWaJrw/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/the-pizza-place-hewlett?adjust_creative=F_8ybsDGOxK8FdQaLMXsGg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=F_8ybsDGOxK8FdQaLMXsGg', 'review_count': 92, 'categories': [{'alias': 'pizza', 'title': 'Pizza'}, {'alias': 'italian', 'title': 'Italian'}], 'rating': 3.5, 'coordinates': {'latitude': 40.640716, 'longitude': -73.699516}, 'transactions': [], 'price': '$$', 'location': {'address1': '1344 Broadway', 'address2': '', 'address3': '', 'city': 'Hewlett', 'zip_code': '11557', 'country': 'US', 'state': 'NY', 'display_address': ['1344 Broadway', 'Hewlett, NY 11557']}, 'phone': '+15163745900', 'display_phone': '(516) 374-5900', 'distance': 20271.82979363579}, {'id': 'EsHcyOLHaK76bQuU7bwiTw', 'alias': 'tonys-pizzeria-and-r

In [10]:
data

{'businesses': [{'id': 'sUvoBG8t3eKC7WhJbnro2w',
   'alias': 'the-pizza-place-hewlett',
   'name': 'The Pizza Place',
   'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/19ybffZ3cvRMqzZpSWaJrw/o.jpg',
   'is_closed': False,
   'url': 'https://www.yelp.com/biz/the-pizza-place-hewlett?adjust_creative=F_8ybsDGOxK8FdQaLMXsGg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=F_8ybsDGOxK8FdQaLMXsGg',
   'review_count': 92,
   'categories': [{'alias': 'pizza', 'title': 'Pizza'},
    {'alias': 'italian', 'title': 'Italian'}],
   'rating': 3.5,
   'coordinates': {'latitude': 40.640716, 'longitude': -73.699516},
   'transactions': [],
   'price': '$$',
   'location': {'address1': '1344 Broadway',
    'address2': '',
    'address3': '',
    'city': 'Hewlett',
    'zip_code': '11557',
    'country': 'US',
    'state': 'NY',
    'display_address': ['1344 Broadway', 'Hewlett, NY 11557']},
   'phone': '+15163745900',
   'display_phone': '(516) 374-5900',
   'distance': 20271.

In [None]:
#getting review data
# Create a table for the reviews
def reviewCallGET(id_num):
    reviewEndpoint_base = 'https://api.yelp.com/v3/businesses/'
    reviewEndpoint_end = '/reviews'
    return requests.get(reviewEndpoint_base + id_num + reviewEndpoint_end,headers=headers)

test = 'ysqgdbSrezXgVwER2kQWKA'
# review_params={'id':'ysqgdbSrezXgVwER2kQWKA'}
review_data=reviewCallGET(test).json()
print(review_data)

In [99]:
#cleaning the businesses data

def cleanBusinessAndInsert(dat, keys, add_business):
    #loops each business
    print(dat)
    for i in range(len(dat['businesses'])):
        something = []

        #cleaning loop
        for x in keys:
            try :
                if (x == 'price'):
                    something.append(len(dat['businesses'][i][x]))
                elif (x == 'location'):
                    str_piece = ''
                    if (len(dat['businesses'][i][x]['display_address']) > 0):
                        str_piece = dat['businesses'][i][x]['display_address'][0]
                        if (len(dat['businesses'][i][x]['display_address']) > 1):
                            str_piece += dat['businesses'][i][x]['display_address'][1]
                    else:
                        str_piece = 'null'
                    something.append(str_piece)
                else:
                    something.append(dat['businesses'][i][x])
            except  KeyError:
                if x == 'id':
                    print('id if statement broken')
                something.append('null')
        #insert new data row
        data_business = (something[0], something[1], something[2],
                     something[3], something[4], something[5])
        print(data_business)
        try:
            cur.execute(add_business, data_business)
            print('success: {}'.format(i + currentOffset))
        except mysql.connector.Error as err:
            print(err)
            print('skipped entry number: {}'.format(i + currentOffset))


module

In [61]:
add_business = ("INSERT INTO businesses"
                "(businesses_id, businesses_name, businesses_review_count,"
                "businesses_rating, businesses_price, businesses_address) "
                "VALUES (%s, %s, %s, %s ,%s, %s)")
businessKeys = ['id', 'name', 'review_count', 'rating', 'price', 'location']

cur.execute(add_business, data_business)


In [92]:
data

{'businesses': [{'id': 'zFOtrdtiJ-1woygG7PoaNA',
   'alias': 'bjs-pizzeria-east-orange',
   'name': "Bj's Pizzeria",
   'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/SdSWluiAhZm5JZ4xJ48CHQ/o.jpg',
   'is_closed': False,
   'url': 'https://www.yelp.com/biz/bjs-pizzeria-east-orange?adjust_creative=F_8ybsDGOxK8FdQaLMXsGg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=F_8ybsDGOxK8FdQaLMXsGg',
   'review_count': 7,
   'categories': [{'alias': 'sandwiches', 'title': 'Sandwiches'},
    {'alias': 'pizza', 'title': 'Pizza'}],
   'rating': 4.0,
   'coordinates': {'latitude': 40.75183, 'longitude': -74.22777},
   'transactions': [],
   'price': '$',
   'location': {'address1': '230 Tremont Ave',
    'address2': None,
    'address3': '',
    'city': 'East Orange',
    'zip_code': '07018',
    'country': 'US',
    'state': 'NJ',
    'display_address': ['230 Tremont Ave', 'East Orange, NJ 07018']},
   'phone': '+19736724340',
   'display_phone': '(973) 672-4340',
   'd

In [78]:

cur.execute('''
DROP TABLE businesses
;
''')

In [97]:
# import pandas as pd

# cur.execute("""
# SELECT * 
# FROM YelpPizza.businesses
# ORDER BY businesses_rating
# ;
# """)
# df = pd.DataFrame(cur.fetchall())
# df.columns = [x[0] for x in cur.description]
df

Unnamed: 0,businesses_id,businesses_name,businesses_review_count,businesses_rating,businesses_price,businesses_address
0,b6QXXgkcObAzb1EQyQx5Nw,Taco Bell / Pizza Hut,0,0.00,0,3908th Ave
1,5wfKQ8wQOKz56Gj7GcyrfQ,Domino's Pizza,3,1.00,0,"886 Dekalb AveBrooklyn, NY 11221"
2,DMkBrk9SUXcwCkXSyT787A,Keep it Halal,2,1.00,0,"1162 Coney Island AveBrooklyn, NY 11230"
3,EVD6nDw-iXoPAFpf1jn_9w,Domino's Pizza,78,1.00,1,"241 Rockaway PkwyBrooklyn, NY 11212"
4,KEBeh8JBy8z5tkupaV9IZQ,Gramma's pizza shop,1,1.00,0,"99B Featherbed LnBronx, NY 10452"
...,...,...,...,...,...,...
995,yyrXCNbXdXd6C6HQpYRWrQ,Heights Bar and Grill,1,5.00,0,"766 Classon AveBrooklyn, NY 11238"
996,z5IlqGy5KMath_ieugBlMg,Tale,12,5.00,0,"683 5th AveBrooklyn, NY 11215"
997,zcxZLypXNAi6E3G0T6ZVIQ,Joanne's Gourmet Pizza,3,5.00,0,"161 E Jericho TpkeMineola, NY 11501"
998,zFdIHdzQ1DuE_XSAKP4xdg,Tasty Munchies Pizza,1,5.00,0,"206 Rivington StNew York, NY 10002"
