In [5]:
import os
import random
import sqlite3
import time

import pandas as pd
import requests

# Pick a random API key (slot 1 or 2) for this run and echo the chosen slot.
random_number = random.randint(1, 2)
print(random_number)

# SECURITY NOTE(review): the literal keys below are committed in source and
# should be treated as leaked -- rotate them and supply replacements through
# the YELP_API_KEY_1 / YELP_API_KEY_2 environment variables. The literals are
# kept only as a fallback so existing runs keep working unchanged.
API_KEY = {}
API_KEY[1] = os.environ.get('YELP_API_KEY_1') or 'SiUTjHwmSKGao_Kh9RgbV5-OviCLnyUXtQsOB3Z2BLg6UQyXXMafpUmKBpivwYzL42jZWtvdPvgSmuSi04lYSfzUrRXwrL1vf5OELGcIXETPkbz2Rb74Z_Uw4in7Z3Yx'
API_KEY[2] = os.environ.get('YELP_API_KEY_2') or 'aKr5-xMKrqpXVjItyd4R7iUSjGCupXKw94ISGtHY5i4ynl9j2zUMLVtilrt_NAb_J7o3ZHLAieowD11HpmcgbCkhurycCBhIkk0Wkg6Q9kAUAu2txURMAgAszCr7Z3Yx'

# Page size per request and the highest offset (exclusive) paged through
# for each query.
LIMIT = 50
MAX_RESULTS = 200

# Database config
DB_PATH = "../db/database_sqlite.db"
TABLE_NAME = "restaurant_info"

# Request settings shared by every Yelp call in this notebook.
headers = {'Authorization': f'Bearer {API_KEY[random_number]}'}
base_url = 'https://api.yelp.com/v3/businesses/search'


class YelpDataManager:
    """Persist Yelp restaurant records in a SQLite database.

    Every method opens its own short-lived connection to ``db_path`` and
    closes it again, including when a statement raises.
    """

    # Columns filled by insert_restaurants(), in the order of the INSERT's
    # placeholders. Is_Processed is appended separately as a literal 0.
    _INSERT_COLUMNS = (
        'Business_id', 'Alias', 'Name', 'Rating', 'Review_Count', 'Price',
        'Address', 'Address1', 'Address2', 'City', 'Zip_Code', 'State', 'Country',
        'Latitude', 'Longitude', 'Categories', 'Transactions', 'Phone',
        'Operating_Hours', 'Is_Closed', 'Is_Claimed', 'Url',
    )

    def __init__(self, db_path, table_name=None):
        """Remember where the data lives.

        Args:
            db_path: Path of the SQLite database file.
            table_name: Name of the restaurant table. When omitted, the
                module-level ``TABLE_NAME`` is used.
        """
        self.db_path = db_path
        self.table_name = TABLE_NAME if table_name is None else table_name

    def _execute(self, sql, params=()):
        """Run one statement on a fresh connection, commit, and close it."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute(sql, params)
            conn.commit()
        finally:
            conn.close()

    # Create restaurant table
    def create_table(self):
        """Create the restaurant table if it does not exist yet."""
        # Business_id is TEXT because the ids stored here are strings; this
        # only affects newly created tables (CREATE TABLE IF NOT EXISTS).
        self._execute(f"""
            CREATE TABLE IF NOT EXISTS {self.table_name} (
                Restaurant_Info_ID INTEGER PRIMARY KEY AUTOINCREMENT,
                Business_id TEXT,
                Alias TEXT UNIQUE,
                Name TEXT,
                Rating REAL,
                Review_Count INTEGER,
                Positive_Review_Count INTEGER,
                Price TEXT,
                Address TEXT,
                Address1 TEXT,
                Address2 TEXT,
                City TEXT,
                Zip_Code TEXT,
                State TEXT,
                Country TEXT,
                Latitude REAL,
                Longitude REAL,
                Categories TEXT,
                Transactions TEXT,
                Phone TEXT,
                Operating_Hours TEXT,
                Is_Closed INTEGER,
                Is_Claimed INTEGER,
                Url TEXT,
                Created_At DATETIME DEFAULT (datetime('now')),
                Is_Processed INTEGER DEFAULT 0,
                Is_Valid INTEGER DEFAULT 1
            )
        """)

    # Insert restaurant records
    def insert_restaurants(self, df: pd.DataFrame):
        """Insert the rows of ``df`` whose Alias is not stored yet.

        Values are looked up by column name, so the column order of ``df``
        does not matter; extra columns are ignored. All rows are written in
        one transaction that is committed at the end. A summary line with
        the inserted/skipped counts is printed.

        Args:
            df: Frame containing every column listed in ``_INSERT_COLUMNS``.

        Raises:
            KeyError: If ``df`` lacks one of the required columns.
        """
        column_list = ", ".join(self._INSERT_COLUMNS)
        placeholders = ", ".join("?" for _ in self._INSERT_COLUMNS)
        insert_sql = (
            f"INSERT INTO {self.table_name} ({column_list}, Is_Processed) "
            f"VALUES ({placeholders}, 0)"
        )
        exists_sql = f"SELECT 1 FROM {self.table_name} WHERE Alias = ?"

        inserted = 0
        skipped = 0
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            for _, row in df.iterrows():
                # The lookup runs on the same connection, so it also sees
                # rows inserted earlier in this (uncommitted) batch.
                cursor.execute(exists_sql, (row['Alias'],))
                if cursor.fetchone():
                    skipped += 1
                    continue

                cursor.execute(
                    insert_sql,
                    tuple(row[column] for column in self._INSERT_COLUMNS),
                )
                inserted += 1
            conn.commit()
        finally:
            conn.close()
        print(f"Inserted {inserted}, skipped {skipped}")

    # Clean invalid restaurants
    def always_clean_data(self):
        """Set Is_Valid = 0 on rows matched by the cleaning rule.

        A row is flagged when its Zip_Code appears in ``la_zip_code`` with
        ``Is_City_Zip = 1``, or when its Address1 is NULL.

        NOTE(review): the ZIP condition uses ``IN``; if the intent is to
        invalidate restaurants *outside* the city ZIP list this should be
        ``NOT IN`` -- confirm against the la_zip_code data.
        """
        self._execute(f"""
            UPDATE {self.table_name}
            SET Is_Valid = 0
            WHERE Zip_Code IN (
                SELECT Zip_Code FROM la_zip_code WHERE Is_City_Zip = 1
            ) OR Address1 IS NULL
        """)

    # Mark ZIP as processed
    def mark_zip_as_processed(self, zip_code):
        """Set Is_Processed = 1 for ``zip_code`` in the la_zip_code table."""
        self._execute(
            "UPDATE la_zip_code SET Is_Processed = 1 WHERE Zip_Code = ?",
            (zip_code,)
        )

    # Yelp search parameters
    def search_parameter(self):
        """Return the extra Yelp search filters to query with.

        Each key maps to either a single value or a list of alternative
        values.
        """
        return {
            'transactions': ['delivery'],
            'radius': '5000',
            'price': ['1', '2', '3', '4'],
            'locale': 'en_US',
            'open_now': 'True',
            'sort_by': ['best_match', 'rating', 'review_count', 'distance']
        }


# Seconds to wait for a single Yelp response before giving up on it.
REQUEST_TIMEOUT = 30

UNPROCESSED_ZIP_SQL = """
    SELECT Zip_Code FROM la_zip_code
    WHERE Is_City_Zip = 1 AND Is_Processed = 0
"""


def _flatten_business(b):
    """Convert one Yelp business payload into a flat restaurant row.

    The key order matches the column order of the restaurant INSERT
    statement. Optional sections of the payload fall back to empty values
    instead of raising.
    """
    location = b.get('location') or {}
    coordinates = b.get('coordinates') or {}
    return {
        'Business_id': b['id'],
        'Alias': b['alias'],
        'Name': b['name'],
        'Rating': b.get('rating'),
        'Review_Count': b.get('review_count'),
        'Price': b.get('price', ''),
        'Address': " ".join(location.get('display_address') or []),
        'Address1': location.get('address1', ''),
        'Address2': location.get('address2', ''),
        'City': location.get('city', ''),
        'Zip_Code': location.get('zip_code', ''),
        'State': location.get('state', ''),
        'Country': location.get('country', ''),
        'Latitude': coordinates.get('latitude'),
        'Longitude': coordinates.get('longitude'),
        'Categories': ", ".join(c['title'] for c in b.get('categories') or []),
        'Transactions': ", ".join(b.get('transactions') or []),
        'Phone': b.get('phone', ''),
        'Operating_Hours': '',
        'Is_Closed': b.get('is_closed', False),
        'Is_Claimed': b.get('is_claimed', False),
        'Url': b.get('url', '')
    }


def _collect_zip_results(zip_code, search_params):
    """Query Yelp for one ZIP code and return ``(rows, completed)``.

    One paged query series is issued per (filter, value) pair of
    ``search_params``. ``completed`` is False when the run was cut short by
    a network error or by a non-200 response other than HTTP 400; rows
    gathered up to that point are still returned.
    """
    rows = []
    for key, values in search_params.items():
        if not isinstance(values, list):
            values = [values]

        for value in values:
            for offset in range(0, MAX_RESULTS, LIMIT):
                params = {
                    'categories': 'restaurants',
                    'location': zip_code,
                    'limit': LIMIT,
                    'offset': offset,
                    key: value
                }

                try:
                    response = requests.get(
                        base_url, headers=headers, params=params,
                        timeout=REQUEST_TIMEOUT,
                    )
                except requests.RequestException as exc:
                    print(f"Request error for ZIP {zip_code} ({key}={value}, offset={offset}): {exc}")
                    return rows, False

                if response.status_code == 400:
                    # Presumably a rejection of this particular query
                    # (NOTE(review): confirm against the Yelp error list);
                    # skip its remaining pages and try the next filter.
                    print(f"Query rejected for ZIP {zip_code} ({key}={value}, offset={offset}): HTTP 400")
                    break
                if response.status_code != 200:
                    # Auth, rate-limit or server failure: not the ZIP's
                    # fault, so report the ZIP as incomplete.
                    print(f"Yelp request failed for ZIP {zip_code} ({key}={value}, offset={offset}): HTTP {response.status_code}")
                    return rows, False

                businesses = response.json().get('businesses', [])
                if not businesses:
                    break

                rows.extend(_flatten_business(b) for b in businesses)

                # Random pause between pages.
                time.sleep(random.uniform(0.02, 2))
    return rows, True


# Get unprocessed LA city ZIP codes
conn = sqlite3.connect(DB_PATH)
try:
    zip_df = pd.read_sql(UNPROCESSED_ZIP_SQL, conn)
finally:
    conn.close()

if zip_df.empty:
    print("All ZIP codes processed")
    # Same effect as exit(), without depending on the site-provided helper.
    raise SystemExit(0)

manager = YelpDataManager(DB_PATH)
manager.create_table()

for zip_code in zip_df['Zip_Code']:
    all_results, completed = _collect_zip_results(zip_code, manager.search_parameter())

    df = pd.DataFrame(all_results)
    if not df.empty:
        manager.insert_restaurants(df)

    if completed:
        manager.mark_zip_as_processed(zip_code)
    else:
        # Leave the ZIP unmarked so that a later run retries it.
        print(f"ZIP {zip_code} not marked as processed; it will be retried on the next run")

manager.always_clean_data()

6
Created table 'restaurant_info' with Is_Processed flag.
Total businesses collected for ZIP 91607: 6438
✅ Inserted 30 new restaurants
⏭️ Skipped 6408 duplicates based on Alias



**View all data**

In [2]:
import pandas as pd
import sqlite3

# Database path
DB_PATH = "../db/database_sqlite.db"

# Table to load
TABLE_NAME = "restaurant_info"

# Connect and read the whole table; the connection is closed even if the
# query fails.
conn = sqlite3.connect(DB_PATH)
try:
    df = pd.read_sql_query(f"SELECT * FROM {TABLE_NAME}", conn)
finally:
    conn.close()

# Display data
df

Unnamed: 0,Restaurant_Info_ID,Business_id,Alias,Name,Rating,Review_Count,Positive_Review_Count,Price,Address,Address1,...,Categories,Transactions,Phone,Operating_Hours,Is_Closed,Is_Claimed,Url,Created_At,Is_Processed,Is_Valid
0,1,RYpJyi1RhScLZAYrqPMLKQ,avilas-el-ranchito-huntington-park,Avila's El Ranchito,4.4,1189,,$$,"6703 Santa Fe Ave Huntington Park, CA 90255",6703 Santa Fe Ave,...,Mexican,"delivery, pickup",+13235855055,,0,0,https://www.yelp.com/biz/avilas-el-ranchito-hu...,2025-04-13 14:16:56,0,0
1,2,elP62NxBVs2v_OKXozQZuA,poutine-brothers-los-angeles-3,Poutine Brothers,4.6,205,,$$,"Los Angeles , CA 90001",,...,"Poutineries, Food Trucks",pickup,+12134373599,,0,0,https://www.yelp.com/biz/poutine-brothers-los-...,2025-04-13 14:16:56,0,0
2,3,spKUIFIABLaU63Ex4PEYvQ,ray-s-bbq-huntington-park,Ray’s BBQ,4.4,1097,,$$,"6038 Santa Fe Ave Huntington Park, CA 90255",6038 Santa Fe Ave,...,"Barbeque, Smokehouse",delivery,+13106896353,,0,0,https://www.yelp.com/biz/ray-s-bbq-huntington-...,2025-04-13 14:16:56,0,0
3,4,HYhYc92rD7O7iIs7TmXvlg,cruzitas-deli-and-cafe-huntington-park,Cruzita's Deli and Cafe,4.6,388,,$$,"7121 State St Huntington Park, CA 90255",7121 State St,...,"Cafes, Delis",delivery,+13238356175,,0,0,https://www.yelp.com/biz/cruzitas-deli-and-caf...,2025-04-13 14:16:56,0,0
4,5,_vVQjfThhJUZz7wdh_GOyw,rajas-con-crema-maywood,Rajas con Crema,4.8,516,,$$,"3630 Slauson Ave Maywood, CA 90270",3630 Slauson Ave,...,"Beer, Wine & Spirits, Mexican, Italian","delivery, pickup, restaurant_reservation",+13233425132,,0,0,https://www.yelp.com/biz/rajas-con-crema-maywo...,2025-04-13 14:16:56,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20912,20913,du0xbgAbaHKxHm1h4fpTrA,wolfgang-puck-express-valley-village,Wolfgang Puck Express,3.4,42,,$$,"4738 Laurel Canyon Blvd Valley Village, CA 91607",4738 Laurel Canyon Blvd,...,Pizza,delivery,+18189065743,,0,0,https://www.yelp.com/biz/wolfgang-puck-express...,2025-04-14 04:07:36,1,1
20913,20914,R2WI7pOXYAZ0BuMjxKinVA,magnolia-thai-cuisine-north-hollywood,Magnolia Thai Cuisine,3.3,11,,,"11688 Magnolia Blvd North Hollywood , CA 91601",11688 Magnolia Blvd,...,Thai,"delivery, pickup",+18187634463,,0,0,https://www.yelp.com/biz/magnolia-thai-cuisine...,2025-04-14 04:07:36,1,1
20914,20915,-6SeB8PY-OinSzniw4Ea0w,brooklyn-egg-and-cheese-los-angeles,Brooklyn Egg & Cheese,5.0,2,,,"5000 Colfax Ave Los Angeles, CA 91601",5000 Colfax Ave,...,"Breakfast & Brunch, Food Stands, Sandwiches",,,,0,0,https://www.yelp.com/biz/brooklyn-egg-and-chee...,2025-04-14 04:07:36,1,1
20915,20916,dRIzkh69FObPdZgqlQy0AA,nectar-hotel-mariposa-san-fernando-valley,Nectar - Hotel Mariposa,0.0,0,,,"12828 W Riverside Dr San Fernando Valley, CA 9...",12828 W Riverside Dr,...,"Coffee & Tea, Breakfast & Brunch, Sandwiches",,+18182100800,,0,0,https://www.yelp.com/biz/nectar-hotel-mariposa...,2025-04-14 04:07:36,1,1
