In [9]:
import requests
from bs4 import BeautifulSoup
import sqlite3

# Name of the restaurant table. Not referenced by any code in the visible
# cells -- presumably used elsewhere in the notebook (TODO confirm).
RESTAURANT_TABLE_NAME = "restaurant_info"


class ZipCodeManager:
    """Create and populate a SQLite table of ZIP codes.

    Every method opens its own short-lived connection to ``db_path`` and
    always closes it before returning, including when a statement fails.

    NOTE(review): ``table_name`` is interpolated directly into the SQL text
    (SQLite cannot bind identifiers as parameters), so it must come from
    trusted code and never from user input.

    Args:
        db_path: Path of the SQLite database file.
        table_name: Name of the table that stores the ZIP codes.
    """

    def __init__(self, db_path="../db/database_sqlite.db", table_name="la_zip_code"):
        self.db_path = db_path
        self.table_name = table_name

    # Create ZIP code table
    def create_table(self):
        """Create the ZIP code table if it does not exist yet.

        The confirmation message is printed whether the table was newly
        created or was already present.

        Raises:
            sqlite3.Error: If the database cannot be opened or the statement
                fails; the connection is closed before the error propagates.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute(f"""
                CREATE TABLE IF NOT EXISTS {self.table_name} (
                    Zip_Code_ID INTEGER PRIMARY KEY AUTOINCREMENT,
                    Zip_Code TEXT NOT NULL UNIQUE,
                    Is_City_Zip INTEGER DEFAULT 0,
                    Created_At DATETIME DEFAULT (datetime('now')),
                    Is_Processed INTEGER DEFAULT 0
                )
            """)
            conn.commit()
        finally:
            # Release the file handle even when the DDL statement raises.
            conn.close()
        print(f"Created table '{self.table_name}'.")

    # Insert ZIP codes and mark LA city ZIPs
    def insert_zip_codes(self, zip_codes, la_city_zip_codes):
        """Insert ZIP codes that are not stored yet and flag the city ones.

        Codes already present in the table (or repeated within ``zip_codes``)
        are counted as skipped. New rows get ``Is_Processed = 0`` and
        ``Is_City_Zip = 1`` when the code appears in ``la_city_zip_codes``.
        The whole batch is committed once at the end, so a failure part-way
        through leaves the table unchanged.

        Args:
            zip_codes: Iterable of ZIP code strings to store. When falsy
                (empty or ``None``) nothing is done.
            la_city_zip_codes: Iterable of ZIP code strings considered city
                ZIPs. ``None`` is treated as empty. It is consumed exactly
                once, so one-shot iterators are safe to pass.

        Raises:
            sqlite3.Error: If a statement fails; the connection is closed
                before the error propagates.
        """
        if not zip_codes:
            print("No ZIP codes provided.")
            return

        # Snapshot into a set once: constant-time membership tests, and a
        # generator/iterator argument is not drained by repeated `in` checks.
        city_zip_codes = set(la_city_zip_codes or ())

        inserted = 0
        skipped = 0

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            for zip_code in zip_codes:
                # The same connection sees its own uncommitted inserts, so
                # duplicates inside `zip_codes` are detected here as well.
                cursor.execute(
                    f"SELECT 1 FROM {self.table_name} WHERE Zip_Code = ?",
                    (zip_code,)
                )
                if cursor.fetchone():
                    skipped += 1
                    continue

                is_city_zip = 1 if zip_code in city_zip_codes else 0

                cursor.execute(f"""
                    INSERT INTO {self.table_name} (Zip_Code, Is_City_Zip, Is_Processed)
                    VALUES (?, ?, 0)
                """, (zip_code, is_city_zip))

                inserted += 1

            conn.commit()
        finally:
            # Closing without a commit rolls back any pending inserts.
            conn.close()

        print(f"Inserted {inserted} ZIP codes.")
        print(f"Skipped {skipped} duplicates.")


if __name__ == "__main__":
    # Official LA city ZIP codes
    la_city_zip_codes = [
        '90001','90002','90003','90004','90005','90006','90007','90008','90010','90011',
        '90012','90013','90014','90015','90016','90017','90018','90019','90020','90021',
        '90023','90024','90025','90026','90027','90028','90029','90031','90032','90033',
        '90034','90035','90036','90037','90038','90039','90041','90042','90043','90044',
        '90045','90046','90047','90048','90049','90056','90057','90058','90059','90061',
        '90062','90063','90064','90065','90066','90067','90068','90069','90071','90073',
        '90077','90079','90089','90094','90095','90230','90232','90247','90248','90272',
        '90291','90292','90293','90302','90404','90501','90502','90710','90717','90731',
        '90732','90744','90810','91040','91042','91303','91304','91306','91307','91311',
        '91316','91324','91325','91326','91331','91335','91340','91342','91343','91344',
        '91345','91352','91356','91364','91367','91401','91402','91403','91405','91406',
        '91411','91423','91436','91601','91602','91604','91605','91606','91607'
    ]

    # Scrape California ZIP codes
    url = "https://www.unitedstateszipcodes.org/ca/"
    # Browser-like headers are sent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": "https://www.google.com/"
    }

    # requests has no default timeout; without one a stalled server would
    # hang this cell forever.
    request_timeout_seconds = 30
    response = requests.get(url, headers=headers, timeout=request_timeout_seconds)
    if response.status_code != 200:
        # RuntimeError is still caught by any `except Exception` handler.
        raise RuntimeError(f"Failed to fetch page: {response.status_code}")

    soup = BeautifulSoup(response.content, "html.parser")
    zip_blocks = soup.find_all("div", class_="list-group-item")

    # Keep the text of the "prefix-col1" cell for every entry whose
    # "prefix-col4" cell mentions Los Angeles County.
    # NOTE(review): presumably col1 is the ZIP code and col4 the county
    # column of the page's table -- confirm against the live markup.
    la_zip_codes = []
    for block in zip_blocks:
        zip_div = block.find("div", class_="prefix-col1")
        county_div = block.find("div", class_="prefix-col4")
        if zip_div and county_div:
            if "Los Angeles County" in county_div.text:
                la_zip_codes.append(zip_div.text.strip())

    print(f"Found {len(la_zip_codes)} LA County ZIP codes.")

    # Persist the scraped codes, flagging the ones listed as LA city ZIPs.
    zip_manager = ZipCodeManager()
    zip_manager.create_table()
    zip_manager.insert_zip_codes(la_zip_codes, la_city_zip_codes)

 Found 528 ZIP codes in Los Angeles County.
✅ Created table 'la_zip_code' with Is_City_Zip and Is_Processed fields.
Inserted 0 new ZIP codes into 'la_zip_code'.
Skipped 528 duplicate ZIP codes.


In [7]:
import pandas as pd
import sqlite3

# Database path
DB_PATH = "../db/database_sqlite.db"

# Table to load
TABLE_NAME = "la_zip_code"

# Connect and read the whole table into a DataFrame. The connection is
# closed in `finally` so it is not leaked when the query fails
# (for example, when the table does not exist yet).
conn = sqlite3.connect(DB_PATH)
try:
    df = pd.read_sql_query(f"SELECT * FROM {TABLE_NAME}", conn)
finally:
    conn.close()

# Display data (bare last expression -> rich notebook rendering)
df

Unnamed: 0,Zip_Code_ID,Zip_Code,Is_City_Zip,Created_At,Is_Processed
0,1,90001,1,2025-04-13 14:10:08,1
1,2,90002,1,2025-04-13 14:10:08,1
2,3,90003,1,2025-04-13 14:10:08,1
3,4,90004,1,2025-04-13 14:10:08,1
4,5,90005,1,2025-04-13 14:10:08,1
...,...,...,...,...,...
523,524,93584,0,2025-04-13 14:10:08,0
524,525,93586,0,2025-04-13 14:10:08,0
525,526,93590,0,2025-04-13 14:10:08,0
526,527,93591,0,2025-04-13 14:10:08,1
