# CA DMV
> Fetch field office locations

---

#### Import Python tools and Jupyter config

In [40]:
import us
import json
import time
import random
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd
import altair_cnn as altcnn
from bs4 import BeautifulSoup
from urllib.parse import quote
from tqdm.notebook import tqdm

In [41]:
jupyter_black.load()
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None
alt.themes.register("cnn", altcnn.theme)
alt.themes.enable("cnn")

ThemeRegistry.enable('cnn')

---

In [49]:
# Headers setup
headers = {
    "accept": "*/*",
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
}

now = pd.Timestamp.now().strftime("%Y-%m-%d %H:00:00")

# Load field office URLs
df = pd.read_json("data/processed/dmv_locations.json")
field_office_urls = df["url"].to_list()

# List to store extracted wait times
times_dicts_list = []

# Loop through each URL and extract wait times
for url in tqdm(field_office_urls):
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract appointment and non-appointment wait times safely
    try:
        appt_time_text = soup.find_all("span", class_="p medium")[0].text.strip()
        appt_time = (
            float(appt_time_text) if appt_time_text.lower() != "closed" else pd.NaT
        )
    except (IndexError, ValueError):
        appt_time = pd.NaT

    try:
        no_appt_time_text = soup.find_all("span", class_="p medium")[1].text.strip()
        no_appt_time = (
            float(no_appt_time_text)
            if no_appt_time_text.lower() != "closed"
            else pd.NaT
        )
    except (IndexError, ValueError):
        no_appt_time = pd.NaT

    times_dicts_list.append(
        {
            "location": url.strip().split("/")[5],
            "type": url.strip().split("/")[4],
            "appt_wait": appt_time,
            "no_appt_wait": no_appt_time,
            "captured": now,
        }
    )

# Optionally convert the list to a DataFrame or save it as needed
wait_times_df = pd.DataFrame(times_dicts_list)

  0%|          | 0/188 [00:00<?, ?it/s]

In [51]:
wait_times_df

Unnamed: 0,location,type,appt_wait,no_appt_wait,captured
0,long-beach,field-office,0.0,6.0,2024-09-30 08:00:00
1,los-angeles,field-office,0.0,7.0,2024-09-30 08:00:00
2,los-angeles-metro-ibc,field-office,0.0,0.0,2024-09-30 08:00:00
3,lincoln-park,field-office,0.0,4.0,2024-09-30 08:00:00
4,west-hollywood,field-office,0.0,1.0,2024-09-30 08:00:00
...,...,...,...,...,...
183,alturas,field-office,0.0,0.0,2024-09-30 08:00:00
184,fall-river-mills,field-office,NaT,NaT,2024-09-30 08:00:00
185,mt-shasta,field-office,NaT,NaT,2024-09-30 08:00:00
186,red-bluff,field-office,0.0,4.0,2024-09-30 08:00:00


In [None]:
wait_times_df.to_csv("data/processed/wait_times.csv", index=False)
wait_times_df.to_json("data/processed/wait_times.json", indent=4, orient="records")