In [26]:
import pandas as pd
import numpy as np

from tabulate import tabulate

import requests
import io
import json
from bs4 import BeautifulSoup

import duckdb

In [24]:
def show_table(table):
    """Print ``table`` as a plain-text grid in tabulate's ``presto`` format.

    Column headers are taken from the table's keys and the row index is
    left out of the rendered output.
    """
    rendered = tabulate(
        table,
        headers='keys',
        showindex=False,
        tablefmt='presto',
    )
    print(rendered)

In [12]:
# Locations listing page on the Contra Costa County Library BiblioCommons site.
location_base_url = "https://ccclib.bibliocommons.com/v2/locations"

In [13]:
# Fetch the locations page. requests applies no timeout by default, so an
# explicit one keeps a stalled connection from hanging the notebook forever.
response = requests.get(location_base_url, timeout=30)

In [33]:
# Fail fast: raises requests.HTTPError when the server answered with a 4xx/5xx status.
response.raise_for_status()

In [27]:
status_code = response.status_code
print(status_code)

if status_code != 200:
    # Show the body for debugging, then stop. Without this, a failed fetch
    # leaves location_html_content undefined (NameError further down) or,
    # on a re-run, silently keeps the HTML from an earlier successful run.
    print(response.text)
    raise RuntimeError(
        f'Expected HTTP 200 from {response.url}, got {status_code}'
    )

# Raw HTML of the locations page, parsed by the cells below.
location_html_content = response.text

200


In [28]:
# Preview only the start of the page: printing the full HTML document floods
# the notebook output and bloats the saved file.
print(location_html_content[:1000])

<!DOCTYPE html>
<html lang="en-US" >
  <head>
    <!-- Data Layer -->
    <script>
      dataLayer = [{"bc.localBranch":null,"bc.product":"core","bc.gaCrossDomainEnabled":false,"bc.cmsEnabled":true,"bc.language":"en-US","bc.externalEventsGAId":null,"bc.libraryId":154,"bc.ga4MeasurementId":"G-ZET0BKEQP0","bc.version":"nerf17 Version 9.20.2@c0175bc Last updated 2024/02/08 12:56 Built 2024/02/08 16:06","bc.gaCrossDomainNames":[],"bc.externalCoreGAId":"UA-387101-12","bc.externalGaId":null,"bc.siteId":"CA-CONTRACOSTA","context":{},"bc.domain":"ccclib","bc.cmsUrl":"https://ccclib.org","bc.longName":"Contra Costa County Library","bc.allowGoogleAdFeatures":false,"bc.shortName":"CCCLib","bc.crazyEggId":"00178067"}];
    </script>
    <!-- end Data Layer -->
    <!-- Start Google Tag Manager-->
    <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
            new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
            j=d.createElement(s),dl=l!='dataLay

In [17]:
# Parse the downloaded page with Python's built-in HTML parser.
bs = BeautifulSoup(location_html_content, features='html.parser')

In [21]:
def _parse_location_item(item):
    """Build one branch record from an ``article.cp-location-item-info`` element.

    Returns a dict with the keys ``id``, ``city``, ``website``,
    ``phone_number`` and ``address``. ``phone_number`` and ``address`` are
    ``None`` when the corresponding markup is missing from the element.
    """
    # The heading link carries both the display name and the branch URL,
    # so look it up once and reuse it.
    title_link = item.select_one('h2 > a')
    city = title_link.text
    link = title_link['href']
    # The numeric id is the last path segment of the branch URL; rstrip
    # guards against a trailing slash leaving an empty last segment.
    lid = int(link.rstrip('/').split('/')[-1])

    # The contacts block, and the link inside it, may each be absent.
    contacts = item.select_one('div.contacts')
    phone_link = contacts.select_one('a') if contacts is not None else None
    phone = phone_link.get_text() if phone_link is not None else None

    # The address is the first two <div> lines joined by a space; tolerate
    # a missing block or a single-line address instead of raising.
    address_block = item.select_one('div.cp-location-address')
    address_divs = address_block.select('div')[:2] if address_block is not None else []
    address = ' '.join(div.text for div in address_divs) if address_divs else None

    return {
        'id': lid,
        'city': city,
        'website': link,
        'phone_number': phone,
        'address': address
    }


locations_data = []

for item in bs.select('article.cp-location-item-info'):
    location_json_data = _parse_location_item(item)
    print(location_json_data)
    locations_data.append(location_json_data)

{'id': 1, 'city': 'Antioch', 'website': 'https://ccclib.org/locations/1', 'phone_number': '(925) 757-9224', 'address': '501 W. 18th Street Antioch, CA 94509'}
{'id': 2, 'city': 'Bay Point', 'website': 'https://ccclib.org/locations/2', 'phone_number': '(925) 458-9597', 'address': '205 Pacifica Avenue Bay Point, CA 94565'}
{'id': 4, 'city': 'Brentwood', 'website': 'https://ccclib.org/locations/4', 'phone_number': '(925) 516-5290', 'address': '104 Oak Street Brentwood, California 94513'}
{'id': 6, 'city': 'Clayton', 'website': 'https://ccclib.org/locations/6', 'phone_number': '(925) 673-0659', 'address': '6125 Clayton Road Clayton, CA 94517'}
{'id': 7, 'city': 'Concord', 'website': 'https://ccclib.org/locations/7', 'phone_number': '(925) 646-5455', 'address': '2900 Salvio Street Concord, CA 94519'}
{'id': 8, 'city': 'Crockett', 'website': 'https://ccclib.org/locations/8', 'phone_number': '(510) 787-2345', 'address': '991 Loring Avenue Crockett, CA 94525'}
{'id': 9, 'city': 'Danville', 'we

In [25]:
# One row per scraped branch record, ordered by the numeric branch id.
locations = pd.DataFrame(data=locations_data).sort_values('id')
show_table(locations)

   id | city                           | website                         | phone_number   | address
------+--------------------------------+---------------------------------+----------------+-------------------------------------------------
    1 | Antioch                        | https://ccclib.org/locations/1  | (925) 757-9224 | 501 W. 18th Street Antioch, CA 94509
    2 | Bay Point                      | https://ccclib.org/locations/2  | (925) 458-9597 | 205 Pacifica Avenue Bay Point, CA 94565
    4 | Brentwood                      | https://ccclib.org/locations/4  | (925) 516-5290 | 104 Oak Street Brentwood, California 94513
    6 | Clayton                        | https://ccclib.org/locations/6  | (925) 673-0659 | 6125 Clayton Road Clayton, CA 94517
    7 | Concord                        | https://ccclib.org/locations/7  | (925) 646-5455 | 2900 Salvio Street Concord, CA 94519
    8 | Crockett                       | https://ccclib.org/locations/8  | (510) 787-2345 | 991 Loring Ave

In [29]:
# DuckDB resolves the table name `locations` to the pandas DataFrame of the
# same name in the notebook namespace; ORDER BY 1 sorts on the first column.
show_table(
    duckdb.query("SELECT * FROM locations ORDER BY 1").df()
)

   id | city                           | website                         | phone_number   | address
------+--------------------------------+---------------------------------+----------------+-------------------------------------------------
    1 | Antioch                        | https://ccclib.org/locations/1  | (925) 757-9224 | 501 W. 18th Street Antioch, CA 94509
    2 | Bay Point                      | https://ccclib.org/locations/2  | (925) 458-9597 | 205 Pacifica Avenue Bay Point, CA 94565
    4 | Brentwood                      | https://ccclib.org/locations/4  | (925) 516-5290 | 104 Oak Street Brentwood, California 94513
    6 | Clayton                        | https://ccclib.org/locations/6  | (925) 673-0659 | 6125 Clayton Road Clayton, CA 94517
    7 | Concord                        | https://ccclib.org/locations/7  | (925) 646-5455 | 2900 Salvio Street Concord, CA 94519
    8 | Crockett                       | https://ccclib.org/locations/8  | (510) 787-2345 | 991 Loring Ave