<a href="https://colab.research.google.com/github/tomknightatl/USCCB/blob/main/Find_Parish_Directory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# Cell 1: Import necessary libraries

import os
import re
import sqlite3
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from google.colab import userdata

In [8]:
# Cell 2: Clone GitHub repository and configure Git

# GitHub credentials
GITHUB_REPO = 'USCCB'
GITHUB_USERNAME = userdata.get('GitHubUserforUSCCB')
GITHUB_PAT = userdata.get('GitHubPATforUSCCB')

# GitHub repository URL
REPO_URL = f"https://{GITHUB_USERNAME}:{GITHUB_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

# Check if the repository directory already exists
if not os.path.exists(GITHUB_REPO):
    # Clone the repository
    !git clone {REPO_URL}
    os.chdir(GITHUB_REPO)
else:
    print(f"Repository {GITHUB_REPO} already exists. Updating...")
    os.chdir(GITHUB_REPO)
    !git pull origin main

# Configure Git
!git config --global user.email "tomk@github.leemail.me"
!git config --global user.name "tomknightatl"

Cloning into 'USCCB'...
remote: Enumerating objects: 114, done.[K
remote: Counting objects: 100% (114/114), done.[K
remote: Compressing objects: 100% (98/98), done.[K
remote: Total 114 (delta 64), reused 30 (delta 14), pack-reused 0 (from 0)[K
Receiving objects: 100% (114/114), 65.98 KiB | 1.29 MiB/s, done.
Resolving deltas: 100% (64/64), done.


In [9]:
# Cell 3: Fetch and parse the main page
url = "https://archatl.com/"
# requests has no default timeout; without one a stalled server would hang the
# notebook indefinitely.
response = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx status instead of silently parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

In [10]:
# Cell 4: Find the URL for the parish listing page

# `string=` is the supported spelling of the deprecated `text=` keyword, and
# `href=True` restricts the match to anchors that actually carry a link so the
# subscript below cannot raise KeyError.
parish_link = soup.find('a', string=re.compile('Find a Parish'), href=True)
if parish_link:
    # Resolve against the URL of the page that was fetched so a relative href
    # still yields an absolute URL; an absolute href is returned unchanged.
    parish_url = urljoin(response.url, parish_link['href'])
    print(f"Parish listing URL: {parish_url}")
else:
    print("Parish listing URL not found")
    parish_url = None

Parish listing URL: https://archatl.com/parishes/find-a-parish/


  parish_link = soup.find('a', text=re.compile('Find a Parish'))


In [11]:
# Cell 5: Fetch and parse the parish listing page

if parish_url:
    # Bound the wait so a stalled server cannot hang the notebook.
    parish_response = requests.get(parish_url, timeout=30)
    # Raise on 4xx/5xx so the success message below is only printed when the
    # page really was retrieved.
    parish_response.raise_for_status()
    parish_soup = BeautifulSoup(parish_response.content, 'html.parser')
    print("Parish listing page fetched successfully")
else:
    print("Cannot fetch parish listing page")
    parish_soup = None

Parish listing page fetched successfully


In [12]:
# Cell 6: Extract parish information and store in SQLite database


def _extract_parish_row(parish_element):
    """Return a ``(name, address, url)`` tuple for one parish element.

    Each field falls back to a placeholder string when the corresponding tag
    is missing, so the returned tuple always contains three strings:

    * name    -- stripped text of the first ``<h2>``
    * address -- stripped text of the first ``<p>`` whose class matches 'address'
    * url     -- ``href`` of the first ``<a>`` whose href matches 'http'
    """
    name_tag = parish_element.find('h2')
    address_tag = parish_element.find('p', class_=re.compile('address'))
    link_tag = parish_element.find('a', href=re.compile('http'))
    return (
        name_tag.text.strip() if name_tag else "Name not found",
        address_tag.text.strip() if address_tag else "Address not found",
        link_tag['href'] if link_tag else "URL not found",
    )


# The connection is intentionally left open: the verification cell that
# follows reuses `cursor` and closes `conn`.
conn = sqlite3.connect('data.db')
cursor = conn.cursor()

# Create table
cursor.execute('''CREATE TABLE IF NOT EXISTS parishes
                  (name TEXT, address TEXT, url TEXT)''')

if parish_soup:
    # Try different selectors to find parish information, most specific first
    parishes = parish_soup.find_all('div', class_='parish-item')
    if not parishes:
        parishes = parish_soup.find_all('div', class_='parish')
    if not parishes:
        parishes = parish_soup.find_all('div', class_=re.compile('parish'))

    print(f"Found {len(parishes)} potential parish elements")

    # Separate names are used here so the notebook-level `url` (the site's
    # main page) is not overwritten by per-parish values.
    parish_rows = [_extract_parish_row(element) for element in parishes]

    # The table has no uniqueness constraint, so a plain INSERT would add the
    # same rows again every time this cell is re-run. Only insert rows that
    # are not already present.
    insert_if_absent_sql = (
        "INSERT INTO parishes (name, address, url) "
        "SELECT ?, ?, ? "
        "WHERE NOT EXISTS ("
        "SELECT 1 FROM parishes WHERE name = ? AND address = ? AND url = ?)"
    )
    changes_before = conn.total_changes
    # Each row is bound twice: once for the SELECT list, once for the check.
    cursor.executemany(insert_if_absent_sql, [row + row for row in parish_rows])
    conn.commit()
    inserted_count = conn.total_changes - changes_before

    print(f"Inserted {inserted_count} parishes into the database")
else:
    print("No parish information to extract")

Found 0 potential parish elements
Inserted 0 parishes into the database


In [13]:
# Cell 7: Verify the data in the SQLite database

# Show at most five stored rows as a sanity check, then release the database.
# The close lives in `finally` so the connection is not leaked if the query
# or the printing raises.
try:
    cursor.execute("SELECT * FROM parishes LIMIT 5")
    rows = cursor.fetchall()
    for row in rows:
        print(row)
finally:
    conn.close()
    print("\nDatabase connection closed")


Database connection closed


In [14]:
# Cell 8: Commit changes and push to GitHub
# Add changes to git
!git add data.db

# Commit changes
!git commit -m "Added one sample record in data.db using Clone_and_Update_GitHub_data.ipynb"

# Push changes to GitHub
!git push origin main

[main 2dd0c21] Added one sample record in data.db using Clone_and_Update_GitHub_data.ipynb
 1 file changed, 0 insertions(+), 0 deletions(-)
Enumerating objects: 5, done.
Counting objects: 100% (5/5), done.
Delta compression using up to 2 threads
Compressing objects: 100% (3/3), done.
Writing objects: 100% (3/3), 461 bytes | 461.00 KiB/s, done.
Total 3 (delta 2), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas: 100% (2/2), completed with 2 local objects.[K
To https://github.com/tomknightatl/USCCB.git
   f0fd386..2dd0c21  main -> main
