# Parsing PDF and text extraction

In [None]:
# Install libraries
# %pip (rather than !pip) installs into the environment of the running kernel.
# beautifulsoup4, lxml and tqdm are imported/used by the crawling cells further down.
%pip install pandas PyPDF2 python-dotenv requests PyMuPDF langchain langchain-community pdf2image pillow pytesseract beautifulsoup4 lxml tqdm
# System packages: -y answers the confirmation prompt (the notebook has no interactive stdin);
# poppler-utils provides the pdftoppm binary that pdf2image shells out to.
!apt-get install -y tesseract-ocr poppler-utils

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting PyMuPDF
  Downloading pymupdf-1.25.4-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.20-py3-none-any.whl.metadata (2.4 kB)
Collecting pdf2image
  Downloading pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)
Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Collecting langchain
  Downloading langchain-0.3.21-py3-none-any.whl.metadata (7.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-c

In [None]:
import os
import PyPDF2
import fitz  # PyMuPDF
from pdf2image import convert_from_path
from PIL import Image
import pytesseract

# Ensure Tesseract is in your system's PATH or specify the executable path
# pytesseract.pytesseract.tesseract_cmd = r'<full_path_to_your_tesseract_executable>'

def crop_image(element, pageObj):
    """
    Crop a PDF page to an element's bounding box and save it as a one-page PDF.

    Args:
        element: Either an object exposing ``x0``, ``y0``, ``x1``, ``y1``
            attributes, or a PyMuPDF block tuple as returned by
            ``page.get_text("blocks")`` whose first four items are
            ``(x0, y0, x1, y1)``.
        pageObj: The PyPDF2 page that contains the element.

    Returns:
        The path of the written file (always ``'cropped_image.pdf'`` in the
        current working directory; it is overwritten on every call).

    Notes:
        * Attribute-style elements are used exactly as given
          (lower-left = (x0, y1), upper-right = (x1, y0)).
        * Tuple-style elements are treated as PyMuPDF coordinates (origin at the
          top-left, y growing downward) and are flipped into PDF page
          coordinates (origin at the bottom-left) before cropping.
          NOTE(review): the flip assumes an unrotated page whose CropBox equals
          its MediaBox - confirm for the documents being processed.
        * The page's mediabox is restored before returning, so cropping several
          elements from the same page does not compound.
    """
    if hasattr(element, "x0"):
        # Attribute-style element: keep the original corner assignment.
        image_left, image_right = element.x0, element.x1
        image_top, image_bottom = element.y0, element.y1
    else:
        # PyMuPDF block tuple: (x0, y0, x1, y1, text, block_no, block_type).
        x0, y0, x1, y1 = element[:4]
        page_left = float(pageObj.mediabox.left)
        page_top = float(pageObj.mediabox.top)
        image_left, image_right = page_left + x0, page_left + x1
        # Flip the y axis: distance from the top edge -> height above the bottom edge.
        image_top, image_bottom = page_top - y0, page_top - y1

    # Remember the full-page box so the caller's page object is left unchanged.
    original_lower_left = tuple(pageObj.mediabox.lower_left)
    original_upper_right = tuple(pageObj.mediabox.upper_right)

    cropped_pdf_path = 'cropped_image.pdf'
    try:
        # Crop the page using coordinates (left, bottom, right, top)
        pageObj.mediabox.lower_left = (image_left, image_bottom)
        pageObj.mediabox.upper_right = (image_right, image_top)

        # Save the cropped page to a new PDF
        cropped_pdf_writer = PyPDF2.PdfWriter()
        cropped_pdf_writer.add_page(pageObj)
        with open(cropped_pdf_path, 'wb') as cropped_pdf_file:
            cropped_pdf_writer.write(cropped_pdf_file)
    finally:
        pageObj.mediabox.lower_left = original_lower_left
        pageObj.mediabox.upper_right = original_upper_right

    return cropped_pdf_path

def convert_to_images(input_file, dpi=500):
    """
    Convert a PDF file to images.

    Args:
        input_file: Path of the PDF to rasterise.
        dpi: Rendering resolution passed to ``pdf2image.convert_from_path``.
            Defaults to 500, the value that was previously hard-coded.

    Returns:
        Whatever ``convert_from_path`` returns for the file (the caller in this
        notebook indexes it and calls ``.save`` on the first item).
    """
    return convert_from_path(input_file, dpi=dpi)

def image_to_text(image_path):
    """
    Extract text from an image using OCR.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the image.
    """
    # Use the image as a context manager so its file handle is released as soon
    # as OCR finishes instead of lingering until garbage collection.
    with Image.open(image_path) as img:
        return pytesseract.image_to_string(img)

def process_pdf(file_path, output_dir):
    """
    Extract the text of a PDF (OCR-ing non-text blocks) into a .txt file.

    Every page is read with PyMuPDF's ``get_text("blocks")``. Blocks whose type
    field (index 6) is 0 contribute their text (index 4); any other block is
    cropped out of the page, rendered to ``temp_image.png`` and passed through
    OCR. A newline is appended after each page.

    Args:
        file_path: Path of the PDF to process.
        output_dir: Existing directory that receives ``<pdf base name>.txt``.

    Returns:
        None. Any exception is caught and reported with a printed message so
        that one bad file does not abort a batch run.
    """
    try:
        page_texts = []
        with open(file_path, 'rb') as pdfFileObj:
            pdfReaded = PyPDF2.PdfReader(pdfFileObj)
            # The context manager closes the PyMuPDF document even when a page fails.
            with fitz.open(file_path) as doc:
                for i in range(doc.page_count):
                    parts = []
                    for element in doc.load_page(i).get_text("blocks"):
                        if element[6] == 0:  # If the element is text
                            parts.append(element[4])
                        else:  # If the element is an image
                            cropped_pdf_path = crop_image(element, pdfReaded.pages[i])
                            images = convert_to_images(cropped_pdf_path)
                            if images:
                                images[0].save('temp_image.png', "PNG")
                                parts.append(image_to_text('temp_image.png'))
                    parts.append("\n")
                    page_texts.append("".join(parts))

        # Joining once avoids repeatedly re-copying a growing string.
        doc_content = "".join(page_texts)

        # Swap only the final extension; str.replace would also rewrite any
        # other ".pdf" occurring inside the file name.
        base_name = os.path.splitext(os.path.basename(file_path))[0]
        txt_file = os.path.join(output_dir, base_name + '.txt')
        with open(txt_file, 'w', encoding='utf-8') as file:
            file.write(doc_content)
        print(f"Saved file: {txt_file}")

    except Exception as e:
        print(f"Error processing {file_path}: {e}")

def main(data_path="./data/raw/pdf_data", output_dir="./data/scraped/scraped_pdf_text_data"):
    """
    Run ``process_pdf`` on every ``.pdf`` file found directly in ``data_path``.

    Args:
        data_path: Folder containing the PDF files. Defaults to the location
            that was previously hard-coded.
        output_dir: Folder that receives the extracted ``.txt`` files; it is
            created if missing. Defaults to the previously hard-coded location.

    Raises:
        FileNotFoundError: If ``data_path`` does not exist (from ``os.listdir``).
    """
    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting keeps runs reproducible.
    for file in sorted(os.listdir(data_path)):
        if file.endswith(".pdf"):
            file_path = os.path.join(data_path, file)
            print(f"Processing file: {file_path}")
            process_pdf(file_path, output_dir)

# Entry point: run the PDF-to-text extraction with main()'s built-in paths
# whenever this code runs as the top-level program.
if __name__ == "__main__":
    main()


Processing file: ./data/raw/pdf_data/18398_Annual_Comprehensive_Financial_Report_December_31,_2021.pdf
Saved file: ./data/scraped/scraped_pdf_text_data/18398_Annual_Comprehensive_Financial_Report_December_31,_2021.txt
Processing file: ./data/raw/pdf_data/14147_Pittsburgh_CAR_PDF_Copy.pdf
Saved file: ./data/scraped/scraped_pdf_text_data/14147_Pittsburgh_CAR_PDF_Copy.txt
Processing file: ./data/raw/pdf_data/24770_Pittsburgh,_City_of_ACFR_FINAL_2023.pdf
Saved file: ./data/scraped/scraped_pdf_text_data/24770_Pittsburgh,_City_of_ACFR_FINAL_2023.txt
Processing file: ./data/raw/pdf_data/9623_ISP_Tax_Regulations.pdf
Saved file: ./data/scraped/scraped_pdf_text_data/9623_ISP_Tax_Regulations.txt
Processing file: ./data/raw/pdf_data/23255_2024_Operating_Budget.pdf
Saved file: ./data/scraped/scraped_pdf_text_data/23255_2024_Operating_Budget.txt
Processing file: ./data/raw/pdf_data/9624_Local_Services_Tax_Regulations.pdf
Saved file: ./data/scraped/scraped_pdf_text_data/9624_Local_Services_Tax_Regula

# Scraping Webpages and Checking for Duplicate Sublinks

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
import warnings
from urllib.parse import urljoin
from urllib3.exceptions import InsecureRequestWarning
import csv
from urllib.parse import urlparse, urlunparse

# Suppress warnings from unverified HTTPS requests
# (fetch_page_text below calls session.get(..., verify=False), i.e. TLS certificates are
# not checked; silencing InsecureRequestWarning hides that fact from the cell output.)
warnings.filterwarnings('ignore', category=InsecureRequestWarning)

# Suppress XMLParsedAsHTMLWarning if parsing XML with HTML parser
from bs4 import XMLParsedAsHTMLWarning
warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)

# User agents for simulating browser requests
# (fetch_page_text picks one of these at random for every request attempt)
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
]

# Initialize a session for persistent connection
# (module-level: shared by every fetch_page_text call in this cell)
session = requests.Session()

# Function to fetch the page text with a retry mechanism
def fetch_page_text(url, retries=3, timeout=5):
    """Fetch a page and return its visible text together with the parsed soup.

    Args:
        url: Address to request.
        retries: Maximum number of attempts.
        timeout: Per-request timeout in seconds, passed to ``session.get``.

    Returns:
        ``(text, soup)`` on success, where ``text`` is the page text with one
        stripped fragment per line and ``soup`` is the ``BeautifulSoup`` object;
        ``(None, None)`` when every attempt failed.

    Notes:
        * HTTP error statuses (raised by ``raise_for_status``) are not retried.
        * Timeouts, connection errors and any other exception are retried after
          a one-second pause; no pause follows the final attempt.
        * Requests are sent with ``verify=False``, so TLS certificates are NOT
          validated. NOTE(review): confirm this is acceptable for the crawled sites.
    """
    for attempt in range(retries):
        try:
            # Send a GET request to the URL with a randomly chosen user agent
            headers = {'User-Agent': random.choice(user_agents)}
            response = session.get(url, timeout=timeout, headers=headers, verify=False)
            response.raise_for_status()  # Raise an error for bad responses

            # Parse the page content
            soup = BeautifulSoup(response.content, 'lxml')
            return soup.get_text(separator='\n', strip=True), soup

        except requests.exceptions.HTTPError as e:
            # Handle HTTP errors (e.g., 404, 400)
            print(f"Attempt {attempt + 1} failed: {e}")
            break  # Stop retrying on HTTP errors

        except requests.exceptions.Timeout as e:
            # Handle timeout errors
            print(f"Attempt {attempt + 1} failed due to timeout: {e}")

        except requests.exceptions.ConnectionError as e:
            # Handle connection errors
            print(f"Attempt {attempt + 1} failed due to connection error: {e}")

        except Exception as e:
            # Handle any other exceptions
            print(f"Attempt {attempt + 1} failed with an unexpected error: {e}")

        # Wait before retrying - but only if another attempt will actually follow.
        if attempt < retries - 1:
            time.sleep(1)

    return None, None  # Return None if all attempts fail

# Collect every hyperlink target found in a parsed page
def extract_sublinks(soup, base_url):
    """Return the absolute URL of each <a href=...> tag in ``soup``.

    Relative hrefs are resolved against ``base_url``; the links are returned in
    the order ``soup.find_all`` yields them, duplicates included.
    """
    anchors = soup.find_all('a', href=True)
    return [urljoin(base_url, anchor['href']) for anchor in anchors]

# Function to crawl through the provided URLs and fetch data
def crawl_urls(url_list):
    """Crawl the list of URLs and return crawled data and sublinks.

    Args:
        url_list: Iterable of URLs to fetch with ``fetch_page_text``.

    Returns:
        A tuple ``(results, sublinks_data)`` where ``results`` maps each
        successfully fetched URL to its page text and ``sublinks_data`` is a
        list of ``(parent_url, sublink)`` pairs in crawl order.
    """
    results = {}
    sublinks_data = []  # List to store (parent_url, sublink) pairs
    for index, url in enumerate(url_list):
        print(f"Fetching: {url}")
        text, soup = fetch_page_text(url)

        # fetch_page_text signals failure with (None, None). Test for that
        # explicitly: a page that was fetched but has no visible text is not a
        # failure, and its links must still be collected.
        if soup is None:
            print(f"Failed to parse URL at index: {index}, URL: {url}")
            continue

        results[url] = text  # Store the crawled text
        sublinks_data.extend((url, sublink) for sublink in extract_sublinks(soup, url))
    return results, sublinks_data

# Function to save the crawled text data to .txt files
def save_crawled_data(crawled_data, urls, output_dir="./data/scraped/scraped_web_text_data"):
    """Save crawled text data to individual .txt files.

    One file named ``<position>.txt`` is written per entry of ``urls``, so file
    numbers line up with positions in ``urls``. URLs missing from
    ``crawled_data`` (failed fetches) produce an empty file.

    Args:
        crawled_data: Mapping of URL -> page text.
        urls: Iterable of URLs that defines the file numbering.
        output_dir: Destination folder; created if it does not exist. Defaults
            to the location that was previously hard-coded.
    """
    import os  # local import: this cell's import block does not bring in os

    # Without this, open() below fails with FileNotFoundError on a fresh checkout.
    os.makedirs(output_dir, exist_ok=True)

    for index, url in enumerate(urls):
        text = crawled_data.get(url, "")
        cleaned_text = text.replace('\n', ' ')  # Remove newline characters

        # Define the output file path
        output_file = os.path.join(output_dir, f"{index}.txt")

        # Save the cleaned text to the file
        with open(output_file, 'w', encoding='utf-8') as file:
            file.write(cleaned_text)

# Persist the (parent URL, sublink) pairs gathered while crawling
def save_sublinks_to_csv(sublinks_data, output_csv):
    """Write the sublink pairs to ``output_csv`` as a two-column CSV.

    The columns are named 'Parent URL' and 'Sublink'; the DataFrame index is
    not written.
    """
    column_names = ['Parent URL', 'Sublink']
    sublink_table = pd.DataFrame(data=sublinks_data, columns=column_names)
    sublink_table.to_csv(output_csv, index=False)

# Strip the "#fragment" part so URLs that differ only by anchor compare equal
def normalize_url(url):
    """Return ``url`` re-assembled from its parsed components without the fragment."""
    components = urlparse(url)
    fragmentless = components._replace(fragment='')
    return fragmentless.geturl()

# Function to check and remove duplicate URLs based on the base URL
def remove_duplicate_urls(file_path, column_name):
    """Remove duplicate URLs from the specified column in the CSV file.

    Two URLs count as duplicates when they are equal after ``normalize_url``
    (i.e. they differ only in their ``#fragment``); the first occurrence wins.

    The result is written next to the input as ``<name>_filtered<ext>`` with two
    extra columns: ``index`` (the row's position in the input file) and
    ``new_index`` (0..n-1 numbering of the surviving rows).

    Args:
        file_path: CSV file to read.
        column_name: Name of the column that holds the URLs.
    """
    import os  # local import: this cell's import block does not bring in os

    data = pd.read_csv(file_path)

    # Drop rows without a URL *before* normalising: urlparse raises on NaN, so
    # filtering afterwards would never be reached.
    data = data.dropna(subset=[column_name])
    normalized_urls = data[column_name].apply(normalize_url)

    data_cleaned = data.loc[~normalized_urls.duplicated(keep='first')]  # Remove duplicates
    data_cleaned = data_cleaned.reset_index(drop=False)  # Keep original index
    data_cleaned['new_index'] = range(len(data_cleaned))  # New index column

    # Insert "_filtered" before the extension only; str.replace would also touch
    # ".csv" inside directory names and could overwrite the input when absent.
    root, extension = os.path.splitext(file_path)
    output_file = f"{root}_filtered{extension}"
    data_cleaned.to_csv(output_file, index=False)
    print(f"\nDuplicates removed. Cleaned data saved to {output_file}")

# Entry point for the seed-page crawl: fetch every 'Webpage' source URL, store its text,
# record the links found on it, then write a de-duplicated copy of the link list.
if __name__ == "__main__":
    # Read the CSV file with URLs
    file_path = './data/raw/csv_data/data_source.csv'
    data = pd.read_csv(file_path)

    # Keep the unique, non-null 'Source URL' values of rows whose 'Select' column is 'Webpage'
    urls = data[data['Select'] == 'Webpage']['Source URL'].dropna().unique()

    # Start crawling the URLs
    crawled_data, sublinks_data = crawl_urls(urls)

    # Save the crawled text data (one numbered file per entry of `urls`)
    save_crawled_data(crawled_data, urls)

    # Save sublinks to CSV
    output_csv = './data/scraped/parentlink_file_name_url_mapping.csv'
    save_sublinks_to_csv(sublinks_data, output_csv)

    # Remove duplicates from the sublink CSV
    # NOTE(review): the next crawling cell reads 'sublink_file_name_url_mapping_filtered.csv'
    # and a 'Value' column, whereas this step works on 'parentlink_file_name_url_mapping.csv'
    # and a 'Sublink' column - confirm how that input file is produced.
    remove_duplicate_urls(output_csv, 'Sublink')


Fetching: https://en.wikipedia.org/wiki/Pittsburgh
Fetching: https://en.wikipedia.org/wiki/History_of_Pittsburgh
Fetching: https://pittsburghpa.gov/pittsburgh/pgh-about
Attempt 1 failed: 404 Client Error: Not Found for url: https://www.pittsburghpa.gov/pittsburgh/pgh-about
Failed to parse URL at index: 2, URL: https://pittsburghpa.gov/pittsburgh/pgh-about
Fetching: https://www.britannica.com/place/Pittsburgh
Fetching: https://pittsburghpa.gov/events/index.html
Attempt 1 failed: 404 Client Error: Not Found for url: https://www.pittsburghpa.gov/events/index.html
Failed to parse URL at index: 4, URL: https://pittsburghpa.gov/events/index.html
Fetching: https://www.visitpittsburgh.com/blog/move-in-day-pittsburgh-college-guide/
Fetching: https://pittsburghpa.gov/mayor/pghmayors
Attempt 1 failed: 404 Client Error: Not Found for url: https://www.pittsburghpa.gov/mayor/pghmayors
Failed to parse URL at index: 6, URL: https://pittsburghpa.gov/mayor/pghmayors
Fetching: https://en.wikipedia.org/wi

This cell parses the static webpages listed in the collected sublink file; it is part of the web crawling process.
Using the sublink URLs collected above, the script fetches each webpage and extracts ALL of its text data (provided the request succeeds and does not time out).

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
import warnings
from urllib.parse import urljoin
from urllib3.exceptions import InsecureRequestWarning
from tqdm import tqdm
import csv

# Define user agents
# (rebinds the `user_agents` list defined by the earlier crawling cell; same three entries)
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
]
# Silence the warning that requests made with verify=False would otherwise emit
warnings.filterwarnings('ignore', category=InsecureRequestWarning)

# Define headers for requests
# The user agent is drawn once, when this cell runs, so every request issued through
# `headers` in this cell carries the same User-Agent string.
headers = {'User-Agent': random.choice(user_agents)}
# Fresh session shared by all requests below (replaces the earlier cell's `session`)
session = requests.Session()

# Fetch the page text with retry mechanism
def fetch_page_text(url, retries=1, timeout=5):
    """Fetch a page and return ``(text, soup)``, or ``(None, None)`` on failure.

    NOTE: this redefines the ``fetch_page_text`` of the earlier crawling cell.
    This variant defaults to a single attempt, reuses the module-level
    ``headers``, prints nothing, and does not pause between attempts.

    Args:
        url: Address to request.
        retries: Maximum number of attempts.
        timeout: Per-request timeout in seconds, passed to ``session.get``.

    Returns:
        ``(text, soup)`` where ``text`` is the page text with one stripped
        fragment per line and ``soup`` is the ``BeautifulSoup`` object. If every
        attempt raised a ``requests`` exception - or ``retries`` is zero or
        negative - ``(None, None)`` is returned, so callers can always unpack
        the result.

    Notes:
        Only ``requests.exceptions.RequestException`` is handled; other errors
        (for example from the parser) propagate to the caller. Requests are sent
        with ``verify=False``, so TLS certificates are NOT validated.
    """
    for _ in range(retries):
        try:
            # Send a GET request to the URL with a timeout
            response = session.get(url, timeout=timeout, headers=headers, verify=False)
            response.raise_for_status()  # Raise an error for bad responses

            # Parse the page content
            soup = BeautifulSoup(response.content, 'lxml')

            # Extract and return the text from the page and the soup object
            return soup.get_text(separator='\n', strip=True), soup

        except requests.exceptions.RequestException:
            continue  # try again while attempts remain

    return None, None

# Save the crawled text data to .txt files
def save_crawled_text(url, text, index, output_dir="./data/scraped/scraped_sublink_text_data"):
    """Write one page's text to ``<output_dir>/<index>.txt``.

    Args:
        url: URL the text came from (currently unused; kept for the call signature).
        text: Page text; newlines are replaced by single spaces before saving.
        index: Value used as the output file's base name.
        output_dir: Destination folder; created if it does not exist. Defaults
            to the location that was previously hard-coded.
    """
    import os  # local import: this cell's import block does not bring in os

    # Clean up the text by removing newlines
    cleaned_text = text.replace('\n', ' ')

    # Without this, open() below fails with FileNotFoundError on a fresh checkout.
    os.makedirs(output_dir, exist_ok=True)

    # Define the file name with the index from the URLs list
    output_file = os.path.join(output_dir, f"{index}.txt")

    # Save the cleaned text to the file
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(cleaned_text)

# Crawl through the URLs, parse and save one by one
def crawl_and_save(url_list, indexes):
    """Fetch each URL and immediately save its text under the matching index.

    Args:
        url_list: Sized iterable of URLs (for example a pandas Series). Entries
            that are not strings (such as NaN from an empty CSV cell) or that do
            not start with ``'http'`` are skipped silently.
        indexes: Iterable, parallel to ``url_list``, giving the file index that
            is passed to ``save_crawled_text`` for each URL.

    Returns:
        None. Pages that cannot be fetched, or that yield no text, are reported
        with a printed message that includes their position in ``url_list``.
    """
    # Pair URLs with their file indexes positionally. Looking the index up with
    # indexes[position] would turn into a label lookup on a pandas Series and
    # break as soon as the Series does not carry a default 0..n-1 index.
    pairs = zip(url_list, indexes)
    for position, (url, file_index) in tqdm(enumerate(pairs), total=len(url_list)):
        if not isinstance(url, str) or not url.startswith('http'):
            continue

        text, _ = fetch_page_text(url)
        if text:
            # Save the parsed text immediately after parsing
            save_crawled_text(url, text, file_index)
        else:
            print(f"Failed to parse URL at index {position}, URL: {url}")

# Entry point for the sublink crawl: fetch every URL listed in the filtered sublink CSV
# and store each page's text in its own numbered file.
if __name__ == "__main__":
    # Read the CSV file with URLs
    # NOTE(review): the earlier cell writes 'parentlink_file_name_url_mapping_filtered.csv'
    # with a 'Sublink' column; this cell expects a differently named file with a 'Value'
    # column - confirm where this input comes from.
    file_path = './data/scraped/sublink_file_name_url_mapping_filtered.csv'
    data = pd.read_csv(file_path)

    # URLs come from the 'Value' column; 'new_index' supplies the number used for
    # each page's output file name
    urls = data['Value']
    indexes = data['new_index']

    # Start crawling the URLs and save the result one by one
    crawl_and_save(urls, indexes)

    print("Crawling complete!")

  7%|▋         | 968/14621 [33:20<6:12:19,  1.64s/it]

Failed to parse URL at index 967, URL: https://rmucolonials.com/sports/2013/9/9/athletics_0909134014.aspx?id=694


  9%|▊         | 1245/14621 [42:44<5:40:00,  1.53s/it]

Failed to parse URL at index 1244, URL: https://en.wikipedia.org/w/index.php?title=Pennsylvania_American_Water&action=edit&redlink=1


 10%|▉         | 1460/14621 [50:08<9:53:06,  2.70s/it]

Failed to parse URL at index 1460, URL: https://triblive.com/local/regional/lawrence-county-added-to-pittsburgh-metro-area/


 10%|█         | 1464/14621 [50:13<6:38:04,  1.82s/it]

Failed to parse URL at index 1464, URL: https://www.usatoday.com/money/economy/2009-09-21-us-steel-pittsburgh_N.htm


 10%|█         | 1466/14621 [50:14<4:09:46,  1.14s/it]

Failed to parse URL at index 1465, URL: http://www.wtae.com/Just-How-Many-Bridges-Are-There-In-Pittsburgh/-/9681798/7685514/-/jaknsc/-/index.html


 10%|█         | 1467/14621 [50:15<3:51:01,  1.05s/it]

Failed to parse URL at index 1466, URL: http://articles.chicagotribune.com/1987-10-18/travel/8703180822_1_steel-truss-bridge-twin-bridges-arches


 10%|█         | 1470/14621 [50:22<6:32:13,  1.79s/it]

Failed to parse URL at index 1469, URL: http://www.bivouacbooks.com/bbv4i4s4.htm


 10%|█         | 1471/14621 [50:27<10:17:15,  2.82s/it]

Failed to parse URL at index 1470, URL: http://www.post-gazette.com/stories/local/community-eyewitness/eyewitness-1949-tv-makes-pittsburgh-a-new-promise-247120/


 10%|█         | 1475/14621 [50:36<9:19:23,  2.55s/it]

Failed to parse URL at index 1474, URL: http://www.epp.cmu.edu/graduate/faq_contacts_pittsburgh.html


 10%|█         | 1477/14621 [50:39<7:08:38,  1.96s/it]

Failed to parse URL at index 1477, URL: http://triblive.com/x/pittsburghtrib/lifestyles/s_621289.html#axzz2Qv5ER9ag


 10%|█         | 1483/14621 [50:48<5:52:29,  1.61s/it]

Failed to parse URL at index 1482, URL: https://www.uschamber.com/blog/innovate-or-die-pittsburgh-chose-innovate


 10%|█         | 1486/14621 [50:56<9:21:22,  2.56s/it]

Failed to parse URL at index 1485, URL: http://www.post-gazette.com/stories/business/news/in-desperate-1983-there-was-nowhere-for-pittsburghs-economy-to-go-but-up-667537/


 10%|█         | 1489/14621 [51:04<10:46:37,  2.95s/it]

Failed to parse URL at index 1488, URL: https://www.post-gazette.com/local/region/2013/10/13/30-Years-Pittsburgh-moves-from-heavy-industry-to-medicine-tech-energy/stories/201310130085


 10%|█         | 1490/14621 [51:04<7:53:24,  2.16s/it] 

Failed to parse URL at index 1489, URL: https://www.jstor.org/stable/j.ctt9qh7tx


 10%|█         | 1492/14621 [51:06<5:25:30,  1.49s/it]

Failed to parse URL at index 1491, URL: https://doi.org/10.2307%2Fj.ctt9qh7tx


 10%|█         | 1495/14621 [51:18<12:42:18,  3.48s/it]

Failed to parse URL at index 1494, URL: http://www.nsf.gov/statistics/infbrief/nsf13305/nsf13305.pdf


 10%|█         | 1496/14621 [51:27<18:46:20,  5.15s/it]

Failed to parse URL at index 1496, URL: http://triblive.com/news/adminpage/4255869-74/pittsburgh-survey-cities#axzz2XJlCYN00


 10%|█         | 1499/14621 [51:35<14:09:09,  3.88s/it]

Failed to parse URL at index 1498, URL: http://www.post-gazette.com/business/tech-news/2014/12/07/Google-effect-How-has-tech-giant-changed-Pittsburgh-s-commerce-and-culture/stories/201412040291


 10%|█         | 1501/14621 [51:40<12:06:19,  3.32s/it]

Failed to parse URL at index 1500, URL: http://174.143.38.57/wp-content/uploads/2010/06/S013_ROBOT-RxSellSheet.pdf


 10%|█         | 1506/14621 [51:49<6:44:08,  1.85s/it]

Failed to parse URL at index 1505, URL: http://www.ncfta.net/contact-ncfta.aspx


 10%|█         | 1508/14621 [51:55<9:04:37,  2.49s/it]

Failed to parse URL at index 1507, URL: http://www.rec.ri.cmu.edu/about/history/


 10%|█         | 1509/14621 [52:01<12:13:00,  3.35s/it]

Failed to parse URL at index 1508, URL: https://www.post-gazette.com/business/career-workplace/2014/12/10/Pittsburgh-s-employment-numbers-better-than-similar-cities/stories/201412100052


 10%|█         | 1510/14621 [52:01<9:05:57,  2.50s/it] 

Failed to parse URL at index 1509, URL: http://www.pittsburghlive.com/x/pittsburghtrib/business/s_580675.html


 10%|█         | 1511/14621 [52:06<11:48:36,  3.24s/it]

Failed to parse URL at index 1510, URL: http://www.post-gazette.com/stories/local/region/pittsburgh-region-sees-11th-consecutive-month-of-home-sales-increases-655305/


 10%|█         | 1512/14621 [52:07<9:34:45,  2.63s/it] 



 10%|█         | 1515/14621 [52:13<7:31:35,  2.07s/it]

Failed to parse URL at index 1515, URL: https://www.nytimes.com/2009/01/08/business/economy/08collapse.html
Failed to parse URL at index 1516, URL: http://triblive.com/business/headlines/3623127-74/housing-units-construction#axzz2N1FFN9Hi


 10%|█         | 1519/14621 [52:15<4:04:38,  1.12s/it]

Failed to parse URL at index 1518, URL: http://www.zillowstatic.com/vstatic/419b583f682a74b83f007039dd9c49f8/static/pages/visuals/neg-equity-map/v3/map.html?embed=1&loc=4/38.41056/-97.95410


 10%|█         | 1521/14621 [52:23<8:12:38,  2.26s/it]

Failed to parse URL at index 1520, URL: http://www.post-gazette.com/in-the-lead-2014-stories/2014/05/14/Community-Pittsburgh-most-bars-per-capita-second-most-pizza/stories/201405150065


 10%|█         | 1522/14621 [52:27<10:06:06,  2.78s/it]

Failed to parse URL at index 1522, URL: http://www.carnegielibrary.org/exhibit/hname.html


 10%|█         | 1527/14621 [52:42<13:00:49,  3.58s/it]

Failed to parse URL at index 1526, URL: http://old.post-gazette.com/pg/03001/700027-209.stm


 10%|█         | 1528/14621 [52:44<11:34:27,  3.18s/it]

Failed to parse URL at index 1528, URL: http://www.carnegielibrary.org/exhibit/hname2.html


 10%|█         | 1534/14621 [53:01<12:26:24,  3.42s/it]

Failed to parse URL at index 1533, URL: http://www.post-gazette.com/opinion/Op-Ed/2011/07/17/The-Next-Page-Are-yinz-from-Pittsburg/stories/201107170206


 11%|█         | 1538/14621 [53:09<8:18:01,  2.28s/it]

Failed to parse URL at index 1537, URL: https://www.friendsoftheriverfront.org/new_pages/historical.htm


 11%|█         | 1547/14621 [53:30<10:11:26,  2.81s/it]

Failed to parse URL at index 1546, URL: http://www.co.greene.pa.us/secured/gc2/history/Struggle-for-Possession.pdf



Assuming this really is an XML document, what you're doing might work, but you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the Python package 'lxml' installed, and pass the keyword argument `features="xml"` into the BeautifulSoup constructor.




  soup = BeautifulSoup(response.content, 'lxml')
 11%|█         | 1549/14621 [53:35<9:31:54,  2.63s/it] 

Failed to parse URL at index 1549, URL: https://triblive.com/local/pittsburgh-allegheny/pittsburgh-recognized-as-starting-point-for-lewis-and-clark-expedition/


 11%|█         | 1551/14621 [53:40<9:42:31,  2.67s/it]

Failed to parse URL at index 1550, URL: https://www.post-gazette.com/opinion/brian-oneill/2018/05/13/brian-o-neill-lewis-and-clark-trail-pittsburgh-mississippi-river/stories/201805130067


 11%|█         | 1555/14621 [53:50<10:49:14,  2.98s/it]

Failed to parse URL at index 1554, URL: https://web.archive.org/web/20120812191959/http://www.census.gov/population/www/documentation/twps0076/twps0076.html


 11%|█         | 1556/14621 [53:51<8:33:54,  2.36s/it] 

Failed to parse URL at index 1555, URL: https://www.census.gov/population/www/documentation/twps0076/twps0076.html


 11%|█         | 1558/14621 [54:00<12:34:03,  3.46s/it]

Failed to parse URL at index 1557, URL: http://old.post-gazette.com/lifestyle/20030218kids0218p9.asp


 11%|█         | 1561/14621 [54:10<13:24:53,  3.70s/it]

Failed to parse URL at index 1560, URL: http://www.post-gazette.com/columnists/20031119sally104col2p2.asp


 11%|█         | 1565/14621 [54:21<9:53:22,  2.73s/it] 

Failed to parse URL at index 1564, URL: http://www.hort.purdue.edu/newcrop/cropmap/pennsylvania/maps/PAeco3.html


 11%|█         | 1566/14621 [54:26<12:22:38,  3.41s/it]

Failed to parse URL at index 1565, URL: https://www.post-gazette.com/ae/books/2004/03/16/Learning-the-steps-Pitt-researcher-fell-for-city-s-stairs-and-has-published-a-book-that-maps-them/stories/200403160099


 11%|█         | 1570/14621 [54:35<8:42:31,  2.40s/it] 

Failed to parse URL at index 1569, URL: https://en.wikipedia.org/wiki/Category:CS1_maint:_unfit_URL


 11%|█         | 1572/14621 [54:38<6:38:18,  1.83s/it]

Failed to parse URL at index 1571, URL: http://www.city.pittsburgh.pa.us/portal/neighborhoods.html


 11%|█         | 1573/14621 [54:43<10:05:43,  2.79s/it]

Failed to parse URL at index 1572, URL: https://web.archive.org/web/20120401112817/http://www.emporis.com/building/us-steel-tower-pittsburgh-pa-usa


 11%|█         | 1575/14621 [54:47<8:52:10,  2.45s/it]

Failed to parse URL at index 1574, URL: http://www.portauthority.org/paac/SchedulesMaps/Maps.aspx


 11%|█         | 1576/14621 [54:52<11:53:25,  3.28s/it]

Failed to parse URL at index 1575, URL: http://www.post-gazette.com/opinion/brian-oneill/2014/01/09/Rising-home-prices-tell-Pittsburgh-s-uplifting-story/stories/201401090182
Failed to parse URL at index 1576, URL: https://investors.ae.com/news-releases/news-releases-details/2005/American-Eagle-Outfitters-Announces-Pittsburghs-SouthSide-Works-Location-As-New-Corporate-Headquarters/default.aspx


 11%|█         | 1578/14621 [54:57<10:47:00,  2.98s/it]

Failed to parse URL at index 1577, URL: https://archive.today/20120717005140/http://findarticles.com/p/articles/mi_m0EIN/is_2005_Oct_21/ai_n15726879


 11%|█         | 1590/14621 [56:06<9:35:46,  2.65s/it] 

Failed to parse URL at index 1589, URL: http://www.pittsburghlive.com/x/pittsburghtrib/s_429269.html


 11%|█         | 1593/14621 [56:14<11:08:19,  3.08s/it]

Failed to parse URL at index 1592, URL: https://www.washingtonpost.com/business/economy/in-the-paris-of-the-appalachians-theyre-not-buying-trumps-climate-talk/2017/06/06/6f3ddd8a-49f9-11e7-bc1b-fddbd8359dee_story.html


 11%|█         | 1599/14621 [56:28<7:58:26,  2.20s/it]

Failed to parse URL at index 1599, URL: https://search.worldcat.org/issn/1027-5606
Failed to parse URL at index 1600, URL: https://www.westword.com/news/photos-ten-most-chill-major-cities-in-the-summertime-and-where-denver-places-5831262


 11%|█         | 1604/14621 [56:38<7:35:13,  2.10s/it]

Failed to parse URL at index 1605, URL: https://w2.weather.gov/climate/xmacis.php?wfo=pbz


 11%|█         | 1608/14621 [56:44<7:21:31,  2.04s/it]

Failed to parse URL at index 1607, URL: http://www.erh.noaa.gov/pbz/thissnow.htm


 11%|█         | 1611/14621 [56:54<10:52:51,  3.01s/it]

Failed to parse URL at index 1610, URL: http://www.ncdc.noaa.gov/oa/climate/online/ccd/cldy.html


 11%|█         | 1613/14621 [57:02<13:15:28,  3.67s/it]

Failed to parse URL at index 1612, URL: http://www.ncdc.noaa.gov/oa/climate/online/ccd/pctpos.txt


 11%|█         | 1617/14621 [57:12<10:24:12,  2.88s/it]

Failed to parse URL at index 1617, URL: https://triblive.com/opinion/rich-fitzgerald-reflecting-on-12-years-of-service/


 11%|█         | 1620/14621 [57:17<7:30:27,  2.08s/it]

Failed to parse URL at index 1619, URL: http://www.stateoftheair.org/2013/city-rankings/most-polluted-cities.html


 11%|█         | 1621/14621 [57:22<10:15:30,  2.84s/it]

Failed to parse URL at index 1620, URL: https://web.archive.org/web/20150107185644/http://www.stateoftheair.org/2013/city-rankings/most-polluted-cities.html


 11%|█         | 1623/14621 [57:29<12:25:26,  3.44s/it]

Failed to parse URL at index 1622, URL: http://www.post-gazette.com/stories/local/neighborhoods-city/report-pittsburghs-air-quality-improving-but-still-among-most-polluted-684783/


 11%|█         | 1625/14621 [57:32<8:21:50,  2.32s/it] 

Failed to parse URL at index 1624, URL: http://www.pittsburghlive.com/x/pittsburghtrib/news/cityregion/s_565183.html


 11%|█         | 1626/14621 [57:37<11:12:27,  3.10s/it]

Failed to parse URL at index 1625, URL: http://www.post-gazette.com/news/nation/2013/12/10/8-Northeast-states-sue-over-pollution/stories/201312100110


 11%|█         | 1628/14621 [57:42<9:37:17,  2.67s/it] 

Failed to parse URL at index 1627, URL: http://www.stateoftheair.org/2012/msas/Pittsburgh-New-Castle-PA.html


 11%|█         | 1629/14621 [57:47<12:30:45,  3.47s/it]

Failed to parse URL at index 1628, URL: https://archive.today/20120918013148/http://www.wpxi.com/news/23287732/detail.html


 11%|█         | 1630/14621 [57:48<9:16:16,  2.57s/it] 

Failed to parse URL at index 1630, URL: https://nextcity.org/daily/entry/that-tree-on-the-corner-may-be-worth-more-than-your-house


 11%|█         | 1640/14621 [58:09<11:05:45,  3.08s/it]

Failed to parse URL at index 1639, URL: https://www.post-gazette.com/local/city/2018/01/22/Pittsburgh-Mayor-Bill-Peduto-executive-order-pushes-forward-PWSA-restructuring-water-board/stories/201801220112


 11%|█▏        | 1646/14621 [58:24<11:04:44,  3.07s/it]

Failed to parse URL at index 1645, URL: http://www.post-gazette.com/local/city/2017/06/07/alcosan-pittsburgh-epa-sewage-control-plan-stormwater-pa/stories/201706060181


 11%|█▏        | 1647/14621 [58:26<10:27:21,  2.90s/it]

Failed to parse URL at index 1647, URL: http://triblive.com/local/allegheny/12976229-74/gov-wolf-to-ok-bill-placing-pittsburghs-water-system-under-puc-oversight


 11%|█▏        | 1649/14621 [58:32<9:49:26,  2.73s/it] 

Failed to parse URL at index 1648, URL: http://www.post-gazette.com/local/city/2017/02/03/Peduto-administration-plans-advisory-team-to-assess-PWSA-Pittsburgh/stories/201702030228


 11%|█▏        | 1652/14621 [58:37<8:20:34,  2.32s/it]

Failed to parse URL at index 1652, URL: https://search.worldcat.org/issn/1059-1028


 11%|█▏        | 1654/14621 [58:38<5:30:03,  1.53s/it]

Failed to parse URL at index 1654, URL: https://search.worldcat.org/issn/0261-3077


 11%|█▏        | 1659/14621 [58:43<4:02:18,  1.12s/it]

Failed to parse URL at index 1658, URL: http://www.census.gov/prod/www/decennial.html


 11%|█▏        | 1660/14621 [58:48<7:44:31,  2.15s/it]

Failed to parse URL at index 1659, URL: https://www.census.gov/quickfacts/fact/table/pittsburghcitypennsylvania/POP010220


 11%|█▏        | 1661/14621 [58:53<10:53:14,  3.02s/it]

Failed to parse URL at index 1660, URL: https://www.census.gov/quickfacts/pittsburghcitypennsylvania


 11%|█▏        | 1662/14621 [58:56<10:35:43,  2.94s/it]

Failed to parse URL at index 1662, URL: http://quickfacts.census.gov/qfd/states/42/4261000.html


 11%|█▏        | 1672/14621 [1:07:12<38:40:45, 10.75s/it]

Failed to parse URL at index 1671, URL: https://web.archive.org/web/20080906191606/http://www.ukrweekly.com/old/archive/2000/020012.shtml


 11%|█▏        | 1673/14621 [1:07:12<27:49:40,  7.74s/it]

Failed to parse URL at index 1673, URL: http://www.ukrweekly.com/old/archive/2000/020012.shtml


 11%|█▏        | 1678/14621 [1:07:19<10:51:03,  3.02s/it]

Failed to parse URL at index 1677, URL: http://www.thearda.com/rcms2010/r/m/38300/rcms2010_38300_metro_name_2010.asp


 12%|█▏        | 1688/14621 [1:07:49<5:54:04,  1.64s/it]

Failed to parse URL at index 1687, URL: https://www.census.gov/acs/www/Products/Ranking/2002/R02T160.htm


 12%|█▏        | 1690/14621 [1:07:51<4:47:19,  1.33s/it]

Failed to parse URL at index 1689, URL: https://www.census.gov/acs/www/Products/Ranking/2002/R13T160.htm


 12%|█▏        | 1691/14621 [1:07:52<4:19:47,  1.21s/it]

Failed to parse URL at index 1690, URL: http://www.s4.brown.edu/us2010/Data/Report/report2.pdf


 12%|█▏        | 1692/14621 [1:07:53<3:46:01,  1.05s/it]

Failed to parse URL at index 1692, URL: https://www.americanimmigrationcouncil.org/sites/default/files/research/council_new_americans_in_pittsburgh_9_2023.pdf


 12%|█▏        | 1694/14621 [1:07:53<2:25:49,  1.48it/s]

Failed to parse URL at index 1694, URL: https://www.jstor.org/stable/j.ctv1rdtwq2


 12%|█▏        | 1699/14621 [1:07:59<3:44:43,  1.04s/it]

Failed to parse URL at index 1698, URL: http://www.pghtech.org/aboutus/about-our-region.aspx


 12%|█▏        | 1700/14621 [1:08:04<7:32:59,  2.10s/it]

Failed to parse URL at index 1699, URL: https://web.archive.org/web/20140327204307/http://www.pghtech.org/aboutus/about-our-region.aspx


 12%|█▏        | 1706/14621 [1:08:34<20:25:22,  5.69s/it]

Failed to parse URL at index 1706, URL: http://blog.triblive.com/thisjustin/2014/01/24/pittsburgh-2-top-10-cities-to-achieve-the-american-dream/#axzz2rbykQv6y


 12%|█▏        | 1710/14621 [1:08:39<9:32:42,  2.66s/it] 

Failed to parse URL at index 1709, URL: https://web.archive.org/web/20110604181118/http://www.pittsburghlive.com/x/pittsburghtrib/business/s_739819.html


 12%|█▏        | 1711/14621 [1:08:40<7:23:32,  2.06s/it]

Failed to parse URL at index 1710, URL: http://www.pittsburghlive.com/x/pittsburghtrib/business/s_739819.html


 12%|█▏        | 1716/14621 [1:08:45<5:17:45,  1.48s/it]

Failed to parse URL at index 1715, URL: http://www.alleghenyconference.org/public/cfm/d_and_d/index.cfm?


 12%|█▏        | 1721/14621 [1:08:50<3:37:42,  1.01s/it]

Failed to parse URL at index 1720, URL: http://www.pittsburghartscouncil.org/resources/research/economic-impact-studies/282-economic-impact-studies


 12%|█▏        | 1722/14621 [1:08:55<7:38:14,  2.13s/it]

Failed to parse URL at index 1721, URL: http://www.greenfirst.us/greenfirst


 12%|█▏        | 1723/14621 [1:08:57<7:43:18,  2.16s/it]

Failed to parse URL at index 1722, URL: http://imaginepittsburgh.com/now/greenbldgs-oyw122012/18348/#sthash.JMuO8IMj.dpuf
Failed to parse URL at index 1723, URL: http://triblive.com/aande/architecture/2806500-74/office-pittsburgh-energy-gensler-towers-aia-designed-firm-pnc-cities#axzz2aE14G7u6


 12%|█▏        | 1726/14621 [1:09:02<6:31:00,  1.82s/it]

Failed to parse URL at index 1724, URL: http://www.post-gazette.com/pg/09340/1018586-28.stm
Failed to parse URL at index 1725, URL: http://www.google.com/hostednews/ap/article/ALeqM5hstlzVCvbYCMG7AirNjE4AaLp2jgD9HKNI5G1%7Carchiveurl=https://web.archive.org/web/20100820121952/http://www.google.com/hostednews/ap/article/ALeqM5hstlzVCvbYCMG7AirNjE4AaLp2jgD9HKNI5G1


 12%|█▏        | 1730/14621 [1:09:13<9:59:46,  2.79s/it]

Failed to parse URL at index 1729, URL: http://www.post-gazette.com/stories/ae/movies/you-saw-it-here-first-pittsburghs-nickelodeon-introduced-the-moving-picture-theater-to-the-masses-in-1905-587730/


 12%|█▏        | 1733/14621 [1:09:21<10:57:20,  3.06s/it]

Failed to parse URL at index 1732, URL: http://old.post-gazette.com/pg/12064/1214341-60.stm#ixzz2lkZSc4FQ


 12%|█▏        | 1736/14621 [1:09:28<10:46:11,  3.01s/it]

Failed to parse URL at index 1735, URL: http://www.post-gazette.com/stories/local/neighborhoods-city/4-star-film-studio-coming-to-strip-district-215569/


 12%|█▏        | 1738/14621 [1:09:30<6:43:04,  1.88s/it]

Failed to parse URL at index 1738, URL: http://www.31ststreetstudios.com/


 12%|█▏        | 1740/14621 [1:09:35<8:06:46,  2.27s/it]

Failed to parse URL at index 1739, URL: http://www.post-gazette.com/stories/opinion/editorials/lights-cameras-action-at-a-new-studio-keeps-pittsburgh-on-film-220817/


 12%|█▏        | 1741/14621 [1:09:36<6:42:41,  1.88s/it]

Failed to parse URL at index 1740, URL: http://www.cbsnews.com/8301-505245_162-57386770/pa-film-studio-to-feature-avatar-technology/


 12%|█▏        | 1744/14621 [1:09:44<8:05:52,  2.26s/it]

Failed to parse URL at index 1744, URL: http://www.pghfilm.org/about/about_filmography.jsp


 12%|█▏        | 1748/14621 [1:09:51<8:10:58,  2.29s/it]

Failed to parse URL at index 1747, URL: http://www.post-gazette.com/local/city/2013/06/19/Invention-convention-INPEX-gathers-in-Pittsburgh/stories/2013061902140000000


 12%|█▏        | 1756/14621 [1:10:10<10:44:57,  3.01s/it]

Failed to parse URL at index 1755, URL: http://www.post-gazette.com/business/businessnews/2013/07/04/Furries-leave-visible-prints-Downtown-and-in-Pittsburgh-s-coffers/stories/201307040173


 12%|█▏        | 1757/14621 [1:10:11<8:32:38,  2.39s/it] 

Failed to parse URL at index 1757, URL: https://www.discogs.com/release/3047883-Mary-Lou-Williams-And-Group-Pittsburgh


 12%|█▏        | 1769/14621 [1:10:32<9:49:27,  2.75s/it]

Failed to parse URL at index 1768, URL: https://old.post-gazette.com/regionstate/20000926ecstasy3.asp


 12%|█▏        | 1771/14621 [1:10:39<11:42:02,  3.28s/it]

Failed to parse URL at index 1770, URL: https://old.post-gazette.com/neigh_city/20010109ravecity3.asp


 12%|█▏        | 1773/14621 [1:10:45<11:49:40,  3.31s/it]

Failed to parse URL at index 1772, URL: https://old.post-gazette.com/regionstate/20010103ravereg5.asp


 12%|█▏        | 1775/14621 [1:10:52<12:30:16,  3.50s/it]

Failed to parse URL at index 1774, URL: http://old.post-gazette.com/forum/20001003edlets6.asp


 12%|█▏        | 1785/14621 [1:11:03<3:39:48,  1.03s/it]

Failed to parse URL at index 1783, URL: http://www.pabook.libraries.psu.edu/palitmap/bios/Carson__Rachel_Louise.html


 12%|█▏        | 1786/14621 [1:11:09<8:23:25,  2.35s/it]

Failed to parse URL at index 1785, URL: https://www.post-gazette.com/ae/books/2006/12/16/Presidential-biographer-gets-presidential-medal/stories/200612160100


 12%|█▏        | 1788/14621 [1:11:16<11:18:13,  3.17s/it]

Failed to parse URL at index 1787, URL: https://old.post-gazette.com/magazine/19981011playw2.asp


 12%|█▏        | 1797/14621 [1:11:29<5:28:40,  1.54s/it]

Failed to parse URL at index 1796, URL: http://www.interzone.com/~jafriedl/SW/sowrites.htm


 12%|█▏        | 1798/14621 [1:11:34<9:11:28,  2.58s/it]

Failed to parse URL at index 1797, URL: https://web.archive.org/web/19990420000917/https://www.cs.cmu.edu/~mslee/pw.html


 12%|█▏        | 1799/14621 [1:11:36<8:07:55,  2.28s/it]

Failed to parse URL at index 1798, URL: https://www.cs.cmu.edu/~mslee/pw.html/


 12%|█▏        | 1804/14621 [1:11:45<7:25:20,  2.08s/it]

Failed to parse URL at index 1803, URL: https://www.post-gazette.com/local/neighborhood/2018/12/27/Neighborhood-Pittsburgh-top-food-city-2019/stories/201812270143


 12%|█▏        | 1806/14621 [1:11:47<6:21:43,  1.79s/it]

Failed to parse URL at index 1806, URL: https://www.nytimes.com/2006/03/17/travel/escapes/its-not-the-sights-its-the-sounds.html


 12%|█▏        | 1811/14621 [1:11:59<8:30:22,  2.39s/it]

Failed to parse URL at index 1810, URL: https://web.archive.org/web/20100520062827/http://www.pittsburghlive.com/x/pittsburghtrib/news/regional/s_681477.html


 12%|█▏        | 1813/14621 [1:12:06<10:41:37,  3.01s/it]

Failed to parse URL at index 1812, URL: https://www.post-gazette.com/local/north/2005/09/15/Damage-repaired-trauma-remains-after-2004-floods/stories/200509150366


 12%|█▏        | 1815/14621 [1:12:12<11:11:46,  3.15s/it]

Failed to parse URL at index 1814, URL: https://www.post-gazette.com/local/city/2011/03/17/The-historic-St-Patrick-s-Day-Flood-of-1936-two-eyewitness-accounts/stories/201103170442


 12%|█▏        | 1816/14621 [1:12:14<9:49:03,  2.76s/it] 

Failed to parse URL at index 1816, URL: http://aol.sportingnews.com/mlb/story/2009-10-06/black-gold-mettle-pittsburgh-best-sports-city-0


 12%|█▏        | 1818/14621 [1:12:19<9:46:03,  2.75s/it]

Failed to parse URL at index 1817, URL: http://www.post-gazette.com/stories/life/zillow/pittsburgh-among-top-baseball-cities-676043/


 12%|█▏        | 1827/14621 [1:12:34<4:33:44,  1.28s/it]

Failed to parse URL at index 1826, URL: http://www.timesonline.com/site/news.cfm?newsid=17047895&BRD=2305&PAG=461&dept_id=478568&rfi=6


 13%|█▎        | 1828/14621 [1:12:35<4:40:50,  1.32s/it]

Failed to parse URL at index 1828, URL: http://archive.alexreisner.com/baseball/history/race?y=1997


 13%|█▎        | 1832/14621 [1:12:39<3:47:30,  1.07s/it]

Failed to parse URL at index 1831, URL: http://www.pittsburghlive.com/x/pittsburghtrib/sports/steelers/s_585606.html?source=rss&feed=3


 13%|█▎        | 1833/14621 [1:12:45<8:15:36,  2.33s/it]

Failed to parse URL at index 1832, URL: https://documenting.pitt.edu/islandora/object/pitt%3A00afj8718m/viewer#page/248/mode/2up


 13%|█▎        | 1835/14621 [1:12:47<6:54:18,  1.94s/it]

Failed to parse URL at index 1834, URL: https://web.archive.org/web/20100823084558/http://www.pittsburghlive.com/x/pittsburghtrib/sports/s_695755.html


 13%|█▎        | 1836/14621 [1:12:48<5:08:11,  1.45s/it]

Failed to parse URL at index 1835, URL: http://www.pittsburghlive.com/x/pittsburghtrib/sports/s_695755.html


 13%|█▎        | 1837/14621 [1:12:53<9:16:38,  2.61s/it]

Failed to parse URL at index 1836, URL: https://www.post-gazette.com/local/city/2010/05/25/Mellon-Arena-roof-may-open-for-final-show/stories/201005250282


 13%|█▎        | 1838/14621 [1:12:58<11:47:29,  3.32s/it]

Failed to parse URL at index 1837, URL: http://www.post-gazette.com/life/recreation/2015/11/30/More-ice-time-Pittsburgh-loves-ice-skating-but-how-many-rinks-might-prove-too-many/stories/201511290019


 13%|█▎        | 1840/14621 [1:13:00<7:07:26,  2.01s/it]

Failed to parse URL at index 1839, URL: http://www.nba.com/history/records/regular_fieldgoals.html


 13%|█▎        | 1844/14621 [1:13:09<6:42:50,  1.89s/it]

Failed to parse URL at index 1843, URL: http://www.nba.com/history/season/19501951.html


 13%|█▎        | 1845/14621 [1:13:14<10:02:15,  2.83s/it]

Failed to parse URL at index 1844, URL: https://web.archive.org/web/20150320095356/http://www.nba.com/history/season/19501951.html


 13%|█▎        | 1849/14621 [1:13:20<5:46:32,  1.63s/it]

Failed to parse URL at index 1849, URL: https://search.worldcat.org/oclc/1102593197


 13%|█▎        | 1851/14621 [1:13:21<4:02:24,  1.14s/it]

Failed to parse URL at index 1850, URL: https://wpga.org/nws/2021/210211-foxburg-country-club-the-oldest-course-in-continuous-use-in-us.html


 13%|█▎        | 1852/14621 [1:13:21<3:14:21,  1.09it/s]

Failed to parse URL at index 1851, URL: https://www.golfdigest.com/story/golf_palmer_80_shedloski_0914
Failed to parse URL at index 1852, URL: https://triblive.com/sports/mark-maddens-hot-take-stars-like-jumping-johnny-defazio-made-studio-wrestling-must-see-tv/


 13%|█▎        | 1854/14621 [1:13:23<3:16:21,  1.08it/s]

Failed to parse URL at index 1853, URL: http://www.pagop.org/about/history/


 13%|█▎        | 1856/14621 [1:13:26<3:31:24,  1.01it/s]

Failed to parse URL at index 1855, URL: http://www.gmfus.org/template/page.cfm?page_id=481


 13%|█▎        | 1857/14621 [1:13:26<3:14:16,  1.09it/s]

Failed to parse URL at index 1857, URL: https://www.fbi.gov/about-us/cjis/ucr/crime-in-the-u.s/2010/crime-in-the-u.s.-2010/caution-against-ranking


 13%|█▎        | 1860/14621 [1:13:30<3:43:15,  1.05s/it]

Failed to parse URL at index 1860, URL: https://www.fbi.gov/ucr/word.htm
Failed to parse URL at index 1861, URL: https://triblive.com/local/pittsburgh-allegheny/pittsburgh-homicides-hit-lowest-in-20-years/


 13%|█▎        | 1864/14621 [1:13:30<1:43:02,  2.06it/s]

Failed to parse URL at index 1862, URL: https://www.ceapittsburgh.org/wp-content/uploads/2020/01/A-Ground-Up-Model-for-Gun-Violence-Reduction.pdf
Failed to parse URL at index 1863, URL: https://doi.org/10.1080%2F15433714.2014.997090
Failed to parse URL at index 1864, URL: https://search.worldcat.org/issn/2376-1407


 13%|█▎        | 1869/14621 [1:13:33<2:08:40,  1.65it/s]

Failed to parse URL at index 1868, URL: https://api.semanticscholar.org/CorpusID:205889350


 13%|█▎        | 1871/14621 [1:13:36<3:12:58,  1.10it/s]

Failed to parse URL at index 1871, URL: https://triblive.com/local/pittsburgh-to-use-money-from-anti-violence-trust-fund-on-parks-recreation/


 13%|█▎        | 1874/14621 [1:13:38<2:41:52,  1.31it/s]

Failed to parse URL at index 1873, URL: http://colleges.usnews.rankingsandreviews.com/best-colleges/rankings/national-universities/page+3


 13%|█▎        | 1876/14621 [1:13:40<2:39:40,  1.33it/s]

Failed to parse URL at index 1875, URL: http://mac10.umc.pitt.edu/u/FMPro?-db=ustory.fp5&-format=d.html&-lay=a&-sortfield=issueid%3a%3aissuedate&-sortorder=descend&keywords=U.S.%20News&-max=50&-recid=39345&-find=


 13%|█▎        | 1879/14621 [1:13:43<3:11:32,  1.11it/s]

Failed to parse URL at index 1878, URL: http://www.pittmag.pitt.edu/fall2007/feature1.html


 13%|█▎        | 1880/14621 [1:13:45<3:52:43,  1.10s/it]

Failed to parse URL at index 1880, URL: http://mac10.umc.pitt.edu/u/FMPro?-db=ustory.fp5&-format=d.html&-lay=a&-sortfield=issueid%3a%3aissuedate&-sortorder=descend&keywords=School%20of%20Public%20Health%20ranked&-max=50&-recid=39152&-find=


 13%|█▎        | 1882/14621 [1:13:50<6:20:10,  1.79s/it]

Failed to parse URL at index 1881, URL: https://www.post-gazette.com/news/education/2015/08/24/Chatham-University-prepares-for-its-first-co-ed-undergrad-class/stories/201508170172


 13%|█▎        | 1886/14621 [1:13:56<5:12:10,  1.47s/it]

Failed to parse URL at index 1885, URL: https://www.law.pitt.edu/about/engaged/jurist


 13%|█▎        | 1887/14621 [1:13:57<5:24:32,  1.53s/it]

Failed to parse URL at index 1886, URL: http://www.prnewswire.com/news-releases/nielsen-reports-11-increase-in-us-television-households-for-the-2006-2007-season-56231032.html


 13%|█▎        | 1890/14621 [1:14:06<9:26:19,  2.67s/it]

Failed to parse URL at index 1889, URL: https://www.post-gazette.com/ae/tv/2004/03/28/WQED-at-50-Born-in-television-s-Golden-Age-Pittsburgh-s-public-broadcasting-station-pioneered-educational-programming/stories/200403280162


 13%|█▎        | 1893/14621 [1:14:14<10:19:07,  2.92s/it]

Failed to parse URL at index 1892, URL: https://www.post-gazette.com/business/businessnews/2008/03/08/Film-workers-here-straining-to-keep-up-with-four-movies/stories/200803080136


 13%|█▎        | 1894/14621 [1:14:19<12:31:48,  3.54s/it]

Failed to parse URL at index 1893, URL: http://www.post-gazette.com/ae/movies/2013/09/01/A-look-at-movie-locations-around-Pittsburgh/stories/201309010136


 13%|█▎        | 1895/14621 [1:14:24<14:18:26,  4.05s/it]

Failed to parse URL at index 1894, URL: http://www.post-gazette.com/ae/movies/2017/01/05/Fences-film-shoot-generated-9-4-million-for-Pittsburgh-businesses-hires-august-wilson/stories/201701050147


 13%|█▎        | 1896/14621 [1:14:25<10:31:47,  2.98s/it]

Failed to parse URL at index 1896, URL: https://pghfilm.org/


 13%|█▎        | 1898/14621 [1:14:27<7:36:12,  2.15s/it] 

Failed to parse URL at index 1898, URL: https://pghfilm.org/for-crew-and-talent/casting-notices


 13%|█▎        | 1908/14621 [1:14:39<4:19:50,  1.23s/it]

Failed to parse URL at index 1907, URL: https://cineleasestudios.com/pittsburgh-studio/


 13%|█▎        | 1913/14621 [1:14:43<3:23:05,  1.04it/s]

Failed to parse URL at index 1912, URL: https://journals.psu.edu/wph/article/view/4078/3895


 13%|█▎        | 1915/14621 [1:14:45<3:35:29,  1.02s/it]

Failed to parse URL at index 1914, URL: http://www.pgh2o.com/index.htm


 13%|█▎        | 1916/14621 [1:14:50<7:47:39,  2.21s/it]

Failed to parse URL at index 1915, URL: http://www.puc.pa.gov/consumer_info/natural_gas/natural_gas_shopping/natural_gas_suppliers_list_.aspx


 13%|█▎        | 1922/14621 [1:15:08<11:54:05,  3.37s/it]

Failed to parse URL at index 1921, URL: http://www.post-gazette.com/newslinks/timeline1961.asp


 13%|█▎        | 1923/14621 [1:15:08<9:05:01,  2.58s/it] 

Failed to parse URL at index 1923, URL: https://www.nytimes.com/1995/12/15/us/man-gets-baboon-marrow-in-risky-aids-treatment.html


 13%|█▎        | 1926/14621 [1:15:11<5:25:28,  1.54s/it]

Failed to parse URL at index 1925, URL: http://www.isbd.org/edcenter/aboutisbd.asp


 13%|█▎        | 1929/14621 [1:15:19<9:26:31,  2.68s/it]

Failed to parse URL at index 1928, URL: http://www.post-gazette.com/stories/local/neighborhoods-north/penguins-plan-sports-training-medicine-complex-in-cranberry-642087/?p=0


 13%|█▎        | 1930/14621 [1:15:25<11:47:47,  3.35s/it]

Failed to parse URL at index 1929, URL: http://www.post-gazette.com/stories/news/education/pitts-medical-school-to-help-nazarbayev-university-in-kazakhstan-develop-its-own-696530/


 13%|█▎        | 1936/14621 [1:15:39<11:54:29,  3.38s/it]

Failed to parse URL at index 1935, URL: http://www.post-gazette.com/stories/news/health/pitt-team-inserts-computer-chip-in-brain-so-a-persons-thoughts-can-instigate-motion-666707


 13%|█▎        | 1937/14621 [1:15:40<9:32:33,  2.71s/it] 

Failed to parse URL at index 1936, URL: http://kdka.com/local/bridge.Pittsburgh.2.383456.html


 13%|█▎        | 1938/14621 [1:15:45<11:58:08,  3.40s/it]

Failed to parse URL at index 1937, URL: https://web.archive.org/web/20110707075415/http://www.abridgetovenezia.com/ponts.php?langue=en


 13%|█▎        | 1943/14621 [1:19:19<62:55:36, 17.87s/it] 

Failed to parse URL at index 1942, URL: https://www.greyhound.com/en/explore-places/pittsburgh
Failed to parse URL at index 1943, URL: http://triblive.com/news/allegheny/4968949-74/pittsburgh-cities-among#axzz2j7qCvZ3v


 13%|█▎        | 1946/14621 [1:19:23<32:00:52,  9.09s/it]

Failed to parse URL at index 1945, URL: http://www.apta.com/research/stats/ridership/riderep/documents/07q1bus.pdf


 13%|█▎        | 1950/14621 [1:19:33<16:51:43,  4.79s/it]

Failed to parse URL at index 1949, URL: https://www.sistercitiespgh.org/sister-cities


 19%|█▊        | 2718/14621 [1:36:35<6:11:03,  1.87s/it]

Failed to parse URL at index 2717, URL: http://catalogo.bne.es/uhtbin/authoritybrowse.cgi?action=display&authority_id=XX5044829


 19%|█▊        | 2719/14621 [1:36:37<6:11:28,  1.87s/it]

Failed to parse URL at index 2718, URL: http://olduli.nli.org.il/F/?func=find-b&local_base=NLX10&find_code=UID&request=987007561808105171


 19%|█▊        | 2723/14621 [1:36:42<4:43:02,  1.43s/it]

Failed to parse URL at index 2722, URL: https://catalog.archives.gov/id/10046064


 21%|██        | 3049/14621 [1:42:51<4:14:17,  1.32s/it]

Failed to parse URL at index 3048, URL: https://en.wikipedia.org/w/index.php?title=Arnout_Viele&action=edit&redlink=1


 21%|██        | 3059/14621 [1:43:05<7:11:29,  2.24s/it]

Failed to parse URL at index 3058, URL: http://www.post-gazette.com/regionstate/20010513mound4.asp


 21%|██        | 3067/14621 [1:43:21<9:27:17,  2.95s/it]

Failed to parse URL at index 3066, URL: http://www.fortpittmuseum.com/History.html


 21%|██        | 3076/14621 [1:43:38<7:36:04,  2.37s/it]

Failed to parse URL at index 3075, URL: http://www.wqed.org/education/pghist/units/WPAhist/keyevents.html


 21%|██        | 3083/14621 [1:43:52<6:09:18,  1.92s/it]

Failed to parse URL at index 3082, URL: https://www.census.gov/population/www/documentation/twps0027/twps0027.html


 21%|██        | 3085/14621 [1:43:54<4:27:52,  1.39s/it]

Failed to parse URL at index 3084, URL: http://www.westinghouse.com/timeline.html


 21%|██        | 3086/14621 [1:43:55<3:46:34,  1.18s/it]

Failed to parse URL at index 3085, URL: https://journals.psu.edu/wph/article/download/2409/2242


 21%|██        | 3087/14621 [1:43:57<4:13:38,  1.32s/it]

Failed to parse URL at index 3086, URL: http://www.heinzfamily.org/aboutus/heinzhistory.html


 21%|██        | 3089/14621 [1:43:59<3:43:50,  1.16s/it]

Failed to parse URL at index 3088, URL: http://digital.library.cornell.edu/cgi/t/text/text-idx?c=nora;cc=;view=toc;subview=short;idno=nora0148-6


 21%|██        | 3092/14621 [1:44:07<8:28:51,  2.65s/it]

Failed to parse URL at index 3091, URL: http://www.post-gazette.com/life/2013/12/05/Hic-hic-hooray/stories/201312050249


 21%|██        | 3095/14621 [1:44:11<4:53:14,  1.53s/it]

Failed to parse URL at index 3094, URL: https://doi.org/10.1353%2Fpnh.0.0021


 21%|██        | 3096/14621 [1:44:11<3:38:58,  1.14s/it]

Failed to parse URL at index 3095, URL: https://api.semanticscholar.org/CorpusID:143698372


 21%|██        | 3097/14621 [1:44:16<7:22:14,  2.30s/it]

Failed to parse URL at index 3096, URL: http://www.post-gazette.com/munch/2011/06/30/Munch-goes-to-the-Blind-Pig/stories/201106300295


 21%|██        | 3098/14621 [1:44:21<9:58:37,  3.12s/it]

Failed to parse URL at index 3097, URL: http://www.post-gazette.com/stories/sectionfront/life/pittsburgh-gets-its-first-distillery-since-before-prohibition-298761/?p=0


 21%|██        | 3100/14621 [1:44:23<6:02:43,  1.89s/it]

Failed to parse URL at index 3099, URL: http://docs.lib.noaa.gov/rescue/mwr/055/mwr-055-11-0500a.pdf


 21%|██        | 3105/14621 [1:44:31<4:50:39,  1.51s/it]

Failed to parse URL at index 3104, URL: http://www.ujfpittsburgh.org/page.aspx?ID=46650


 21%|██▏       | 3107/14621 [1:44:44<11:27:42,  3.58s/it]

Failed to parse URL at index 3107, URL: https://www.nytimes.com/2000/05/13/us/john-p-robin-87-led-the-redevelopment-of-downtown-pittsburgh.html


 21%|██▏       | 3115/14621 [1:44:57<8:01:05,  2.51s/it]

Failed to parse URL at index 3115, URL: http://juh.sagepub.com/content/39/2/147.abstract?etoc


 21%|██▏       | 3120/14621 [1:45:08<8:27:34,  2.65s/it]

Failed to parse URL at index 3119, URL: http://www.post-gazette.com/businessnews/2012/12/23/In-desperate-1983-there-was-nowhere-for-Pittsburgh-s-economy-to-go-but-up/stories/201212230258


 21%|██▏       | 3123/14621 [1:45:14<8:32:24,  2.67s/it]

Failed to parse URL at index 3122, URL: http://www.post-gazette.com/businessnews/2012/12/23/For-Pittsburgh-a-future-not-reliant-on-steel-was-unthinkable-and-unavoidable/stories/201212230223


 21%|██▏       | 3125/14621 [1:45:18<7:41:27,  2.41s/it]

Failed to parse URL at index 3124, URL: http://www.wqed.org/education/pghist/units/WPAhist/wpa6.shtml


 21%|██▏       | 3128/14621 [1:45:28<9:39:23,  3.02s/it]

Failed to parse URL at index 3127, URL: http://www.phlf.org/about-landmarks/a-brief-history-of-pittsburgh-history-landmarks-foundation/


 21%|██▏       | 3130/14621 [1:45:35<10:48:04,  3.38s/it]

Failed to parse URL at index 3129, URL: http://www.post-gazette.com/planb/


 21%|██▏       | 3132/14621 [1:45:41<10:53:57,  3.42s/it]

Failed to parse URL at index 3131, URL: http://www.post-gazette.com/pg/07116/781162-53.stm


 21%|██▏       | 3136/14621 [1:45:47<7:14:51,  2.27s/it]

Failed to parse URL at index 3135, URL: https://www.census.gov/popest/data/cities/totals/2011/tables/SUB-EST2011-01.csv


 21%|██▏       | 3139/14621 [1:45:52<5:47:24,  1.82s/it]

Failed to parse URL at index 3138, URL: http://www2.census.gov/geo/ua/ua_list_ua.xls


 21%|██▏       | 3140/14621 [1:45:57<9:09:53,  2.87s/it]

Failed to parse URL at index 3139, URL: https://archive.today/20130414154507/http://www.bchistory.org/beavercounty/BeaverCountyTopical/Timelines/EarlyBCChronology.html


 21%|██▏       | 3141/14621 [1:46:03<11:25:50,  3.58s/it]

Failed to parse URL at index 3140, URL: http://www.bchistory.org/beavercounty/BeaverCountyTopical/Timelines/EarlyBCChronology.html


 21%|██▏       | 3143/14621 [1:46:06<8:31:42,  2.67s/it] 

Failed to parse URL at index 3142, URL: http://www.pagenweb.org/~somerset/bedford.htm


 22%|██▏       | 3153/14621 [1:46:22<4:14:57,  1.33s/it]

Failed to parse URL at index 3152, URL: https://www.jstor.org/stable/205101


 22%|██▏       | 3157/14621 [1:46:27<4:08:48,  1.30s/it]

Failed to parse URL at index 3157, URL: https://search.worldcat.org/oclc/21956933


 22%|██▏       | 3160/14621 [1:46:30<3:39:05,  1.15s/it]

Failed to parse URL at index 3160, URL: https://search.worldcat.org/oclc/30696960


 22%|██▏       | 3163/14621 [1:46:33<3:24:43,  1.07s/it]

Failed to parse URL at index 3163, URL: https://search.worldcat.org/oclc/1018684
Failed to parse URL at index 3164, URL: https://search.worldcat.org/oclc/22234968


 22%|██▏       | 3170/14621 [1:46:44<5:54:51,  1.86s/it]

Failed to parse URL at index 3169, URL: http://digital.library.pitt.edu/pittsburgh


 22%|██▏       | 3171/14621 [1:46:45<4:56:42,  1.55s/it]

Failed to parse URL at index 3170, URL: http://www.pghhistory.org/


 22%|██▏       | 3174/14621 [1:46:49<4:24:46,  1.39s/it]

Failed to parse URL at index 3173, URL: http://peoplemaps.esri.com/pittviewer/


 22%|██▏       | 3176/14621 [1:46:51<3:40:21,  1.16s/it]

Failed to parse URL at index 3175, URL: http://www.lifeinwesternpa.org/


 22%|██▏       | 3178/14621 [1:46:58<8:07:26,  2.56s/it]

Failed to parse URL at index 3177, URL: http://www2.washjeff.edu/german/pittsburgh/


 22%|██▏       | 3181/14621 [1:47:04<7:01:58,  2.21s/it]

Failed to parse URL at index 3181, URL: http://triblive.com/local/allegheny/12376964-74/wayward-record-of-pittsburghs-early-years-recovered-by-archivist


 22%|██▏       | 3193/14621 [1:47:18<4:42:05,  1.48s/it]

Failed to parse URL at index 3192, URL: https://pittsburghpa.gov/pittsburgh/pgh-about


 22%|██▏       | 3194/14621 [1:47:19<4:49:32,  1.52s/it]

Failed to parse URL at index 3193, URL: https://pittsburghpa.gov/index.html


 22%|██▏       | 3196/14621 [1:47:23<5:29:47,  1.73s/it]

Failed to parse URL at index 3195, URL: https://pittsburghpa.gov/guia-para-residentes-de-la-ciudad-de-pittsburgh/introduccion


 22%|██▏       | 3198/14621 [1:47:26<4:45:22,  1.50s/it]

Failed to parse URL at index 3197, URL: https://pittsburghpa.gov/mayor/covid-updates


 22%|██▏       | 3199/14621 [1:47:28<4:52:54,  1.54s/it]

Failed to parse URL at index 3198, URL: https://pittsburghpa.gov/dcp/ccb-ada


 22%|██▏       | 3200/14621 [1:47:28<4:04:48,  1.29s/it]

Failed to parse URL at index 3199, URL: https://pittsburghpa.gov/city-info/frequent-numbers


 22%|██▏       | 3201/14621 [1:47:30<4:20:43,  1.37s/it]

Failed to parse URL at index 3200, URL: https://pittsburghpa.gov/city-info/socialmedia


 22%|██▏       | 3202/14621 [1:47:32<4:31:38,  1.43s/it]

Failed to parse URL at index 3201, URL: https://pittsburghpa.gov/pittsburgh/page.html


 22%|██▏       | 3203/14621 [1:47:32<3:52:38,  1.22s/it]

Failed to parse URL at index 3202, URL: https://pittsburghpa.gov/citiparks/parks.html


 22%|██▏       | 3205/14621 [1:47:38<6:24:57,  2.02s/it]

Failed to parse URL at index 3204, URL: https://pittsburghpa.gov/dcp/index.html


 22%|██▏       | 3206/14621 [1:47:40<6:00:43,  1.90s/it]

Failed to parse URL at index 3205, URL: https://pittsburghpa.gov/chr/index.html


 22%|██▏       | 3208/14621 [1:47:44<6:06:17,  1.93s/it]

Failed to parse URL at index 3207, URL: https://pittsburghpa.gov/ehb/index.html


 22%|██▏       | 3209/14621 [1:47:45<4:55:30,  1.55s/it]

Failed to parse URL at index 3208, URL: https://pittsburghpa.gov/finance/finance.html


 22%|██▏       | 3210/14621 [1:47:45<4:05:21,  1.29s/it]

Failed to parse URL at index 3209, URL: https://pittsburghpa.gov/innovation-performance/index.html


 22%|██▏       | 3211/14621 [1:47:46<3:30:46,  1.11s/it]

Failed to parse URL at index 3210, URL: https://pittsburghpa.gov/humanresources/index.html


 22%|██▏       | 3213/14621 [1:47:48<3:15:24,  1.03s/it]

Failed to parse URL at index 3212, URL: https://pittsburghpa.gov/domi/index.html


 22%|██▏       | 3214/14621 [1:47:50<3:46:07,  1.19s/it]

Failed to parse URL at index 3213, URL: https://pittsburghpa.gov/ochs/index.html


 22%|██▏       | 3217/14621 [1:47:55<4:45:13,  1.50s/it]

Failed to parse URL at index 3216, URL: https://pittsburghpa.gov/publicsafety/index.html


 22%|██▏       | 3218/14621 [1:47:55<3:57:02,  1.25s/it]

Failed to parse URL at index 3217, URL: https://pittsburghpa.gov/dpw/index.html


 22%|██▏       | 3220/14621 [1:48:00<5:13:27,  1.65s/it]

Failed to parse URL at index 3219, URL: https://pittsburghpa.gov/events/index.html


 22%|██▏       | 3221/14621 [1:48:01<4:37:55,  1.46s/it]

Failed to parse URL at index 3220, URL: https://pittsburghpa.shinyapps.io/BurghsEyeView/


 22%|██▏       | 3223/14621 [1:48:02<3:25:19,  1.08s/it]

Failed to parse URL at index 3221, URL: https://pittsburghpa.gov/weather/index.html


 22%|██▏       | 3225/14621 [1:48:06<4:41:33,  1.48s/it]

Failed to parse URL at index 3224, URL: https://pittsburghpa.gov/mayor/welcoming-pgh/index.html


 22%|██▏       | 3229/14621 [1:48:11<3:29:15,  1.10s/it]

Failed to parse URL at index 3228, URL: https://pittsburghpa.gov/bac/bac.html


 22%|██▏       | 3230/14621 [1:48:11<3:15:36,  1.03s/it]

Failed to parse URL at index 3229, URL: https://pittsburghpa.gov/clerk/index.html


 22%|██▏       | 3231/14621 [1:48:12<2:53:55,  1.09it/s]

Failed to parse URL at index 3230, URL: https://pittsburghpa.gov/council/index.html


 22%|██▏       | 3233/14621 [1:48:15<3:14:54,  1.03s/it]

Failed to parse URL at index 3232, URL: https://pittsburghpa.gov/omb/community-development-block-grant


 22%|██▏       | 3234/14621 [1:48:16<3:48:45,  1.21s/it]

Failed to parse URL at index 3233, URL: https://pittsburghpa.gov/cmptf/index.html


 22%|██▏       | 3235/14621 [1:48:17<3:16:28,  1.04s/it]

Failed to parse URL at index 3234, URL: https://pittsburghpa.gov/controller/index.html


 22%|██▏       | 3236/14621 [1:48:17<2:53:58,  1.09it/s]

Failed to parse URL at index 3235, URL: https://pittsburghpa.gov/law/index.html


 22%|██▏       | 3237/14621 [1:48:18<2:38:15,  1.20it/s]

Failed to parse URL at index 3236, URL: https://pittsburghpa.gov/mayor/city-staff


 22%|██▏       | 3238/14621 [1:48:19<2:30:52,  1.26it/s]

Failed to parse URL at index 3237, URL: https://pittsburghpa.gov/mayor/index.html


 22%|██▏       | 3239/14621 [1:48:20<2:29:04,  1.27it/s]

Failed to parse URL at index 3238, URL: https://pittsburghpa.gov/pension/index.html


 22%|██▏       | 3240/14621 [1:48:20<2:23:16,  1.32it/s]

Failed to parse URL at index 3239, URL: https://pittsburghpa.gov/omb/omb.html


 22%|██▏       | 3241/14621 [1:48:21<2:16:23,  1.39it/s]

Failed to parse URL at index 3240, URL: https://pittsburghpa.gov/omi/index.html


 22%|██▏       | 3242/14621 [1:48:22<2:14:41,  1.41it/s]

Failed to parse URL at index 3241, URL: https://pittsburghpa.gov/finance/opeb


 22%|██▏       | 3243/14621 [1:48:22<2:11:46,  1.44it/s]

Failed to parse URL at index 3242, URL: https://pittsburghpa.gov/city-county-100/index.html


 22%|██▏       | 3244/14621 [1:48:23<1:57:12,  1.62it/s]

Failed to parse URL at index 3243, URL: https://onestoppgh.pittsburghpa.gov/pghprod/pub/lms/Login.aspx


 22%|██▏       | 3246/14621 [1:48:24<2:03:40,  1.53it/s]

Failed to parse URL at index 3245, URL: https://pittsburghpa.gov/dashburgh/index.html


 22%|██▏       | 3250/14621 [1:48:31<3:52:05,  1.22s/it]

Failed to parse URL at index 3249, URL: https://webstats.pittsburghpa.gov/


 22%|██▏       | 3251/14621 [1:48:32<3:18:44,  1.05s/it]

Failed to parse URL at index 3250, URL: https://pittsburghpa.gov/finance/finance-permits-licenses


 22%|██▏       | 3252/14621 [1:48:37<7:04:28,  2.24s/it]

Failed to parse URL at index 3251, URL: https://www.openbookpittsburgh.com/


 22%|██▏       | 3257/14621 [1:48:44<4:44:01,  1.50s/it]

Failed to parse URL at index 3256, URL: https://pittsburghpa.gov/controller/watchdog


 22%|██▏       | 3258/14621 [1:48:45<3:57:17,  1.25s/it]

Failed to parse URL at index 3257, URL: https://pittsburghpa.gov/snowangels/index.html


 22%|██▏       | 3259/14621 [1:48:45<3:25:33,  1.09s/it]

Failed to parse URL at index 3258, URL: https://pittsburghpa.gov/weather/snow-plow-tracker


 22%|██▏       | 3261/14621 [1:48:47<2:55:21,  1.08it/s]

Failed to parse URL at index 3260, URL: https://pittsburghpa.gov/city-info/policies


 22%|██▏       | 3262/14621 [1:48:49<3:36:51,  1.15s/it]

Failed to parse URL at index 3261, URL: https://pittsburghpa.gov/city-info/press-releases


 22%|██▏       | 3266/14621 [1:48:56<4:45:14,  1.51s/it]

Failed to parse URL at index 3265, URL: https://pittsburghpa.gov/city-info/release-notes


 22%|██▏       | 3267/14621 [1:48:57<3:57:06,  1.25s/it]

Failed to parse URL at index 3266, URL: https://pittsburghpa.gov/womens-suffrage/index.html


 22%|██▏       | 3269/14621 [1:48:59<3:30:40,  1.11s/it]

Failed to parse URL at index 3268, URL: https://pittsburghpa.gov/city-info/city-events
Failed to parse URL at index 3269, URL: httpss://www.google.com/maps/place/Pittsburgh,+PA/data=!4m2!3m1!1s0x8834f16f48068503:0x8df915a15aa21b34?ved=2ahUKEwjW-r6fis3gAhXvRd8KHau2Bk0Q8gEwAHoECAAQAQ


 22%|██▏       | 3272/14621 [1:49:01<2:52:41,  1.10it/s]

Failed to parse URL at index 3271, URL: https://pittsburghpa.gov/pittsburgh/pgh-sports


 22%|██▏       | 3273/14621 [1:49:02<2:41:47,  1.17it/s]

Failed to parse URL at index 3272, URL: https://pittsburghpa.gov/pittsburgh/cultural-activities


 22%|██▏       | 3274/14621 [1:49:03<2:37:57,  1.20it/s]

Failed to parse URL at index 3273, URL: https://pittsburghpa.gov/pittsburgh/flag-seal


 22%|██▏       | 3275/14621 [1:49:03<2:29:06,  1.27it/s]

Failed to parse URL at index 3274, URL: https://pittsburghpa.gov/mayor/pghmayors


 22%|██▏       | 3276/14621 [1:49:04<2:21:34,  1.34it/s]

Failed to parse URL at index 3275, URL: https://pittsburghpa.gov/city-info/open-gov


 22%|██▏       | 3277/14621 [1:49:05<2:16:16,  1.39it/s]

Failed to parse URL at index 3276, URL: https://pittsburghpa.gov/city-info/release-notes.html


 22%|██▏       | 3278/14621 [1:49:05<2:13:27,  1.42it/s]

Failed to parse URL at index 3277, URL: https://pittsburghpa.gov/dcp/web-accessibility-short


 22%|██▏       | 3279/14621 [1:49:06<2:09:37,  1.46it/s]

Failed to parse URL at index 3278, URL: https://pittsburghpa.gov/311/


 22%|██▏       | 3281/14621 [1:49:09<3:08:06,  1.00it/s]

Failed to parse URL at index 3280, URL: https://pittsburghpa.gov/controller/controller.html


 22%|██▏       | 3283/14621 [1:49:10<2:24:43,  1.31it/s]

Failed to parse URL at index 3282, URL: https://twitter.com/Pittsburgh


 23%|██▎       | 3330/14621 [1:49:22<43:04,  4.37it/s]

Failed to parse URL at index 3328, URL: https://www.britannica.com/stories/the-forum


 23%|██▎       | 3367/14621 [1:49:57<52:54,  3.55it/s]

Failed to parse URL at index 3366, URL: https://x.com/britannica


 23%|██▎       | 3432/14621 [1:50:25<3:08:06,  1.01s/it]

Failed to parse URL at index 3431, URL: https://pittsburghpa.gov/events/page.html


 23%|██▎       | 3433/14621 [1:50:26<2:36:01,  1.20it/s]

Failed to parse URL at index 3432, URL: https://twitter.com/PghEventsOffice


 23%|██▎       | 3435/14621 [1:50:27<2:10:53,  1.42it/s]

Failed to parse URL at index 3434, URL: https://www.instagram.com/pgheventsoffice/


 24%|██▎       | 3437/14621 [1:51:04<25:34:00,  8.23s/it]

Failed to parse URL at index 3436, URL: https://pittsburghpa.gov/events/black-history


 24%|██▎       | 3438/14621 [1:51:05<19:22:13,  6.24s/it]

Failed to parse URL at index 3437, URL: https://pittsburghpa.gov/events/cinema


 24%|██▎       | 3439/14621 [1:51:07<15:01:41,  4.84s/it]

Failed to parse URL at index 3438, URL: https://pittsburghpa.gov/events/arsenal


 24%|██▎       | 3440/14621 [1:51:09<11:58:02,  3.85s/it]

Failed to parse URL at index 3439, URL: https://pittsburghpa.gov/events/banksville


 24%|██▎       | 3441/14621 [1:51:09<8:59:09,  2.89s/it] 

Failed to parse URL at index 3440, URL: https://pittsburghpa.gov/events/brookline


 24%|██▎       | 3442/14621 [1:51:10<6:54:51,  2.23s/it]

Failed to parse URL at index 3441, URL: https://pittsburghpa.gov/events/schenley


 24%|██▎       | 3443/14621 [1:51:11<5:25:59,  1.75s/it]

Failed to parse URL at index 3442, URL: https://pittsburghpa.gov/events/grandview


 24%|██▎       | 3444/14621 [1:51:12<5:16:02,  1.70s/it]

Failed to parse URL at index 3443, URL: https://pittsburghpa.gov/events/highland


 24%|██▎       | 3445/14621 [1:51:13<4:16:57,  1.38s/it]

Failed to parse URL at index 3444, URL: https://pittsburghpa.gov/events/mcbride


 24%|██▎       | 3446/14621 [1:51:13<3:35:17,  1.16s/it]

Failed to parse URL at index 3445, URL: https://pittsburghpa.gov/events/ormsby


 24%|██▎       | 3447/14621 [1:51:15<3:58:29,  1.28s/it]

Failed to parse URL at index 3446, URL: https://pittsburghpa.gov/events/cinema-riverview


 24%|██▎       | 3448/14621 [1:51:16<3:25:41,  1.10s/it]

Failed to parse URL at index 3447, URL: https://pittsburghpa.gov/events/schenley-plaza


 24%|██▎       | 3449/14621 [1:51:17<3:53:06,  1.25s/it]

Failed to parse URL at index 3448, URL: https://pittsburghpa.gov/events/west-end


 24%|██▎       | 3450/14621 [1:51:19<4:16:09,  1.38s/it]

Failed to parse URL at index 3449, URL: https://pittsburghpa.gov/events/community-festivals


 24%|██▎       | 3451/14621 [1:51:20<3:35:25,  1.16s/it]

Failed to parse URL at index 3450, URL: https://pittsburghpa.gov/events/concerts


 24%|██▎       | 3452/14621 [1:51:20<3:07:56,  1.01s/it]

Failed to parse URL at index 3451, URL: https://pittsburghpa.gov/events/bach-beethoven-brunch


 24%|██▎       | 3453/14621 [1:51:21<2:47:51,  1.11it/s]

Failed to parse URL at index 3452, URL: https://pittsburghpa.gov/events/jam-at-grandview


 24%|██▎       | 3454/14621 [1:51:22<2:34:01,  1.21it/s]

Failed to parse URL at index 3453, URL: https://pittsburghpa.gov/events/reservoir-jazz


 24%|██▎       | 3455/14621 [1:51:22<2:24:00,  1.29it/s]

Failed to parse URL at index 3454, URL: https://pittsburghpa.gov/events/riverview-series


 24%|██▎       | 3456/14621 [1:51:24<3:08:14,  1.01s/it]

Failed to parse URL at index 3455, URL: https://pittsburghpa.gov/events/events-directory


 24%|██▎       | 3457/14621 [1:51:25<3:38:19,  1.17s/it]

Failed to parse URL at index 3456, URL: https://pittsburghpa.gov/citiparks/farmers-market


 24%|██▎       | 3458/14621 [1:51:26<3:09:27,  1.02s/it]

Failed to parse URL at index 3457, URL: https://pittsburghpa.gov/citiparks/carrick-market


 24%|██▎       | 3459/14621 [1:51:28<3:40:20,  1.18s/it]

Failed to parse URL at index 3458, URL: https://pittsburghpa.gov/citiparks/east-end-market


 24%|██▎       | 3460/14621 [1:51:29<4:01:28,  1.30s/it]

Failed to parse URL at index 3459, URL: https://pittsburghpa.gov/citiparks/northside-market


 24%|██▎       | 3461/14621 [1:51:30<3:26:14,  1.11s/it]

Failed to parse URL at index 3460, URL: https://pittsburghpa.gov/citiparks/squirrel-hill-market


 24%|██▎       | 3462/14621 [1:51:30<3:00:38,  1.03it/s]

Failed to parse URL at index 3461, URL: https://pittsburghpa.gov/events/footraces


 24%|██▎       | 3464/14621 [1:51:33<3:19:05,  1.07s/it]

Failed to parse URL at index 3463, URL: https://pittsburghpa.gov/events/breeze


 24%|██▎       | 3465/14621 [1:51:34<2:55:53,  1.06it/s]

Failed to parse URL at index 3464, URL: https://pittsburghpa.gov/events/uphill-run-fitness-walk


 24%|██▎       | 3466/14621 [1:51:35<3:29:59,  1.13s/it]

Failed to parse URL at index 3465, URL: https://pittsburghpa.gov/events/run-around-the-square


 24%|██▎       | 3467/14621 [1:51:36<3:03:38,  1.01it/s]

Failed to parse URL at index 3466, URL: https://pittsburghpa.gov/events/great-race


 24%|██▎       | 3468/14621 [1:51:36<2:45:16,  1.12it/s]

Failed to parse URL at index 3467, URL: https://pittsburghpa.gov/events/junior-great-race


 24%|██▎       | 3470/14621 [1:51:38<2:36:54,  1.18it/s]

Failed to parse URL at index 3469, URL: https://pittsburghpa.gov/july4/


 24%|██▍       | 3473/14621 [1:51:41<2:53:12,  1.07it/s]

Failed to parse URL at index 3472, URL: https://pittsburghpa.gov/events/pistons-cruise


 24%|██▍       | 3475/14621 [1:51:43<2:45:11,  1.12it/s]

Failed to parse URL at index 3474, URL: https://pittsburghpa.gov/events/line-dancing


 24%|██▍       | 3476/14621 [1:51:44<2:37:24,  1.18it/s]

Failed to parse URL at index 3475, URL: https://pittsburghpa.gov/schenley/valentines-on-ice


 24%|██▍       | 3477/14621 [1:51:45<2:41:37,  1.15it/s]

Failed to parse URL at index 3476, URL: https://pittsburghpa.gov/press-releases/press-releases.html?id=5847


 24%|██▍       | 3478/14621 [1:51:46<2:45:39,  1.12it/s]

Failed to parse URL at index 3477, URL: https://pittsburghpa.gov/press-releases/press-releases.html?id=5843


 24%|██▍       | 3479/14621 [1:51:47<2:47:07,  1.11it/s]

Failed to parse URL at index 3478, URL: https://pittsburghpa.gov/press-releases/press-releases.html?id=5816


 24%|██▍       | 3480/14621 [1:51:47<2:19:31,  1.33it/s]

Failed to parse URL at index 3479, URL: https://twitter.com/PghEventsOffice?ref_src=twsrc%5Etfw


 24%|██▍       | 3486/14621 [1:51:56<3:38:01,  1.17s/it]

Failed to parse URL at index 3485, URL: https://pittsburghpa.gov/press-releases/press-releases/6474


 24%|██▍       | 3487/14621 [1:51:57<3:09:09,  1.02s/it]

Failed to parse URL at index 3486, URL: https://pittsburghpa.gov/inc/announcement.html?ta=special-events


 24%|██▍       | 3494/14621 [1:52:05<3:04:18,  1.01it/s]

Failed to parse URL at index 3493, URL: https://www.instagram.com/visitpittsburgh/


 24%|██▍       | 3496/14621 [1:52:13<7:23:10,  2.39s/it]

Failed to parse URL at index 3495, URL: https://www.pinterest.com/visitpittsburgh/


 24%|██▍       | 3577/14621 [1:53:59<3:04:28,  1.00s/it]

Failed to parse URL at index 3576, URL: http://www.arrival.pitt.edu/view-your-fall-2023-move-date-time


 25%|██▍       | 3641/14621 [1:55:26<3:59:41,  1.31s/it]

Failed to parse URL at index 3641, URL: https://www.visittheusa.com/
Failed to parse URL at index 3642, URL: https://destinationsinternational.org/


 25%|██▍       | 3646/14621 [1:55:32<4:07:15,  1.35s/it]

Failed to parse URL at index 3645, URL: https://pittsburghpa.gov/mayor/page.html


 25%|██▍       | 3647/14621 [1:55:34<4:23:42,  1.44s/it]

Failed to parse URL at index 3646, URL: https://pittsburghpa.gov/mayor/denny-mayor


 25%|██▍       | 3648/14621 [1:55:35<4:28:53,  1.47s/it]

Failed to parse URL at index 3647, URL: https://pittsburghpa.gov/mayor/darraugh-mayor


 25%|██▍       | 3649/14621 [1:55:37<4:35:23,  1.51s/it]

Failed to parse URL at index 3648, URL: https://pittsburghpa.gov/mayor/snowden-mayor


 25%|██▍       | 3650/14621 [1:55:38<4:37:59,  1.52s/it]

Failed to parse URL at index 3649, URL: https://pittsburghpa.gov/mayor/murray-mayor


 25%|██▍       | 3651/14621 [1:55:39<3:53:16,  1.28s/it]

Failed to parse URL at index 3650, URL: https://pittsburghpa.gov/mayor/lowrie-mayor


 25%|██▍       | 3652/14621 [1:55:41<4:09:54,  1.37s/it]

Failed to parse URL at index 3651, URL: https://pittsburghpa.gov/mayor/pettigrew-mayor


 25%|██▍       | 3653/14621 [1:55:42<4:22:49,  1.44s/it]

Failed to parse URL at index 3652, URL: https://pittsburghpa.gov/mayor/mcclintock-mayor


 25%|██▍       | 3654/14621 [1:55:43<3:41:16,  1.21s/it]

Failed to parse URL at index 3653, URL: https://pittsburghpa.gov/mayor/little-mayor


 25%|██▍       | 3655/14621 [1:55:45<4:00:28,  1.32s/it]

Failed to parse URL at index 3654, URL: https://pittsburghpa.gov/mayor/irwin-mayor


 25%|██▌       | 3656/14621 [1:55:46<4:13:44,  1.39s/it]

Failed to parse URL at index 3655, URL: https://pittsburghpa.gov/mayor/thomson-mayor


 25%|██▌       | 3657/14621 [1:55:47<3:33:12,  1.17s/it]

Failed to parse URL at index 3656, URL: https://pittsburghpa.gov/mayor/hay-mayor


 25%|██▌       | 3658/14621 [1:55:48<3:55:34,  1.29s/it]

Failed to parse URL at index 3657, URL: https://pittsburghpa.gov/mayor/howard-mayor


 25%|██▌       | 3659/14621 [1:55:49<3:21:09,  1.10s/it]

Failed to parse URL at index 3658, URL: https://pittsburghpa.gov/mayor/kerr-mayor


 25%|██▌       | 3660/14621 [1:55:50<2:56:41,  1.03it/s]

Failed to parse URL at index 3659, URL: https://pittsburghpa.gov/mayor/adams-mayor


 25%|██▌       | 3661/14621 [1:55:50<2:42:32,  1.12it/s]

Failed to parse URL at index 3660, URL: https://pittsburghpa.gov/mayor/herron-mayor


 25%|██▌       | 3662/14621 [1:55:51<2:28:44,  1.23it/s]

Failed to parse URL at index 3661, URL: https://pittsburghpa.gov/mayor/barker-mayor


 25%|██▌       | 3663/14621 [1:55:52<2:19:56,  1.31it/s]

Failed to parse URL at index 3662, URL: https://pittsburghpa.gov/mayor/jguthrie-mayor


 25%|██▌       | 3664/14621 [1:55:52<2:17:36,  1.33it/s]

Failed to parse URL at index 3663, URL: https://pittsburghpa.gov/mayor/riddle-mayor


 25%|██▌       | 3665/14621 [1:55:53<2:12:32,  1.38it/s]

Failed to parse URL at index 3664, URL: https://pittsburghpa.gov/mayor/volz-mayor


 25%|██▌       | 3666/14621 [1:55:54<2:08:28,  1.42it/s]

Failed to parse URL at index 3665, URL: https://pittsburghpa.gov/mayor/bingham-mayor


 25%|██▌       | 3667/14621 [1:55:54<2:11:17,  1.39it/s]

Failed to parse URL at index 3666, URL: https://pittsburghpa.gov/mayor/weaver-mayor


 25%|██▌       | 3668/14621 [1:55:55<2:08:04,  1.43it/s]

Failed to parse URL at index 3667, URL: https://pittsburghpa.gov/mayor/wilson-mayor


 25%|██▌       | 3669/14621 [1:55:56<2:05:11,  1.46it/s]

Failed to parse URL at index 3668, URL: https://pittsburghpa.gov/mayor/sawyer-mayor


 25%|██▌       | 3670/14621 [1:55:57<2:53:14,  1.05it/s]

Failed to parse URL at index 3669, URL: https://pittsburghpa.gov/mayor/lowry-mayor


 25%|██▌       | 3671/14621 [1:55:58<2:36:03,  1.17it/s]

Failed to parse URL at index 3670, URL: https://pittsburghpa.gov/mayor/mccarthy-mayor


 25%|██▌       | 3672/14621 [1:56:00<3:15:20,  1.07s/it]

Failed to parse URL at index 3671, URL: https://pittsburghpa.gov/mayor/blackmore-mayor


 25%|██▌       | 3673/14621 [1:56:00<2:52:08,  1.06it/s]

Failed to parse URL at index 3672, URL: https://pittsburghpa.gov/mayor/bush-mayor


 25%|██▌       | 3674/14621 [1:56:01<2:36:11,  1.17it/s]

Failed to parse URL at index 3673, URL: https://pittsburghpa.gov/mayor/liddell-mayor


 25%|██▌       | 3675/14621 [1:56:01<2:26:10,  1.25it/s]

Failed to parse URL at index 3674, URL: https://pittsburghpa.gov/mayor/lyon-mayor


 25%|██▌       | 3676/14621 [1:56:02<2:16:51,  1.33it/s]

Failed to parse URL at index 3675, URL: https://pittsburghpa.gov/mayor/fulton-mayor


 25%|██▌       | 3677/14621 [1:56:03<2:12:05,  1.38it/s]

Failed to parse URL at index 3676, URL: https://pittsburghpa.gov/mayor/mccallin-mayor


 25%|██▌       | 3678/14621 [1:56:04<2:13:00,  1.37it/s]

Failed to parse URL at index 3677, URL: https://pittsburghpa.gov/mayor/gourley-mayor


 25%|██▌       | 3679/14621 [1:56:05<2:59:34,  1.02it/s]

Failed to parse URL at index 3678, URL: https://pittsburghpa.gov/mayor/mckenna-mayor


 25%|██▌       | 3680/14621 [1:56:06<2:42:00,  1.13it/s]

Failed to parse URL at index 3679, URL: https://pittsburghpa.gov/mayor/ford-mayor


 25%|██▌       | 3681/14621 [1:56:06<2:28:04,  1.23it/s]

Failed to parse URL at index 3680, URL: https://pittsburghpa.gov/mayor/diehl-mayor


 25%|██▌       | 3682/14621 [1:56:07<2:19:03,  1.31it/s]

Failed to parse URL at index 3681, URL: https://pittsburghpa.gov/mayor/brown-mayor


 25%|██▌       | 3683/14621 [1:56:08<2:11:49,  1.38it/s]

Failed to parse URL at index 3682, URL: https://pittsburghpa.gov/mayor/obrown-mayor


 25%|██▌       | 3684/14621 [1:56:08<2:07:45,  1.43it/s]

Failed to parse URL at index 3683, URL: https://pittsburghpa.gov/mayor/hays-mayor


 25%|██▌       | 3685/14621 [1:56:10<2:55:09,  1.04it/s]

Failed to parse URL at index 3684, URL: https://pittsburghpa.gov/mayor/gguthrie-mayor


 25%|██▌       | 3686/14621 [1:56:11<2:39:36,  1.14it/s]

Failed to parse URL at index 3685, URL: https://pittsburghpa.gov/mayor/magee-mayor


 25%|██▌       | 3687/14621 [1:56:11<2:27:31,  1.24it/s]

Failed to parse URL at index 3686, URL: https://pittsburghpa.gov/mayor/armstrong-mayor


 25%|██▌       | 3688/14621 [1:56:12<2:18:04,  1.32it/s]

Failed to parse URL at index 3687, URL: https://pittsburghpa.gov/mayor/babcock-mayor


 25%|██▌       | 3689/14621 [1:56:13<2:11:31,  1.39it/s]

Failed to parse URL at index 3688, URL: https://pittsburghpa.gov/mayor/kline-mayor


 25%|██▌       | 3690/14621 [1:56:13<2:08:26,  1.42it/s]

Failed to parse URL at index 3689, URL: https://pittsburghpa.gov/mayor/jherron-mayor


 25%|██▌       | 3691/14621 [1:56:14<2:08:21,  1.42it/s]

Failed to parse URL at index 3690, URL: https://pittsburghpa.gov/mayor/mcnair-mayor


 25%|██▌       | 3692/14621 [1:56:15<2:04:36,  1.46it/s]

Failed to parse URL at index 3691, URL: https://pittsburghpa.gov/mayor/scully-mayor


 25%|██▌       | 3693/14621 [1:56:16<2:57:38,  1.03it/s]

Failed to parse URL at index 3692, URL: https://pittsburghpa.gov/mayor/lawrence-mayor


 25%|██▌       | 3694/14621 [1:56:17<2:41:39,  1.13it/s]

Failed to parse URL at index 3693, URL: https://pittsburghpa.gov/mayor/gallagher-mayor


 25%|██▌       | 3695/14621 [1:56:18<2:30:19,  1.21it/s]

Failed to parse URL at index 3694, URL: https://pittsburghpa.gov/mayor/barr-mayor


 25%|██▌       | 3696/14621 [1:56:18<2:19:42,  1.30it/s]

Failed to parse URL at index 3695, URL: https://pittsburghpa.gov/mayor/flaherty-mayor


 25%|██▌       | 3697/14621 [1:56:19<2:12:02,  1.38it/s]

Failed to parse URL at index 3696, URL: https://pittsburghpa.gov/mayor/caliguiri-mayor


 25%|██▌       | 3698/14621 [1:56:19<2:07:51,  1.42it/s]

Failed to parse URL at index 3697, URL: https://pittsburghpa.gov/mayor/masloff-mayor


 25%|██▌       | 3699/14621 [1:56:20<2:04:58,  1.46it/s]

Failed to parse URL at index 3698, URL: https://pittsburghpa.gov/mayor/murphy-mayor


 25%|██▌       | 3700/14621 [1:56:21<2:02:58,  1.48it/s]

Failed to parse URL at index 3699, URL: https://pittsburghpa.gov/mayor/oconnor-mayor


 25%|██▌       | 3701/14621 [1:56:21<2:07:13,  1.43it/s]

Failed to parse URL at index 3700, URL: https://pittsburghpa.gov/mayor/ravenstahl-mayor


 25%|██▌       | 3702/14621 [1:56:22<2:04:42,  1.46it/s]

Failed to parse URL at index 3701, URL: https://pittsburghpa.gov/mayor/mayor-peduto


 25%|██▌       | 3703/14621 [1:56:23<2:10:36,  1.39it/s]

Failed to parse URL at index 3702, URL: https://pittsburghpa.gov/mayor/mayor-profile


 25%|██▌       | 3704/14621 [1:56:24<2:06:17,  1.44it/s]

Failed to parse URL at index 3703, URL: https://pittsburghpa.gov/mayor/mayor-contact


 25%|██▌       | 3707/14621 [1:56:33<6:15:49,  2.07s/it]

Failed to parse URL at index 3706, URL: https://pittsburghpa.gov/city-info/press-releases.html


 25%|██▌       | 3708/14621 [1:56:33<4:57:41,  1.64s/it]

Failed to parse URL at index 3707, URL: https://pittsburghpa.gov/city-info/executive-orders


 25%|██▌       | 3718/14621 [1:56:51<7:41:06,  2.54s/it]

Failed to parse URL at index 3717, URL: https://en.wikipedia.org/w/index.php?title=Economy_of_Pittsburgh&action=history


 26%|██▌       | 3772/14621 [1:57:59<3:36:35,  1.20s/it]

Failed to parse URL at index 3771, URL: https://en.wikipedia.org/w/index.php?title=Clairton_Steel_Works&action=edit&redlink=1


 26%|██▌       | 3773/14621 [1:58:01<4:01:57,  1.34s/it]

Failed to parse URL at index 3772, URL: https://en.wikipedia.org/w/index.php?title=Duquesne_Steel_Works&action=edit&redlink=1


 26%|██▌       | 3775/14621 [1:58:02<3:24:48,  1.13s/it]

Failed to parse URL at index 3774, URL: https://en.wikipedia.org/w/index.php?title=Mon_Valley_Works_%E2%80%93_Irvin_Plant&action=edit&redlink=1


 26%|██▌       | 3796/14621 [1:58:32<3:59:42,  1.33s/it]

Failed to parse URL at index 3795, URL: https://en.wikipedia.org/w/index.php?title=IGA_(Supermarkets)&action=edit&redlink=1


 26%|██▋       | 3851/14621 [1:59:46<7:44:10,  2.59s/it]

Failed to parse URL at index 3850, URL: https://web.archive.org/web/20091228071847/http://www.carnegie.org/sub/about/biography.html


 26%|██▋       | 3852/14621 [1:59:48<7:14:45,  2.42s/it]

Failed to parse URL at index 3851, URL: http://www.carnegie.org/sub/about/biography.html


 26%|██▋       | 3854/14621 [1:59:51<6:02:50,  2.02s/it]

Failed to parse URL at index 3853, URL: http://www.bgsu.edu/departments/acs/1890s/carnegie/strike.html


 26%|██▋       | 3857/14621 [1:59:56<4:46:54,  1.60s/it]

Failed to parse URL at index 3857, URL: https://www.nytimes.com/1987/02/05/business/usx-to-shut-3-plants-cut-3700-jobs.html


 26%|██▋       | 3865/14621 [2:00:05<4:46:27,  1.60s/it]

Failed to parse URL at index 3864, URL: http://www.riversofsteel.com/routestoroots/3RsWindows/homestead_works.html


 26%|██▋       | 3873/14621 [2:00:40<15:17:08,  5.12s/it]

Failed to parse URL at index 3872, URL: http://digital.library.pitt.edu/cgi-bin/chronology/chronology_driver.pl?searchtype=dbrowse;year=1900;year2=1909;start_line=15


 27%|██▋       | 3878/14621 [2:00:45<5:00:30,  1.68s/it]

Failed to parse URL at index 3877, URL: http://ushistory.pwnet.org/resources/II.2.b.php


 27%|██▋       | 3879/14621 [2:00:48<6:09:12,  2.06s/it]

Failed to parse URL at index 3878, URL: http://www.civics-online.org/library/formatted/texts/wagner_act.html


 27%|██▋       | 3882/14621 [2:00:54<5:31:41,  1.85s/it]

Failed to parse URL at index 3881, URL: http://www.ridc.org/pages/about_who.aspx


 27%|██▋       | 3884/14621 [2:00:58<5:40:50,  1.90s/it]

Failed to parse URL at index 3883, URL: http://www.ridc.org/pages/about_what.aspx


 27%|██▋       | 3892/14621 [2:01:09<2:33:43,  1.16it/s]

Failed to parse URL at index 3891, URL: http://www.pittsburghfuture.com/economy.html
Failed to parse URL at index 3892, URL: http://pittsburghlive.com/images/video//2008_pdfs/GX-pncTimeline-JL-10-28.pdf


 27%|██▋       | 3896/14621 [2:01:15<4:28:01,  1.50s/it]

Failed to parse URL at index 3896, URL: http://www.bls.gov/ro3/cesqpitt.htm


 27%|██▋       | 3899/14621 [2:01:20<5:03:47,  1.70s/it]

Failed to parse URL at index 3898, URL: http://www.alleghenyconference.org/PittsburghRegionalAlliance/RegionalData/RegionalOverview/CountyAllegheny.php


 27%|██▋       | 3901/14621 [2:01:24<4:52:34,  1.64s/it]

Failed to parse URL at index 3900, URL: http://www.pittsburghlive.com/x/pittsburghtrib/business/s_631379.html


 27%|██▋       | 3902/14621 [2:01:28<6:25:50,  2.16s/it]

Failed to parse URL at index 3901, URL: http://www.ldaamerica.org/about/index.asp


 27%|██▋       | 3906/14621 [2:01:35<5:00:11,  1.68s/it]

Failed to parse URL at index 3905, URL: http://www.brookings.edu/reports/2009/06_metro_monitor.aspx


 27%|██▋       | 3908/14621 [2:01:38<4:26:41,  1.49s/it]

Failed to parse URL at index 3908, URL: http://triblive.com/sports/college/pitt/5796744-74/cavanaugh-pitt-crons


 27%|██▋       | 3911/14621 [2:01:44<5:20:12,  1.79s/it]

Failed to parse URL at index 3910, URL: http://www.alleghenyconference.org/PDFs/PRAFactSheets/FS09ThreeReasons.pdf


 27%|██▋       | 3915/14621 [2:01:53<5:34:06,  1.87s/it]

Failed to parse URL at index 3913, URL: https://en.wikipedia.org/w/index.php?title=Margaret_Wilder_and_Barry_Rubin&action=edit&redlink=1
Failed to parse URL at index 3914, URL: https://doi.org/10.1080%2F01944369608975713


 27%|██▋       | 3919/14621 [2:01:54<1:59:55,  1.49it/s]

Failed to parse URL at index 3915, URL: http://obs.rc.fas.harvard.edu/chetty/mobility_geo.pdf


 27%|██▋       | 3921/14621 [2:01:59<4:01:46,  1.36s/it]

Failed to parse URL at index 3920, URL: http://www.post-gazette.com/stories/business/news/new-heinz-owners-cut-600-jobs-including-350-in-pittsburgh-699153/


 27%|██▋       | 3923/14621 [2:02:00<3:12:31,  1.08s/it]

Failed to parse URL at index 3923, URL: http://www.bls.gov/ro3/oesphl.htm


 27%|██▋       | 3930/14621 [2:02:11<6:19:48,  2.13s/it]

Failed to parse URL at index 3929, URL: https://web.archive.org/web/20100612060013/http://pittsburgh.net/about_pittsburgh_toptenemployers.cfm


 27%|██▋       | 3931/14621 [2:02:16<8:40:06,  2.92s/it]

Failed to parse URL at index 3930, URL: http://www.g20pittsburghsummit.org/media-resources/pittsburgh-in-the-news/


 27%|██▋       | 3932/14621 [2:02:22<10:43:03,  3.61s/it]

Failed to parse URL at index 3931, URL: https://web.archive.org/web/20090822001149/http://www.g20pittsburghsummit.org/media-resources/pittsburgh-in-the-news/


 27%|██▋       | 3933/14621 [2:02:22<8:09:11,  2.75s/it] 

Failed to parse URL at index 3933, URL: https://www.nytimes.com/2005/01/12/sports/football/12steelers.html?_r=0


 29%|██▉       | 4207/14621 [2:07:54<2:03:49,  1.40it/s]

Failed to parse URL at index 4206, URL: https://en.wikipedia.org/w/index.php?title=The_Maridon_Museum&action=edit&redlink=1


 29%|██▉       | 4208/14621 [2:07:55<2:12:43,  1.31it/s]

Failed to parse URL at index 4207, URL: https://en.wikipedia.org/w/index.php?title=University_Museum_at_Indiana_University_of_Pennsylvania&action=edit&redlink=1


 29%|██▉       | 4209/14621 [2:07:56<2:39:37,  1.09it/s]

Failed to parse URL at index 4208, URL: https://en.wikipedia.org/w/index.php?title=Hoyt_Institute_of_Fine_Arts&action=edit&redlink=1


 29%|██▉       | 4210/14621 [2:07:58<3:06:06,  1.07s/it]

Failed to parse URL at index 4209, URL: https://en.wikipedia.org/w/index.php?title=Venango_Museum_of_Art,_Science,_and_Industry&action=edit&redlink=1


 29%|██▉       | 4215/14621 [2:08:02<2:45:00,  1.05it/s]

Failed to parse URL at index 4214, URL: https://en.wikipedia.org/w/index.php?title=Juniata_College_Museum_of_Art&action=edit&redlink=1


 29%|██▉       | 4276/14621 [2:09:20<3:46:48,  1.32s/it]

Failed to parse URL at index 4275, URL: https://www.census.gov/population/cencounts/az190090.txt


 29%|██▉       | 4277/14621 [2:09:21<3:51:22,  1.34s/it]

Failed to parse URL at index 4276, URL: https://www.census.gov/population/www/cen2000/briefs/phc-t4/tables/tab02.pdf


 29%|██▉       | 4280/14621 [2:09:26<4:12:47,  1.47s/it]

Failed to parse URL at index 4280, URL: https://search.worldcat.org/issn/1932-6203


 29%|██▉       | 4284/14621 [2:09:33<6:12:55,  2.16s/it]

Failed to parse URL at index 4283, URL: http://digital.library.pitt.edu/islandora/object/pitt:31735057893269/viewer#page/20/mode/2up


 29%|██▉       | 4286/14621 [2:09:36<5:15:14,  1.83s/it]

Failed to parse URL at index 4285, URL: https://www.census.gov/data/tables/2016/demo/popest/counties-total.html


 29%|██▉       | 4292/14621 [2:10:07<14:51:43,  5.18s/it]

Failed to parse URL at index 4291, URL: https://www.washingtonpost.com/wp-dyn/content/article/2009/09/23/AR2009092304713.html


 29%|██▉       | 4293/14621 [2:10:09<11:37:12,  4.05s/it]

Failed to parse URL at index 4293, URL: https://thehill.com/blogs/congress-blog/politics/376813-great-lakes-chambers-of-commerce-congressional-leaders-need-to


 29%|██▉       | 4296/14621 [2:10:14<8:37:47,  3.01s/it]

Failed to parse URL at index 4295, URL: http://www.post-gazette.com/pg/11002/1114678-109.stm


 29%|██▉       | 4298/14621 [2:10:19<7:44:01,  2.70s/it]

Failed to parse URL at index 4298, URL: http://www.crainscleveland.com/article/20070423/SUB/70420005


 29%|██▉       | 4304/14621 [2:10:30<7:40:31,  2.68s/it]

Failed to parse URL at index 4303, URL: https://web.archive.org/web/20160304082637/http://aapa.files.cms-plus.com/PDFs/2011%20U%20S%20%20PORT%20RANKINGS%20BY%20CARGO%20TONNAGE.pdf


 29%|██▉       | 4306/14621 [2:10:44<12:18:27,  4.30s/it]

Failed to parse URL at index 4305, URL: http://www.spcregion.org/pdf/servicearea10.pdf


 29%|██▉       | 4309/14621 [2:10:51<9:18:45,  3.25s/it]

Failed to parse URL at index 4308, URL: https://web.archive.org/web/20180307151129/http://geography.vt.edu/department/people/scarpaci.html


 29%|██▉       | 4310/14621 [2:10:52<7:29:38,  2.62s/it]

Failed to parse URL at index 4309, URL: https://geography.vt.edu/department/people/scarpaci.html


 30%|██▉       | 4314/14621 [2:10:57<4:21:04,  1.52s/it]

Failed to parse URL at index 4313, URL: http://www.mattress.org/content/history


 30%|██▉       | 4323/14621 [2:11:08<5:39:59,  1.98s/it]

Failed to parse URL at index 4322, URL: http://olduli.nli.org.il/F/?func=find-b&local_base=NLX10&find_code=UID&request=987007465329805171


 30%|██▉       | 4360/14621 [2:11:50<3:00:24,  1.05s/it]

Failed to parse URL at index 4359, URL: https://en.wikipedia.org/w/index.php?title=U.S._District_Attorney_for_the_Western_District_of_Pennsylvania&action=edit&redlink=1


 30%|██▉       | 4368/14621 [2:12:01<6:01:35,  2.12s/it]

Failed to parse URL at index 4367, URL: http://www.post-gazette.com/local/city/2014/12/05/Pittsburgh-submits-more-budget-information-to-Intergovernmental-Cooperation-Authority/stories/201412050172


 30%|██▉       | 4369/14621 [2:12:02<4:47:38,  1.68s/it]

Failed to parse URL at index 4368, URL: http://www.city.pittsburgh.pa.us/mayor/html/boards_commissions.html


 30%|██▉       | 4372/14621 [2:12:07<5:16:36,  1.85s/it]

Failed to parse URL at index 4371, URL: http://library.municode.com/index.aspx?clientID=13525


 31%|███       | 4495/14621 [2:14:27<11:33:40,  4.11s/it]

Failed to parse URL at index 4494, URL: http://www.freeridepgh.org/


 31%|███       | 4531/14621 [2:15:10<7:05:28,  2.53s/it]

Failed to parse URL at index 4530, URL: http://www.wlerwy.com/


 31%|███       | 4549/14621 [2:15:33<6:10:17,  2.21s/it]

Failed to parse URL at index 4548, URL: http://www.post-gazette.com/pg/05030/448976.stm


 31%|███       | 4550/14621 [2:15:33<4:40:52,  1.67s/it]

Failed to parse URL at index 4549, URL: https://www.wired.com/autopia/2010/12/the-steepest-road-on-earth-takes-no-prisoners/


 31%|███       | 4552/14621 [2:15:36<4:17:31,  1.53s/it]

Failed to parse URL at index 4551, URL: http://www.portauthority.org/PAAC/CompanyInfo/GeneralStatistics/tabid/68/Default.aspx


 31%|███       | 4553/14621 [2:15:37<4:06:19,  1.47s/it]

Failed to parse URL at index 4552, URL: http://www.myerscoachlines.com/myers/PageInfo/commuter.aspx


 31%|███       | 4556/14621 [2:15:40<2:50:41,  1.02s/it]

Failed to parse URL at index 4554, URL: http://www.spcregion.org/pdf/RegionalTransitProfile.pdf
Failed to parse URL at index 4555, URL: http://www.usairways.com/common/resources/_downloads/usaroutemap.pdf


 31%|███       | 4557/14621 [2:15:45<5:46:12,  2.06s/it]

Failed to parse URL at index 4556, URL: http://www.post-gazette.com/local/city/2014/04/01/Peduto-Pittsburgh-to-have-a-dedicated-Downtown-bike-lane-by-September/stories/201404010174


 31%|███       | 4558/14621 [2:15:50<8:01:01,  2.87s/it]

Failed to parse URL at index 4557, URL: http://www.post-gazette.com/local/city/2014/09/06/Bike-lanes-go-live-Downtown/stories/201409060054


 31%|███       | 4559/14621 [2:15:55<9:42:00,  3.47s/it]

Failed to parse URL at index 4558, URL: http://www.post-gazette.com/life/recreation/2015/04/08/Bike-sharing-program-to-begin-in-Pittsburgh-next-month/stories/201504080189


 31%|███       | 4561/14621 [2:16:01<9:12:10,  3.29s/it]

Failed to parse URL at index 4560, URL: http://www.phlf.org/news/mediaclips/2005/20050326TR_Steps.html


 32%|███▏      | 4671/14621 [2:17:58<3:13:08,  1.16s/it]

Failed to parse URL at index 4670, URL: https://en.wikipedia.org/w/index.php?title=Pennsylvania_Institute_of_Health_and_Technology&action=edit&redlink=1


 32%|███▏      | 4672/14621 [2:17:58<2:57:00,  1.07s/it]

Failed to parse URL at index 4671, URL: https://en.wikipedia.org/w/index.php?title=Rosedale_Technical_Institute&action=edit&redlink=1


 32%|███▏      | 4673/14621 [2:18:00<3:34:26,  1.29s/it]

Failed to parse URL at index 4672, URL: https://en.wikipedia.org/w/index.php?title=Penn_Commercial_Business_and_Technical_School&action=edit&redlink=1


 32%|███▏      | 4674/14621 [2:18:01<3:33:09,  1.29s/it]

Failed to parse URL at index 4673, URL: https://en.wikipedia.org/w/index.php?title=Newport_Business_Institute&action=edit&redlink=1


 32%|███▏      | 4676/14621 [2:18:04<3:29:42,  1.27s/it]

Failed to parse URL at index 4675, URL: https://en.wikipedia.org/w/index.php?title=Laurel_Business_Institute&action=edit&redlink=1


 32%|███▏      | 4678/14621 [2:18:06<3:24:29,  1.23s/it]

Failed to parse URL at index 4677, URL: https://en.wikipedia.org/w/index.php?title=Erie_Business_Center&action=edit&redlink=1


 32%|███▏      | 4680/14621 [2:18:09<3:42:28,  1.34s/it]

Failed to parse URL at index 4679, URL: https://en.wikipedia.org/w/index.php?title=Dean_Institute_of_Technology&action=edit&redlink=1


 32%|███▏      | 4682/14621 [2:18:11<3:13:07,  1.17s/it]

Failed to parse URL at index 4681, URL: https://en.wikipedia.org/w/index.php?title=Cambria-Rowe_Business_College&action=edit&redlink=1


 32%|███▏      | 4683/14621 [2:18:12<3:21:55,  1.22s/it]

Failed to parse URL at index 4682, URL: https://en.wikipedia.org/w/index.php?title=Career_Training_Academy&action=edit&redlink=1


 32%|███▏      | 4689/14621 [2:18:17<2:42:25,  1.02it/s]

Failed to parse URL at index 4688, URL: https://en.wikipedia.org/w/index.php?title=American_Academy_of_Culinary_Arts_(AACA)&action=edit&redlink=1


 32%|███▏      | 4693/14621 [2:18:20<2:11:38,  1.26it/s]

Failed to parse URL at index 4692, URL: https://ptcollege.edu/programs/


 34%|███▎      | 4910/14621 [2:22:11<3:24:14,  1.26s/it]

Failed to parse URL at index 4909, URL: https://en.wikipedia.org/wiki/The_Bulletin_(Pittsburgh)


 34%|███▍      | 4987/14621 [2:23:45<2:37:27,  1.02it/s]

Failed to parse URL at index 4986, URL: https://journals.psu.edu/wph/article/view/2090


 34%|███▍      | 4989/14621 [2:23:49<3:49:10,  1.43s/it]

Failed to parse URL at index 4988, URL: http://www.princeton.edu/~davpro/databases/index.html


 34%|███▍      | 4990/14621 [2:23:51<4:15:01,  1.59s/it]

Failed to parse URL at index 4990, URL: https://search.worldcat.org/oclc/4173355


 34%|███▍      | 4996/14621 [2:23:58<4:12:11,  1.57s/it]

Failed to parse URL at index 4995, URL: http://coloredconventions.org/conventions?by=year


 34%|███▍      | 5005/14621 [2:24:15<4:57:04,  1.85s/it]

Failed to parse URL at index 5005, URL: http://blogs.piratesprospects.com/history/pittsburghs-first-minor-league-team/


 34%|███▍      | 5009/14621 [2:24:23<6:25:44,  2.41s/it]

Failed to parse URL at index 5008, URL: https://archive.org/details/oxfordcompaniont00paul_0/page/405


 34%|███▍      | 5014/14621 [2:24:29<3:24:53,  1.28s/it]

Failed to parse URL at index 5013, URL: http://www.heinzhistorycenter.org/secondary.aspx?id=216


 34%|███▍      | 5021/14621 [2:24:36<2:23:27,  1.12it/s]

Failed to parse URL at index 5020, URL: https://www.census.gov/statab/hist/HS-07.pdf


 34%|███▍      | 5030/14621 [2:24:52<5:02:44,  1.89s/it]

Failed to parse URL at index 5029, URL: http://pittsburghfoundation.org/history


 34%|███▍      | 5034/14621 [2:24:57<3:46:16,  1.42s/it]

Failed to parse URL at index 5033, URL: http://www.newmuseum.org/spaces/listing/country:USA


 34%|███▍      | 5037/14621 [2:25:00<3:03:51,  1.15s/it]

Failed to parse URL at index 5036, URL: http://pluralism.org/religion/timelines


 34%|███▍      | 5038/14621 [2:25:01<3:11:52,  1.20s/it]

Failed to parse URL at index 5037, URL: http://www.svtemple.org/CustomMenu.aspx?EE683979-F4D5-484d-B5ED-1B66C8175838=097dcfc4-88a7-43a9-8a0d-886c05230213&7DD70F2F-A2ED-4e2c-AC6C-6C92715C95B1=2


 34%|███▍      | 5040/14621 [2:25:05<4:39:49,  1.75s/it]

Failed to parse URL at index 5039, URL: http://www.pittsburghfoodbank.org/about/history.aspx


 34%|███▍      | 5044/14621 [2:25:11<4:07:14,  1.55s/it]

Failed to parse URL at index 5044, URL: https://www.nytimes.com/1992/03/18/us/pittsburgh-transit-strike-makes-commuters-scramble.html
Failed to parse URL at index 5045, URL: https://www.nytimes.com/1992/07/16/us/new-talks-set-in-pittsburgh-newspaper-strike.html


 35%|███▍      | 5048/14621 [2:25:12<2:10:32,  1.22it/s]

Failed to parse URL at index 5047, URL: http://www.city.pittsburgh.pa.us/


 35%|███▍      | 5049/14621 [2:25:14<2:25:44,  1.09it/s]

Failed to parse URL at index 5048, URL: http://www.sustainablepittsburgh.org/whatisSP.html


 35%|███▍      | 5063/14621 [2:25:38<3:41:06,  1.39s/it]

Failed to parse URL at index 5063, URL: https://search.worldcat.org/oclc/2455241


 35%|███▍      | 5077/14621 [2:26:02<4:59:24,  1.88s/it]

Failed to parse URL at index 5077, URL: https://search.worldcat.org/oclc/1395886


 35%|███▍      | 5079/14621 [2:26:05<4:18:20,  1.62s/it]

Failed to parse URL at index 5079, URL: https://search.worldcat.org/oclc/11213848


 35%|███▍      | 5083/14621 [2:26:09<3:38:14,  1.37s/it]

Failed to parse URL at index 5083, URL: https://search.worldcat.org/oclc/13972268
Failed to parse URL at index 5084, URL: https://search.worldcat.org/oclc/21545881


 35%|███▍      | 5086/14621 [2:26:11<2:47:54,  1.06s/it]

Failed to parse URL at index 5086, URL: https://search.worldcat.org/oclc/25607897


 35%|███▍      | 5090/14621 [2:26:15<2:41:01,  1.01s/it]

Failed to parse URL at index 5090, URL: https://search.worldcat.org/oclc/248109


 35%|███▍      | 5094/14621 [2:26:18<2:22:15,  1.12it/s]

Failed to parse URL at index 5094, URL: https://search.worldcat.org/oclc/1855351


 35%|███▍      | 5104/14621 [2:26:30<4:44:20,  1.79s/it]

Failed to parse URL at index 5104, URL: https://search.worldcat.org/oclc/9034197


 35%|███▍      | 5106/14621 [2:26:32<3:32:24,  1.34s/it]

Failed to parse URL at index 5106, URL: https://search.worldcat.org/oclc/21352097


 35%|███▍      | 5109/14621 [2:26:35<3:35:29,  1.36s/it]

Failed to parse URL at index 5109, URL: https://search.worldcat.org/oclc/02338437


 35%|███▌      | 5123/14621 [2:26:53<4:04:41,  1.55s/it]

Failed to parse URL at index 5123, URL: https://search.worldcat.org/oclc/7590447


 35%|███▌      | 5151/14621 [2:27:33<6:52:50,  2.62s/it]

Failed to parse URL at index 5150, URL: https://web.archive.org/web/20160310002430/http://www.theatlantic.com/national/category/pittsburgh-pa/


 35%|███▌      | 5154/14621 [2:27:37<4:36:45,  1.75s/it]

Failed to parse URL at index 5154, URL: http://dp.la/search?utf8=✓&page_size=100&q=(pittsburg%20OR%20pittsburgh


 35%|███▌      | 5158/14621 [2:27:42<4:05:06,  1.55s/it]

Failed to parse URL at index 5157, URL: http://www.wqed.org/education/pghist/units/WPAhist/WPAhist_rsc.php


 43%|████▎     | 6231/14621 [2:47:43<5:54:45,  2.54s/it]

Failed to parse URL at index 6230, URL: https://archive.today/20120717190058/http://www.allaboutjazz.com/php/news.php?id=47573


 43%|████▎     | 6238/14621 [2:47:48<1:36:58,  1.44it/s]

Failed to parse URL at index 6236, URL: https://www.nytimes.com/2010/06/16/arts/dance/16wolken.html
Failed to parse URL at index 6237, URL: http://www.insidenorthside.com/james-michalopoulos-adventures-in-painting/


 43%|████▎     | 6243/14621 [2:47:58<3:12:26,  1.38s/it]

Failed to parse URL at index 6242, URL: http://www.databasefootball.com/players/playerpage.htm?ilkid=BULGEMAR01


 43%|████▎     | 6244/14621 [2:47:58<2:39:01,  1.14s/it]

Failed to parse URL at index 6243, URL: https://swimswam.com/bio/allison-schmitt/


 43%|████▎     | 6246/14621 [2:48:04<5:21:52,  2.31s/it]

Failed to parse URL at index 6245, URL: https://web.archive.org/web/20110609055712/http://www.post-gazette.com/pg/11157/1151670-139.stm


 43%|████▎     | 6247/14621 [2:48:10<7:31:51,  3.24s/it]

Failed to parse URL at index 6246, URL: http://www.post-gazette.com/pg/11157/1151670-139.stm
Failed to parse URL at index 6247, URL: https://www.nytimes.com/2011/11/07/books/my-long-trip-home-by-mark-whitaker-review.html


 43%|████▎     | 6253/14621 [2:48:19<3:58:31,  1.71s/it]

Failed to parse URL at index 6253, URL: https://www.nytimes.com/1999/05/20/us/jerome-wolken-82-scientist-who-gave-sight-to-some-blind.html


 43%|████▎     | 6255/14621 [2:48:19<2:26:15,  1.05s/it]

Failed to parse URL at index 6254, URL: http://www.nashuatelegraph.com/news/983569-469/nashua-guardsman-to-lead-ny-based-42nd-infantry.html


 43%|████▎     | 6290/14621 [2:49:05<3:01:01,  1.30s/it]

Failed to parse URL at index 6289, URL: https://www.jstor.org/action/doBasicSearch?Query=%22List+of+corporations+in+Pittsburgh%22&acc=on&wc=on


 43%|████▎     | 6297/14621 [2:49:11<2:01:15,  1.14it/s]

Failed to parse URL at index 6296, URL: https://en.wikipedia.org/w/index.php?title=L.B._Foster_Company&action=edit&redlink=1


 43%|████▎     | 6332/14621 [2:49:52<6:37:02,  2.87s/it]

Failed to parse URL at index 6331, URL: https://www.vectorsecurity.com/Company-Profile


 43%|████▎     | 6333/14621 [2:49:59<9:04:44,  3.94s/it]

Failed to parse URL at index 6332, URL: http://www.wexfordhealth.com/Contact-Us


 43%|████▎     | 6334/14621 [2:50:04<10:07:49,  4.40s/it]

Failed to parse URL at index 6333, URL: http://www.post-gazette.com/pg/09102/962006-28.stm


 43%|████▎     | 6335/14621 [2:50:04<7:24:42,  3.22s/it] 

Failed to parse URL at index 6334, URL: http://www.sae.org


 43%|████▎     | 6348/14621 [2:50:34<16:10:53,  7.04s/it]

Failed to parse URL at index 6348, URL: https://www.inc.com/magazine/201902/leigh-buchanan/pittsburgh-pennsylvania-food-hall-accelerator-2018-surge-cities.html
Failed to parse URL at index 6349, URL: https://www.fastcompany.com/90285175/how-americas-dying-rust-belt-town-can-transform-into-smart-cities-of-the-future


 43%|████▎     | 6353/14621 [2:50:42<7:47:48,  3.39s/it]

Failed to parse URL at index 6352, URL: https://www.usnews.com/best-colleges/rankings/computer-science-overall


 43%|████▎     | 6354/14621 [2:50:47<8:46:44,  3.82s/it]

Failed to parse URL at index 6353, URL: https://www.usnews.com/best-colleges/rankings/undergrad-research-programs


 43%|████▎     | 6356/14621 [2:50:53<8:13:28,  3.58s/it]

Failed to parse URL at index 6355, URL: https://www.usnews.com/best-graduate-schools/top-business-schools/information-systems-rankings


 43%|████▎     | 6357/14621 [2:50:58<9:09:15,  3.99s/it]

Failed to parse URL at index 6356, URL: https://www.usnews.com/best-colleges/rankings/national-universities/innovative


 44%|████▎     | 6366/14621 [2:51:11<3:49:41,  1.67s/it]

Failed to parse URL at index 6365, URL: http://www.heinz.cmu.edu/


 44%|████▎     | 6384/14621 [2:51:22<50:49,  2.70it/s]

Failed to parse URL at index 6383, URL: https://www.instagram.com/carnegiemellon/


 44%|████▎     | 6387/14621 [2:51:24<1:20:39,  1.70it/s]

Failed to parse URL at index 6386, URL: http://www.twitter.com/carnegiemellon


 44%|████▎     | 6388/14621 [2:51:25<1:49:42,  1.25it/s]

Failed to parse URL at index 6387, URL: https://www.linkedin.com/company/carnegie-mellon-university


 44%|████▎     | 6390/14621 [2:51:27<2:06:53,  1.08it/s]

Failed to parse URL at index 6389, URL: https://instagram.com/carnegiemellon/


 44%|████▍     | 6405/14621 [2:51:37<55:08,  2.48it/s]  

Failed to parse URL at index 6404, URL: https://www.cmu.edu/visit/welcome-center.html


 44%|████▍     | 6410/14621 [2:51:40<1:37:48,  1.40it/s]

Failed to parse URL at index 6409, URL: http://athletics.cmu.edu


 45%|████▌     | 6595/14621 [2:55:46<2:00:49,  1.11it/s]

Failed to parse URL at index 6594, URL: https://en.wikipedia.org/wiki/File:Tepper_School_of_Business.jpg


 45%|████▌     | 6634/14621 [2:56:44<2:13:37,  1.00s/it]

Failed to parse URL at index 6633, URL: https://www.bloomberg.com/business-schools/regions/us/


 46%|████▌     | 6683/14621 [2:57:57<2:29:43,  1.13s/it]

Failed to parse URL at index 6682, URL: https://en.wikipedia.org/w/index.php?title=Carnegie_Mellon_Neuroscience_Institute&action=edit&redlink=1


 46%|████▌     | 6710/14621 [2:58:45<3:22:32,  1.54s/it]

Failed to parse URL at index 6709, URL: https://en.wikipedia.org/w/index.php?title=Athens_Information_Technology&action=edit&redlink=1


 46%|████▋     | 6780/14621 [3:00:46<2:12:16,  1.01s/it]

Failed to parse URL at index 6779, URL: https://en.wikipedia.org/w/index.php?title=Carnegie_Mellon_University_Pre-College_Program&action=edit&redlink=1


 47%|████▋     | 6847/14621 [3:02:31<2:16:10,  1.05s/it]

Failed to parse URL at index 6846, URL: https://en.wikipedia.org/w/index.php?title=D._B._Russell&action=edit&redlink=1


 48%|████▊     | 6970/14621 [3:05:19<3:23:59,  1.60s/it]

Failed to parse URL at index 6969, URL: https://en.wikipedia.org/w/index.php?title=Davey_Quinn&action=edit&redlink=1


 48%|████▊     | 7010/14621 [3:06:25<2:46:30,  1.31s/it]

Failed to parse URL at index 7009, URL: https://en.wikipedia.org/w/index.php?title=W.B._(Walter_Booser)_%22Chip%22_Detweiler&action=edit&redlink=1


 49%|████▉     | 7129/14621 [3:11:42<7:02:11,  3.38s/it]

Failed to parse URL at index 7128, URL: http://www.cmu.edu/mascot/


 49%|████▉     | 7134/14621 [3:11:51<5:08:04,  2.47s/it]

Failed to parse URL at index 7133, URL: https://web.archive.org/web/20230315024857/https://www.cmu.edu/global/


 49%|████▉     | 7138/14621 [3:12:00<5:33:24,  2.67s/it]

Failed to parse URL at index 7137, URL: https://web.archive.org/web/20220628155931/https://www.cmu.edu/tepper/why-tepper/our-history.html


 49%|████▉     | 7141/14621 [3:12:08<5:01:23,  2.42s/it]

Failed to parse URL at index 7140, URL: https://athletics.cmu.edu/landing/index


 49%|████▉     | 7142/14621 [3:12:13<6:38:37,  3.20s/it]

Failed to parse URL at index 7141, URL: https://web.archive.org/web/20230812031335/https://athletics.cmu.edu/landing/index


 49%|████▉     | 7149/14621 [3:12:35<7:33:39,  3.64s/it]

Failed to parse URL at index 7148, URL: https://web.archive.org/web/20171114202539/https://www.acs.org/content/acs/en/education/whatischemistry/landmarks/mellon-institute.html


 49%|████▉     | 7151/14621 [3:12:42<7:42:35,  3.72s/it]

Failed to parse URL at index 7150, URL: https://web.archive.org/web/20090221125329/http://www.carnegiemellontoday.com/article.asp?Aid=347
Failed to parse URL at index 7151, URL: http://www.carnegiemellontoday.com/article.asp?Aid=347


 49%|████▉     | 7153/14621 [3:12:48<6:46:44,  3.27s/it]

Failed to parse URL at index 7152, URL: http://old.post-gazette.com/healthscience/20010331cmuhealth2.asp


 49%|████▉     | 7155/14621 [3:12:56<7:56:29,  3.83s/it]

Failed to parse URL at index 7154, URL: http://www.post-gazette.com/local/city/2014/03/24/CMU-student-center-to-be-renamed-for-Cohon/stories/201403240210


 49%|████▉     | 7156/14621 [3:13:01<8:36:34,  4.15s/it]

Failed to parse URL at index 7155, URL: https://web.archive.org/web/20140326001925/http://www.post-gazette.com/local/city/2014/03/24/CMU-student-center-to-be-renamed-for-Cohon/stories/201403240210


 49%|████▉     | 7160/14621 [3:13:12<7:03:09,  3.40s/it]

Failed to parse URL at index 7159, URL: https://web.archive.org/web/20220502163843/https://timesmachine.nytimes.com/timesmachine/1904/10/27/101400667.pdf?pdf_redirect=true&ip=0


 49%|████▉     | 7161/14621 [3:13:15<6:38:19,  3.20s/it]

Failed to parse URL at index 7160, URL: https://web.archive.org/web/20080513212745/http://download.srv.cs.cmu.edu/~pausch/Randy/pauschlastlecturetranscript.pdf


 49%|████▉     | 7163/14621 [3:13:25<8:46:29,  4.24s/it]

Failed to parse URL at index 7162, URL: https://www.usnews.com/education/worlds-best-universities-rankings/top-400-universities-in-the-world?page=2


 49%|████▉     | 7169/14621 [3:13:39<6:03:05,  2.92s/it]

Failed to parse URL at index 7168, URL: https://web.archive.org/web/20110208021108/http://whenwillwemove.com/


 49%|████▉     | 7174/14621 [3:13:49<4:00:55,  1.94s/it]

Failed to parse URL at index 7173, URL: http://www.cmu.edu/dietrich/=2011-09-07


 49%|████▉     | 7175/14621 [3:13:54<5:55:24,  2.86s/it]

Failed to parse URL at index 7174, URL: https://web.archive.org/web/20140625085908/http://www.cmu.edu/homepage/environment/2012/summer/new-energy-institute.shtml


 49%|████▉     | 7179/14621 [3:14:03<5:35:34,  2.71s/it]

Failed to parse URL at index 7178, URL: https://web.archive.org/web/20160320065709/http://www.cmu.edu/homepage/society/2013/fall/visionary-beginnings.shtml


 49%|████▉     | 7181/14621 [3:14:10<6:37:19,  3.20s/it]

Failed to parse URL at index 7180, URL: http://www.post-gazette.com/business/development/2015/04/22/CMU-carnegie-mellon-university-plans-hotel-retail-development-as-part-of-innovation-corridor-pittsburgh/stories/201504220154


 49%|████▉     | 7185/14621 [3:14:25<7:34:29,  3.67s/it]

Failed to parse URL at index 7184, URL: https://web.archive.org/web/20150828124653/http://www.cmu.edu/news/stories/archives/2015/august/historic-gift.html


 49%|████▉     | 7191/14621 [3:14:42<7:03:26,  3.42s/it]

Failed to parse URL at index 7190, URL: https://web.archive.org/web/20141215094152/http://www.cmu.edu/news/stories/archives/2013/february/feb5_ninthpresident.html


 49%|████▉     | 7193/14621 [3:14:50<7:57:38,  3.86s/it]

Failed to parse URL at index 7192, URL: https://web.archive.org/web/20180904191814/https://www.cmu.edu/news/stories/archives/2017/june/leadership-transition.html


 49%|████▉     | 7200/14621 [3:15:05<5:38:03,  2.73s/it]

Failed to parse URL at index 7199, URL: https://web.archive.org/web/20240913231841/https://www.cmu.edu/mcs/news-events/2024/0412_carnegie-mellon-breaks-ground-for-richard-king-mellon-hall-of-sciences.html


 49%|████▉     | 7203/14621 [3:15:12<5:20:58,  2.60s/it]

Failed to parse URL at index 7202, URL: https://www.usnews.com/best-colleges/rankings/national-universities


 49%|████▉     | 7205/14621 [3:15:15<4:06:42,  2.00s/it]

Failed to parse URL at index 7205, URL: https://www.wsj.com/rankings/college-rankings/best-colleges-2025


 49%|████▉     | 7210/14621 [3:15:21<4:08:14,  2.01s/it]

Failed to parse URL at index 7209, URL: https://www.usnews.com/education/best-global-universities/rankings


 49%|████▉     | 7211/14621 [3:15:26<5:50:52,  2.84s/it]

Failed to parse URL at index 7210, URL: https://www.usnews.com/best-graduate-schools/carnegie-mellon-university-211440/overall-rankings


 49%|████▉     | 7213/14621 [3:15:34<6:55:28,  3.37s/it]

Failed to parse URL at index 7212, URL: https://web.archive.org/web/20191223135332/https://www.usnews.com/best-colleges/rankings/national-universities


 49%|████▉     | 7215/14621 [3:15:42<7:54:46,  3.85s/it]

Failed to parse URL at index 7214, URL: https://web.archive.org/web/20220401162920/https://www.cmu.edu/news/stories/archives/2022/march/grad-rankings.html


 49%|████▉     | 7216/14621 [3:15:47<8:38:16,  4.20s/it]

Failed to parse URL at index 7215, URL: https://www.usnews.com/best-graduate-schools/top-science-schools/computer-science-rankings


 49%|████▉     | 7219/14621 [3:15:59<8:26:00,  4.10s/it]

Failed to parse URL at index 7218, URL: https://web.archive.org/web/20110714175956/http://www.newsweek.com/2006/08/20/25-new-ivies.html
Failed to parse URL at index 7219, URL: https://www.wsj.com/articles/SB10001424052748704554104575435563989873060


 49%|████▉     | 7221/14621 [3:16:04<7:06:24,  3.46s/it]

Failed to parse URL at index 7220, URL: https://web.archive.org/web/20141226023122/http://www.wsj.com/articles/SB10001424052748704554104575435563989873060


 49%|████▉     | 7224/14621 [3:16:09<4:13:11,  2.05s/it]

Failed to parse URL at index 7223, URL: https://www.bloomberg.com/business-schools/2018/us/#ranking=0


 49%|████▉     | 7236/14621 [3:16:31<4:29:07,  2.19s/it]

Failed to parse URL at index 7235, URL: http://www3.weforum.org/docs/GULF_Members_2015.pdf


 50%|████▉     | 7238/14621 [3:16:34<3:22:34,  1.65s/it]

Failed to parse URL at index 7237, URL: https://www.cmu.edu/news/stories/archives/2023/august/cmu-president-jahanian-welcomes-the-class-of-2027


 50%|████▉     | 7239/14621 [3:16:39<5:26:49,  2.66s/it]

Failed to parse URL at index 7238, URL: https://web.archive.org/web/20240916031055/https://www.cmu.edu/news/stories/archives/2023/august/cmu-president-jahanian-welcomes-the-class-of-2027


 50%|████▉     | 7242/14621 [3:16:47<5:15:34,  2.57s/it]

Failed to parse URL at index 7241, URL: https://www.cmu.edu/ira/undergraduate-admission/pdfs/2021-pdfs/fall-2021-first-year-cohort-admission-rates.pdf


 50%|████▉     | 7245/14621 [3:16:55<5:55:32,  2.89s/it]

Failed to parse URL at index 7244, URL: https://web.archive.org/web/20220120073814/https://www.cmu.edu/ira/CDS/cds_2021.html


 50%|████▉     | 7247/14621 [3:17:02<6:47:43,  3.32s/it]

Failed to parse URL at index 7246, URL: https://web.archive.org/web/20201112010943/https://www.cmu.edu/ira/CDS/cds_1920.html


 50%|████▉     | 7254/14621 [3:17:16<4:59:56,  2.44s/it]

Failed to parse URL at index 7253, URL: https://www.usnews.com/best-colleges/carnegie-mellon-university-3242


 50%|████▉     | 7255/14621 [3:17:21<6:34:51,  3.22s/it]

Failed to parse URL at index 7254, URL: https://web.archive.org/web/20170227032311/https://www.usnews.com/best-colleges/carnegie-mellon-university-3242


 50%|████▉     | 7258/14621 [3:17:22<2:52:05,  1.40s/it]

Failed to parse URL at index 7256, URL: https://web.archive.org/web/20220104215002/https://www.cmu.edu/ira/undergraduate-admission/index.html
Failed to parse URL at index 7257, URL: https://web.archive.org/web/20191212200356/https://admission.enrollment.cmu.edu/media/W1siZiIsIjIwMTkvMTEvMjAvNjVmbnBxOTlvc19BRE1fMjBfMDc3X0dlbmVyYWxfRmFjdF9TaGVldF8wNF9Gb3JXZWIucGRmIl1d/ADM-20-077_General_Fact_Sheet_04_ForWeb.pdf


 50%|████▉     | 7259/14621 [3:17:24<2:41:22,  1.32s/it]

Failed to parse URL at index 7258, URL: https://admission.enrollment.cmu.edu/media/W1siZiIsIjIwMTkvMTEvMjAvNjVmbnBxOTlvc19BRE1fMjBfMDc3X0dlbmVyYWxfRmFjdF9TaGVldF8wNF9Gb3JXZWIucGRmIl1d/ADM-20-077_General_Fact_Sheet_04_ForWeb.pdf


 50%|████▉     | 7261/14621 [3:17:24<1:31:53,  1.33it/s]

Failed to parse URL at index 7259, URL: https://www.cmu.edu/ira/undergraduate-admission/pdfs/2021-pdfs/fall-2021-first-year-cohort-by-college-and-racecitizenship.pdf
Failed to parse URL at index 7260, URL: https://web.archive.org/web/20220104215001/https://www.cmu.edu/ira/undergraduate-admission/pdfs/2021-pdfs/fall-2021-first-year-cohort-by-college-and-racecitizenship.pdf


 50%|████▉     | 7263/14621 [3:17:25<1:04:16,  1.91it/s]

Failed to parse URL at index 7262, URL: https://web.archive.org/web/20230105031345/https://www.cmu.edu/ira/CDS/cds_2122.html


 50%|████▉     | 7265/14621 [3:17:27<1:38:29,  1.24it/s]

Failed to parse URL at index 7264, URL: https://web.archive.org/web/20210118011556/https://admission.enrollment.cmu.edu/pages/financial-aid


 50%|████▉     | 7267/14621 [3:17:28<1:04:30,  1.90it/s]

Failed to parse URL at index 7266, URL: https://web.archive.org/web/20240913231726/https://www.cmu.edu/sfs/tuition/undergraduate/index.html


 50%|████▉     | 7269/14621 [3:17:30<1:43:21,  1.19it/s]

Failed to parse URL at index 7268, URL: https://web.archive.org/web/20220731121956/https://ncsesdata.nsf.gov/profiles/site?method=report&tin=U0548001&id=h3


 50%|████▉     | 7272/14621 [3:17:32<1:12:16,  1.69it/s]

Failed to parse URL at index 7270, URL: https://web.archive.org/web/20160505112825/https://news.google.com/newspapers?id=PNVRAAAAIBAJ&sjid=320DAAAAIBAJ&pg=1637,7096194
Failed to parse URL at index 7271, URL: https://web.archive.org/web/20080208171920/http://www.psc.edu/


 50%|████▉     | 7275/14621 [3:17:33<51:38,  2.37it/s]  

Failed to parse URL at index 7274, URL: https://web.archive.org/web/20240913231726/https://www.bizjournals.com/pittsburgh/news/2018/02/27/professor-to-lead-new-neuroscience-institute-at.html


 50%|████▉     | 7277/14621 [3:17:35<1:04:14,  1.91it/s]

Failed to parse URL at index 7276, URL: https://web.archive.org/web/20240913231750/https://www.technologynetworks.com/neuroscience/news/barbara-shinn-cunningham-to-lead-carnegie-mellons-new-neuroscience-institute-298061


 50%|████▉     | 7280/14621 [3:17:38<1:42:58,  1.19it/s]

Failed to parse URL at index 7279, URL: https://web.archive.org/web/20170331145926/http://philanthropynewsdigest.org/news/carnegie-mellon-launches-75-million-brain-research-initiative


 50%|████▉     | 7282/14621 [3:17:39<1:15:22,  1.62it/s]

Failed to parse URL at index 7281, URL: https://web.archive.org/web/20240913231727/https://www.wesa.fm/science-health-tech/2017-07-03/cmu-researchers-are-teaching-computers-to-read-minds


 50%|████▉     | 7284/14621 [3:17:40<1:05:22,  1.87it/s]

Failed to parse URL at index 7283, URL: https://web.archive.org/web/20240913231728/https://www.freethink.com/health/stentrode


 50%|████▉     | 7286/14621 [3:17:41<1:06:22,  1.84it/s]

Failed to parse URL at index 7285, URL: https://web.archive.org/web/20240913231728/https://www.neurotechreports.com/pages/Cell-targeted-DBS.html
Failed to parse URL at index 7286, URL: https://www.sciencedaily.com/releases/2022/08/220801133143.htm


 50%|████▉     | 7290/14621 [3:17:45<1:36:38,  1.26it/s]

Failed to parse URL at index 7288, URL: https://nida.nih.gov/research/research-training-career-development/extramural-research-training-career-development/blueprint-research-training-sites-computational
Failed to parse URL at index 7289, URL: https://web.archive.org/web/20230816201136/https://nida.nih.gov/research/research-training-career-development/extramural-research-training-career-development/blueprint-research-training-sites-computational


 50%|████▉     | 7292/14621 [3:17:46<1:19:27,  1.54it/s]

Failed to parse URL at index 7291, URL: https://web.archive.org/web/20240913232255/https://reporter.nih.gov/search/avNbxToFkE-Zu0NQ43X7YQ/project-details/10411631#similar-Projects


 50%|████▉     | 7294/14621 [3:17:49<1:35:21,  1.28it/s]

Failed to parse URL at index 7293, URL: https://web.archive.org/web/20221206145945/https://www.rkmf.org/news_posts/carnegie-mellon-richard-king-mellon-foundation-announce-historic-partnership-to-accelerate-cmu-s-science-and-technology-leadership-and-the-transformation-of-hazelwood-green


 50%|████▉     | 7300/14621 [3:18:05<6:15:46,  3.08s/it]

Failed to parse URL at index 7299, URL: https://web.archive.org/web/20240913232402/https://www.myscience.org/news/wire/three_new_trustees_elected_to_cmu_board-2021-cmu


 50%|█████     | 7313/14621 [3:18:28<5:08:42,  2.53s/it]

Failed to parse URL at index 7312, URL: https://web.archive.org/web/20091203001849/http://www.sei.cmu.edu/


 50%|█████     | 7314/14621 [3:18:34<6:51:44,  3.38s/it]

Failed to parse URL at index 7313, URL: https://web.archive.org/web/20080509093930/http://www.hcii.cmu.edu/


 50%|█████     | 7316/14621 [3:18:41<7:36:01,  3.75s/it]

Failed to parse URL at index 7315, URL: https://web.archive.org/web/20081216045852/http://www.lti.cs.cmu.edu/


 50%|█████     | 7320/14621 [3:18:53<6:53:15,  3.40s/it]

Failed to parse URL at index 7319, URL: https://www.washingtonpost.com/local/education/in-qatars-education-city-us-colleges-are-building-an-academic-oasis/2015/12/06/6b538702-8e01-11e5-ae1f-af46b7df8483_story.html


 50%|█████     | 7332/14621 [3:19:20<4:49:59,  2.39s/it]

Failed to parse URL at index 7332, URL: http://local.lancasteronline.com/4/28490


 50%|█████     | 7340/14621 [3:19:31<3:06:05,  1.53s/it]

Failed to parse URL at index 7339, URL: http://www.time.com/time/specials/2007/article/0,28804,1733748_1733756_1736194,00.html


 50%|█████     | 7341/14621 [3:19:32<2:53:28,  1.43s/it]

Failed to parse URL at index 7340, URL: http://www.playbill.com/news/article/191914-Zachary-Quinto-and-Matt-Bomer-Will-Introduce-New-Educator-Honor-at-2014-Tony-Awards


 50%|█████     | 7343/14621 [3:19:39<5:16:33,  2.61s/it]

Failed to parse URL at index 7342, URL: https://web.archive.org/web/20110208162108/http://www.drama.cmu.edu/147/admissions-requirements


 50%|█████     | 7344/14621 [3:19:40<4:26:41,  2.20s/it]

Failed to parse URL at index 7343, URL: http://drama.cmu.edu/147/admissions-requirements


 50%|█████     | 7346/14621 [3:19:43<3:31:14,  1.74s/it]

Failed to parse URL at index 7345, URL: http://soa.cmu.edu/undergraduateadmissions/


 50%|█████     | 7355/14621 [3:20:05<5:23:25,  2.67s/it]

Failed to parse URL at index 7354, URL: https://web.archive.org/web/20180207122733/https://www.cmu.edu/interdisciplinary/programs/bsaprogram.html


 50%|█████     | 7357/14621 [3:20:07<3:47:32,  1.88s/it]

Failed to parse URL at index 7356, URL: http://grad-schools.usnews.rankingsandreviews.com/usnews/edu/grad/rankings/phdsci/brief/com_brief.php


 50%|█████     | 7362/14621 [3:20:14<2:24:31,  1.19s/it]

Failed to parse URL at index 7361, URL: http://mytepper.tepper.cmu.edu/current-students/Undergraduate-Students/Business%20Administration%20Degrees


 50%|█████     | 7364/14621 [3:20:17<2:34:24,  1.28s/it]

Failed to parse URL at index 7363, URL: http://www.cmu.edu/ideate/index.html


 50%|█████     | 7365/14621 [3:20:22<4:50:00,  2.40s/it]

Failed to parse URL at index 7364, URL: https://web.archive.org/web/20080509054707/http://huntbot.andrew.cmu.edu/HIBD/Default.shtml


 50%|█████     | 7366/14621 [3:20:22<3:31:56,  1.75s/it]

Failed to parse URL at index 7365, URL: http://huntbot.andrew.cmu.edu/HIBD/Default.shtml


 50%|█████     | 7369/14621 [3:20:31<5:44:14,  2.85s/it]

Failed to parse URL at index 7368, URL: http://www.post-gazette.com/stories/local/neighborhoods-city/pitt-cmu-make-most-of-proximity-266578/?p=0


 50%|█████     | 7371/14621 [3:20:36<5:15:27,  2.61s/it]

Failed to parse URL at index 7370, URL: http://www.pitt.edu/~oafa/community.html


 50%|█████     | 7375/14621 [3:20:45<4:46:18,  2.37s/it]

Failed to parse URL at index 7374, URL: https://web.archive.org/web/20200727073538/http://www.pchepa.org/index.php?option=com_content&task=view&id=14&Itemid=29


 50%|█████     | 7378/14621 [3:20:49<3:07:45,  1.56s/it]

Failed to parse URL at index 7377, URL: http://ml.hss.cmu.edu/slrf2012/


 50%|█████     | 7382/14621 [3:20:57<4:03:32,  2.02s/it]

Failed to parse URL at index 7381, URL: http://www.alice.org/index.php?page=what_is_alice/what_is_alice


 51%|█████     | 7386/14621 [3:21:08<5:56:43,  2.96s/it]

Failed to parse URL at index 7385, URL: http://university-discoveries.com/carnegie-mellon-university


 51%|█████     | 7397/14621 [3:21:29<4:30:17,  2.24s/it]

Failed to parse URL at index 7396, URL: http://www.popcitymedia.com/innovationnews/wifi100511.aspx


 51%|█████     | 7399/14621 [3:21:32<3:31:56,  1.76s/it]

Failed to parse URL at index 7398, URL: http://www.cmu.edu/epp/people/faculty/alex-hills.html


 51%|█████     | 7408/14621 [3:21:46<2:50:18,  1.42s/it]

Failed to parse URL at index 7407, URL: https://www.cmu.edu/corporate/pod/affiliated_companies.html


 51%|█████     | 7409/14621 [3:21:51<5:00:16,  2.50s/it]

Failed to parse URL at index 7408, URL: https://web.archive.org/web/20171019062635/http://www.alumni.cmu.edu/s/1410/images/editor_documents/alumnirelations/about/notable/notable-alumni.pdf?no_cookie=1


 51%|█████     | 7410/14621 [3:21:53<5:09:04,  2.57s/it]

Failed to parse URL at index 7409, URL: http://www.alumni.cmu.edu/s/1410/images/editor_documents/alumnirelations/about/notable/notable-alumni.pdf?no_cookie=1
Failed to parse URL at index 7410, URL: https://collegescorecard.ed.gov/school/?211440-Carnegie-Mellon-University


 51%|█████     | 7425/14621 [3:22:19<3:18:09,  1.65s/it]

Failed to parse URL at index 7424, URL: http://www.housing.cmu.edu/buildings/


 51%|█████     | 7430/14621 [3:22:29<4:49:11,  2.41s/it]

Failed to parse URL at index 7429, URL: https://web.archive.org/web/20090901235405/http://www.nrahq.org/compete/nat-trophy/tro-113.pdf


 51%|█████     | 7437/14621 [3:22:42<3:21:50,  1.69s/it]

Failed to parse URL at index 7436, URL: http://www.contrib.andrew.cmu.edu/~ultimate/


 51%|█████     | 7438/14621 [3:22:47<5:21:37,  2.69s/it]

Failed to parse URL at index 7437, URL: https://web.archive.org/web/20080311185634/http://www.tartancrew.org/


 51%|█████     | 7440/14621 [3:22:54<6:22:47,  3.20s/it]

Failed to parse URL at index 7439, URL: https://web.archive.org/web/20110201133215/http://tartanhockey.com/
Failed to parse URL at index 7440, URL: http://www.tartanhockey.com


 51%|█████     | 7442/14621 [3:22:57<4:32:39,  2.28s/it]

Failed to parse URL at index 7442, URL: http://www.cmubaseball.com/


 51%|█████     | 7445/14621 [3:22:59<3:02:43,  1.53s/it]

Failed to parse URL at index 7444, URL: http://athletics.cmu.edu/recreation/clubsports


 51%|█████     | 7447/14621 [3:23:02<3:10:02,  1.59s/it]

Failed to parse URL at index 7446, URL: http://www.andrew.cmu.edu/org/cmuwp/


 51%|█████     | 7449/14621 [3:23:05<2:43:17,  1.37s/it]

Failed to parse URL at index 7448, URL: http://www.andrew.cmu.edu/~cycling/


 51%|█████     | 7450/14621 [3:23:06<2:52:25,  1.44s/it]

Failed to parse URL at index 7450, URL: http://www.carnegiemellontoday.com/article.asp?Aid=380


 51%|█████     | 7456/14621 [3:23:15<3:21:20,  1.69s/it]

Failed to parse URL at index 7455, URL: http://www.wellsville.wnyric.org/education/components/scrapbook/default.php?sectiondetailid=104408&pagecat=546


 51%|█████     | 7457/14621 [3:23:20<5:14:28,  2.63s/it]

Failed to parse URL at index 7456, URL: https://web.archive.org/web/20110929001025/http://www.wellsville.wnyric.org/education/components/scrapbook/default.php?sectiondetailid=104408&pagecat=546


 51%|█████     | 7458/14621 [3:23:20<3:50:50,  1.93s/it]

Failed to parse URL at index 7457, URL: https://athletics.cmu.edu/intramurals/IMhome


 51%|█████     | 7460/14621 [3:23:25<3:58:12,  2.00s/it]

Failed to parse URL at index 7459, URL: http://www.cmu.edu/ira/alumni/index.html


 51%|█████     | 7461/14621 [3:23:30<5:44:46,  2.89s/it]

Failed to parse URL at index 7460, URL: https://web.archive.org/web/20230813214952/https://www.cmu.edu/ira/alumni/index.html


 51%|█████     | 7466/14621 [3:23:46<7:46:29,  3.91s/it]

Failed to parse URL at index 7465, URL: https://web.archive.org/web/20170908020305/http://www.mcall.com/entertainment/arts/mc-international-fusion-easton-shalom-neuman-20170415-story.html


 51%|█████     | 7467/14621 [3:23:47<5:41:15,  2.86s/it]

Failed to parse URL at index 7466, URL: https://www.mcall.com/entertainment/arts/mc-international-fusion-easton-shalom-neuman-20170415-story.html


 51%|█████     | 7471/14621 [3:23:57<5:46:48,  2.91s/it]

Failed to parse URL at index 7470, URL: https://web.archive.org/web/20190502133300/https://www.cmu.edu/cmnews/extra/040223_makeoverman.html


 51%|█████     | 7477/14621 [3:24:09<4:28:46,  2.26s/it]

Failed to parse URL at index 7476, URL: http://www.alumni.cmu.edu/s/1410/alumni/index-social.aspx?sid=1410&gid=1&pgid=377


 51%|█████     | 7479/14621 [3:24:11<2:59:52,  1.51s/it]

Failed to parse URL at index 7478, URL: http://www.cmu.edu/piper/piper/2011/march/judyresnik.html


 51%|█████     | 7484/14621 [3:24:16<1:59:59,  1.01s/it]

Failed to parse URL at index 7483, URL: http://athletics.cmu.edu/


 53%|█████▎    | 7737/14621 [3:31:24<2:25:03,  1.26s/it]

Failed to parse URL at index 7736, URL: http://olduli.nli.org.il/F/?func=find-b&local_base=NLX10&find_code=UID&request=987007568422105171


 53%|█████▎    | 7741/14621 [3:31:28<1:41:58,  1.12it/s]

Failed to parse URL at index 7741, URL: https://www.moma.org/artists/33300


 53%|█████▎    | 7760/14621 [3:31:48<2:24:29,  1.26s/it]

Failed to parse URL at index 7759, URL: http://www.cmu.edu/graduate/prospective-students/index.html


 54%|█████▎    | 7836/14621 [3:32:17<51:18,  2.20it/s]

Failed to parse URL at index 7835, URL: https://www.cmu.edu/admission/costs-aid/types-of-aid


 54%|█████▎    | 7837/14621 [3:32:17<50:11,  2.25it/s]

Failed to parse URL at index 7836, URL: https://www.cmu.edu/admission/costs-aid/applying-for-aid


 54%|█████▎    | 7838/14621 [3:32:18<47:26,  2.38it/s]

Failed to parse URL at index 7837, URL: https://www.cmu.edu/admission/costs-aid/tuition-and-fees


 54%|█████▎    | 7840/14621 [3:32:19<46:39,  2.42it/s]

Failed to parse URL at index 7839, URL: https://www.cmu.edu/admission/costs-aid/financial-aid-faq


 54%|█████▎    | 7847/14621 [3:32:21<39:44,  2.84it/s]

Failed to parse URL at index 7846, URL: https://twitter.com/CarnegieMellon


 54%|█████▎    | 7850/14621 [3:32:23<1:03:41,  1.77it/s]

Failed to parse URL at index 7849, URL: https://www.instagram.com/carnegiemellonadmission/


 54%|█████▍    | 7919/14621 [3:33:01<32:21,  3.45it/s]

Failed to parse URL at index 7918, URL: https://www.cmu.edu/leadership/president/lecture-series/index-old.html


 55%|█████▍    | 7976/14621 [3:33:45<4:43:07,  2.56s/it]

Failed to parse URL at index 7975, URL: https://en.wikipedia.org/w/index.php?title=Carnegie_Mellon_University_traditions&action=history


 55%|█████▍    | 8025/14621 [3:34:54<2:41:04,  1.47s/it]

Failed to parse URL at index 8024, URL: https://thetartan.org/2012/8/19/pillbox/traditions


 55%|█████▍    | 8027/14621 [3:34:58<3:06:39,  1.70s/it]

Failed to parse URL at index 8027, URL: http://www.therepublic.com/view/story/1cf66a3f4fb44440b130a824561e15b9/PA--Member-Exchange-CMU-Fence/


 55%|█████▍    | 8032/14621 [3:35:05<3:42:25,  2.03s/it]

Failed to parse URL at index 8031, URL: https://web.archive.org/web/20160304053848/http://thetartan.org/2011/3/28/news/heal_the_fence


 55%|█████▍    | 8037/14621 [3:35:11<2:30:11,  1.37s/it]

Failed to parse URL at index 8036, URL: http://cmubuggy.org/video/1969raceday


 55%|█████▍    | 8040/14621 [3:35:18<4:21:40,  2.39s/it]

Failed to parse URL at index 8039, URL: https://web.archive.org/web/20160529200712/http://cmubuggy.org/gallery/1980s/KingEider1988


 55%|█████▌    | 8047/14621 [3:35:25<2:05:42,  1.15s/it]

Failed to parse URL at index 8046, URL: https://athletics.cmu.edu/generalnews/2021-2022/olisar_named_kilitie_director


 55%|█████▌    | 8052/14621 [3:35:31<2:24:08,  1.32s/it]

Failed to parse URL at index 8051, URL: https://www.cmu.edu/student-affairs/dean/kiltie/index.html


 55%|█████▌    | 8068/14621 [3:36:03<3:03:58,  1.68s/it]

Failed to parse URL at index 8066, URL: https://www.usnews.com/best-colleges/carnegie-mellon-university-3242/overall-rankings


 55%|█████▌    | 8071/14621 [3:36:04<1:28:03,  1.24it/s]

Failed to parse URL at index 8069, URL: https://www.backstage.com/magazine/article/top-acting-colleges-5772/
Failed to parse URL at index 8070, URL: https://www.bloomberg.com/business-schools/


 55%|█████▌    | 8072/14621 [3:36:16<6:29:56,  3.57s/it]

Failed to parse URL at index 8071, URL: https://www.shanghairanking.com/rankings/arwu/2023


 55%|█████▌    | 8073/14621 [3:36:17<5:15:37,  2.89s/it]

Failed to parse URL at index 8072, URL: https://www.princetonreview.com/college-rankings?rankings=best-389-colleges


 55%|█████▌    | 8112/14621 [3:36:57<46:55,  2.31it/s]

Failed to parse URL at index 8111, URL: https://twitter.com/cmumusic


 55%|█████▌    | 8114/14621 [3:36:59<1:00:16,  1.80it/s]

Failed to parse URL at index 8113, URL: https://www.instagram.com/cmumusic/


 56%|█████▌    | 8146/14621 [3:37:08<30:50,  3.50it/s]

Failed to parse URL at index 8145, URL: https://www.cmu.edu/cfa/music/apply/undergraduate_admission/undergrad_application_requirements.html


 56%|█████▌    | 8148/14621 [3:37:08<28:41,  3.76it/s]

Failed to parse URL at index 8147, URL: https://www.cmu.edu/cfa/music/apply/graduate_admission/graduate_application_requirements.html


 56%|█████▌    | 8151/14621 [3:37:09<27:47,  3.88it/s]

Failed to parse URL at index 8150, URL: https://www.cmu.edu/cfa/music/apply/audition-portfolio-review/pre-screening-materials.html


 57%|█████▋    | 8286/14621 [3:38:02<2:23:06,  1.36s/it]

Failed to parse URL at index 8285, URL: https://athletics.cmu.edu/composite


 57%|█████▋    | 8292/14621 [3:38:07<1:29:58,  1.17it/s]

Failed to parse URL at index 8291, URL: https://events.cmu.edu/social-media/index.html


 57%|█████▋    | 8293/14621 [3:38:08<1:26:05,  1.23it/s]

Failed to parse URL at index 8292, URL: https://events.cmu.edu/about/index.html


 57%|█████▋    | 8346/14621 [3:38:25<54:49,  1.91it/s]

Failed to parse URL at index 8345, URL: https://pittsburghmusicals.com/support/donate/


 57%|█████▋    | 8355/14621 [3:38:34<1:40:11,  1.04it/s]

Failed to parse URL at index 8355, URL: https://www.grazecraze.com/north-pittsburgh-pa


 57%|█████▋    | 8360/14621 [3:38:36<1:02:21,  1.67it/s]

Failed to parse URL at index 8359, URL: https://www.twitter.com/pmtmusicals


 57%|█████▋    | 8361/14621 [3:38:37<1:09:21,  1.50it/s]

Failed to parse URL at index 8360, URL: https://www.instagram.com/pmtmusicals


 57%|█████▋    | 8362/14621 [3:38:38<1:13:19,  1.42it/s]

Failed to parse URL at index 8361, URL: https://pittsburghmusicals.com/community-connections/


 57%|█████▋    | 8376/14621 [3:38:56<2:04:00,  1.19s/it]

Failed to parse URL at index 8375, URL: https://playhouse.culturaldistrict.org/package/13739


 57%|█████▋    | 8377/14621 [3:38:56<1:33:36,  1.11it/s]

Failed to parse URL at index 8376, URL: https://playhouse.culturaldistrict.org/session?forward_to=https%3A%2F%2Fplayhouse.culturaldistrict.org%2Fpackage%2Frenew%2F27793


 57%|█████▋    | 8382/14621 [3:39:05<2:17:36,  1.32s/it]

Failed to parse URL at index 8381, URL: https://playhouse.culturaldistrict.org/production/95831/escher-quartet


 57%|█████▋    | 8385/14621 [3:39:34<8:13:01,  4.74s/it] 

Failed to parse URL at index 8384, URL: https://playhouse.culturaldistrict.org/production/95837/tommy-mesa-and-michelle-cann


 57%|█████▋    | 8388/14621 [3:39:59<9:20:14,  5.39s/it] 

Failed to parse URL at index 8387, URL: https://playhouse.culturaldistrict.org/production/95841/les-delices


 57%|█████▋    | 8391/14621 [3:40:18<8:30:27,  4.92s/it] 

Failed to parse URL at index 8390, URL: https://playhouse.culturaldistrict.org/production/95846/publiquartet


 57%|█████▋    | 8394/14621 [3:40:48<10:53:16,  6.29s/it]

Failed to parse URL at index 8393, URL: https://playhouse.culturaldistrict.org/production/95850/dover-quartet


 58%|█████▊    | 8424/14621 [3:41:15<59:12,  1.74it/s]  

Failed to parse URL at index 8423, URL: https://forms.gle/vohzJQ6pwcx1sXDMA


 58%|█████▊    | 8429/14621 [3:41:26<4:15:59,  2.48s/it]

Failed to parse URL at index 8428, URL: https://www.youtube.com/results?search_query=%23MySongIsYourSong


 58%|█████▊    | 8431/14621 [3:41:29<3:07:27,  1.82s/it]

Failed to parse URL at index 8430, URL: https://www.instagram.com/explore/tags/mysongisyoursong/


 58%|█████▊    | 8435/14621 [3:41:39<4:44:01,  2.75s/it]

Failed to parse URL at index 8434, URL: https://www.post-gazette.com/ae/music/2020/06/15/What-s-happening-in-Pittsburgh-and-online-this-weekend-June-18-21/stories/202006150069


 58%|█████▊    | 8436/14621 [3:41:40<4:01:12,  2.34s/it]

Failed to parse URL at index 8435, URL: https://nextpittsburgh.com/events/14-things-to-do-this-weekend-in-pittsburgh-from-the-jazz-festival-to-the-gap-relay/


 58%|█████▊    | 8441/14621 [3:41:49<3:53:45,  2.27s/it]

Failed to parse URL at index 8441, URL: http://triblive.com/aande/music/10353448-74/music-says-pittsburgh


 58%|█████▊    | 8443/14621 [3:41:54<4:05:58,  2.39s/it]

Failed to parse URL at index 8442, URL: http://www.post-gazette.com/ae/music/2015/06/19/Let-the-music-play-all-day-Sunday/stories/201506190080
Failed to parse URL at index 8443, URL: http://triblive.com/aande/music/8443314-74/music-pittsburgh-says#axzz3dWExYDv3
Failed to parse URL at index 8444, URL: http://www.pghcitypaper.com/pittsburgh/this-sunday-the-international-make-music-festival-comes-to-pittsburgh/Content?oid=1834235&mode=print


 58%|█████▊    | 8446/14621 [3:42:00<3:37:25,  2.11s/it]

Failed to parse URL at index 8445, URL: http://blogs.post-gazette.com/arts-entertainment/measured-words/44313-make-music-pittsburgh-call-for-artists-venues


 58%|█████▊    | 8485/14621 [3:42:55<1:59:55,  1.17s/it]

Failed to parse URL at index 8484, URL: http://www.pittsburghsymphony.org


 58%|█████▊    | 8552/14621 [3:44:19<2:03:00,  1.22s/it]

Failed to parse URL at index 8552, URL: https://www.carnegielibrary.org/special-collections/music-special-collections/


 59%|█████▊    | 8557/14621 [3:44:25<1:58:37,  1.17s/it]

Failed to parse URL at index 8556, URL: https://pittsburghsymphony.org/pso_home/web/about-landing/history/history-of-the-pittsburgh-symphony-orchestra


 59%|█████▊    | 8560/14621 [3:44:35<4:22:16,  2.60s/it]

Failed to parse URL at index 8559, URL: https://web.archive.org/web/20121023192200/http://www.time.com/time/magazine/article/0,9171,757929,00.html?iid=chix-sphere


 59%|█████▊    | 8561/14621 [3:44:35<3:21:18,  1.99s/it]

Failed to parse URL at index 8560, URL: http://www.time.com/time/magazine/article/0,9171,757929,00.html?iid=chix-sphere


 59%|█████▊    | 8573/14621 [3:45:42<3:40:02,  2.18s/it]

Failed to parse URL at index 8573, URL: https://www.nytimes.com/2014/07/14/arts/music/lorin-maazel-brilliant-intense-and-enigmatic-conductor-dies-at-84.html


 59%|█████▊    | 8575/14621 [3:45:43<2:20:53,  1.40s/it]

Failed to parse URL at index 8575, URL: https://www.nytimes.com/1986/03/26/arts/music-lorin-maazel-leads-the-pittsburgh.html


 59%|█████▊    | 8577/14621 [3:45:44<1:46:13,  1.05s/it]

Failed to parse URL at index 8577, URL: https://www.nytimes.com/1994/07/04/arts/maazel-to-leave-pittsburgh-symphony-in-96.html


 59%|█████▊    | 8580/14621 [3:45:50<2:36:56,  1.56s/it]

Failed to parse URL at index 8580, URL: http://triblive.com/aande/music/7028112-74/maazel-pittsburgh-music#axzz3IIpgoySr


 59%|█████▊    | 8582/14621 [3:45:55<3:20:47,  1.99s/it]

Failed to parse URL at index 8581, URL: http://www.post-gazette.com/pg/04137/315957.stm


 59%|█████▊    | 8586/14621 [3:46:00<2:45:23,  1.64s/it]

Failed to parse URL at index 8586, URL: http://triblive.com/x/pittsburghtrib/ae/music/s_411162.html
Failed to parse URL at index 8587, URL: http://triblive.com/x/pittsburghtrib/ae/more/s_458965.html


 59%|█████▉    | 8591/14621 [3:46:08<3:17:55,  1.97s/it]

Failed to parse URL at index 8590, URL: http://www.post-gazette.com/pg/07300/828842-42.stm


 59%|█████▉    | 8592/14621 [3:46:08<2:40:57,  1.60s/it]

Failed to parse URL at index 8591, URL: http://www.pittsburghlive.com/x/pittsburghtrib/s_490039.html
Failed to parse URL at index 8592, URL: https://www.nytimes.com/2007/01/24/arts/24hone.html?ex=1327294800&en=63c573b6727e887e&ei=5088&partner=rssnyt&emc=rss


 59%|█████▉    | 8594/14621 [3:46:09<1:52:10,  1.12s/it]

Failed to parse URL at index 8593, URL: http://www.pittsburghsymphony.org/honeckpr.pdf


 59%|█████▉    | 8595/14621 [3:46:14<3:20:46,  2.00s/it]

Failed to parse URL at index 8594, URL: http://www.post-gazette.com/pg/09253/996931-388.stm


 59%|█████▉    | 8596/14621 [3:46:19<4:34:43,  2.74s/it]

Failed to parse URL at index 8595, URL: http://www.post-gazette.com/pg/12044/1209951-100.stm


 59%|█████▉    | 8597/14621 [3:46:24<5:34:39,  3.33s/it]

Failed to parse URL at index 8596, URL: https://www.washingtonpost.com/wp-dyn/content/article/2007/06/14/AR2007061402486.html


 59%|█████▉    | 8599/14621 [3:46:30<5:40:19,  3.39s/it]

Failed to parse URL at index 8598, URL: http://www.post-gazette.com/pg/07014/753720-42.stm


 59%|█████▉    | 8600/14621 [3:46:35<6:26:50,  3.85s/it]

Failed to parse URL at index 8599, URL: http://www.post-gazette.com/pg/06335/742583-42.stm


 59%|█████▉    | 8601/14621 [3:46:40<7:00:43,  4.19s/it]

Failed to parse URL at index 8600, URL: http://www.post-gazette.com/pg/09086/958602-42.stm


 59%|█████▉    | 8602/14621 [3:46:45<7:24:54,  4.43s/it]

Failed to parse URL at index 8601, URL: https://www.post-gazette.com/ae/music/2018/09/24/Pittsburgh-Symphony-Orchestra-Manfred-Honeck-contract-new-board-chair-Tony-Bucci/stories/201809240103


 59%|█████▉    | 8603/14621 [3:46:46<5:31:45,  3.31s/it]

Failed to parse URL at index 8602, URL: https://pittsburghsymphony.org/pso_home/press-room/press-releases/2021-2022/manfred-honeck-extends-contract-through-the-2027-2028-season


 59%|█████▉    | 8605/14621 [3:46:52<5:16:35,  3.16s/it]

Failed to parse URL at index 8604, URL: http://wqed.org/fm/psoradio/pso_history.php


 59%|█████▉    | 8607/14621 [3:46:53<2:58:46,  1.78s/it]

Failed to parse URL at index 8606, URL: http://www.pittsburghsymphony.org/pso_home/web/hh-history


 59%|█████▉    | 8611/14621 [3:46:57<1:53:33,  1.13s/it]

Failed to parse URL at index 8610, URL: http://www.pittsburghsymphony.org/


 59%|█████▉    | 8624/14621 [3:47:18<2:39:36,  1.60s/it]

Failed to parse URL at index 8623, URL: http://olduli.nli.org.il/F/?func=find-b&local_base=NLX10&find_code=UID&request=987007305258505171


 59%|█████▉    | 8628/14621 [3:47:26<4:04:04,  2.44s/it]

Failed to parse URL at index 8627, URL: https://www.idref.fr/16220339X


 59%|█████▉    | 8641/14621 [3:47:38<1:39:49,  1.00s/it]

Failed to parse URL at index 8640, URL: https://www.pittsburghsymphony.org/pso_home/web/about-landing/melia-p-tourangeau-president#maincontent


 59%|█████▉    | 8642/14621 [3:47:38<1:18:57,  1.26it/s]

Failed to parse URL at index 8641, URL: https://www.pittsburghsymphony.org/


 59%|█████▉    | 8643/14621 [3:47:39<1:04:29,  1.54it/s]

Failed to parse URL at index 8642, URL: https://www.pittsburghsymphony.org/shopping_cart/show


 59%|█████▉    | 8644/14621 [3:47:39<54:06,  1.84it/s]  

Failed to parse URL at index 8643, URL: https://www.pittsburghsymphony.org/calendar


 59%|█████▉    | 8645/14621 [3:47:39<46:57,  2.12it/s]

Failed to parse URL at index 8644, URL: https://www.pittsburghsymphony.org/pso_home/web/about-landing


 59%|█████▉    | 8646/14621 [3:47:40<41:58,  2.37it/s]

Failed to parse URL at index 8645, URL: https://www.pittsburghsymphony.org/pso_home/web/musicians


 59%|█████▉    | 8647/14621 [3:47:40<38:18,  2.60it/s]

Failed to parse URL at index 8646, URL: https://www.pittsburghsymphony.org/pso_home/biographies/pso-conductors/honeck-manfred


 59%|█████▉    | 8648/14621 [3:47:40<35:57,  2.77it/s]

Failed to parse URL at index 8647, URL: https://www.pittsburghsymphony.org/pso_home/web/about-landing/career-opportunities


 59%|█████▉    | 8649/14621 [3:47:41<42:56,  2.32it/s]

Failed to parse URL at index 8648, URL: https://culturaldistrict.org/advertising/advertise


 59%|█████▉    | 8650/14621 [3:47:41<39:15,  2.54it/s]

Failed to parse URL at index 8649, URL: https://www.pittsburghsymphony.org/pso_home/web/tickets-landing


 59%|█████▉    | 8651/14621 [3:47:42<36:37,  2.72it/s]

Failed to parse URL at index 8650, URL: https://www.pittsburghsymphony.org/pso_home/web/subscriptions/why-subscribe-24-25


 59%|█████▉    | 8652/14621 [3:47:42<34:35,  2.88it/s]

Failed to parse URL at index 8651, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs


 59%|█████▉    | 8653/14621 [3:47:42<33:14,  2.99it/s]

Failed to parse URL at index 8652, URL: https://www.pittsburghsymphony.org/pso_home/web/visit-landing


 59%|█████▉    | 8654/14621 [3:47:42<32:13,  3.09it/s]

Failed to parse URL at index 8653, URL: https://www.pittsburghsymphony.org/pso_home/web/tickets-landing/seating-charts


 59%|█████▉    | 8655/14621 [3:47:43<31:46,  3.13it/s]

Failed to parse URL at index 8654, URL: https://www.pittsburghsymphony.org/pso_home/web/visit-landing/frequently-asked-questions


 59%|█████▉    | 8656/14621 [3:47:43<33:49,  2.94it/s]

Failed to parse URL at index 8655, URL: https://www.pittsburghsymphony.org/pso_home/web/give-landing/corporate-partnerships/dining-partners


 59%|█████▉    | 8657/14621 [3:47:43<32:38,  3.05it/s]

Failed to parse URL at index 8656, URL: https://www.pittsburghsymphony.org/pso_home/web/visit-landing/directions-parking-lodging


 59%|█████▉    | 8658/14621 [3:47:44<32:00,  3.10it/s]

Failed to parse URL at index 8657, URL: https://www.pittsburghsymphony.org/pso_home/web/give-landing


 59%|█████▉    | 8660/14621 [3:47:45<53:23,  1.86it/s]  

Failed to parse URL at index 8659, URL: https://www.pittsburghsymphony.org/pso_home/web/give-landing/individual-giving


 59%|█████▉    | 8661/14621 [3:47:46<46:17,  2.15it/s]

Failed to parse URL at index 8660, URL: https://www.pittsburghsymphony.org/pso_home/web/give-landing/corporate-partnerships


 59%|█████▉    | 8664/14621 [3:47:49<1:25:43,  1.16it/s]

Failed to parse URL at index 8663, URL: https://www.pittsburghsymphony.org/order_history/show


 59%|█████▉    | 8665/14621 [3:47:49<1:09:24,  1.43it/s]

Failed to parse URL at index 8664, URL: https://www.pittsburghsymphony.org/terminate


 59%|█████▉    | 8666/14621 [3:47:50<57:47,  1.72it/s]  

Failed to parse URL at index 8665, URL: https://www.pittsburghsymphony.org/session


 59%|█████▉    | 8667/14621 [3:47:50<49:38,  2.00it/s]

Failed to parse URL at index 8666, URL: https://www.pittsburghsymphony.org/promotion/new


 59%|█████▉    | 8669/14621 [3:47:51<50:24,  1.97it/s]

Failed to parse URL at index 8668, URL: https://twitter.com/pghsymphony


 59%|█████▉    | 8670/14621 [3:47:52<53:43,  1.85it/s]

Failed to parse URL at index 8669, URL: https://www.instagram.com/pghsymphony/


 59%|█████▉    | 8672/14621 [3:47:59<2:52:15,  1.74s/it]

Failed to parse URL at index 8671, URL: https://www.pittsburghsymphony.org/pso_home/web/enotes-signup


 59%|█████▉    | 8673/14621 [3:47:59<2:09:41,  1.31s/it]

Failed to parse URL at index 8672, URL: https://www.pittsburghsymphony.org/pso_home/web/contact


 59%|█████▉    | 8674/14621 [3:47:59<1:39:41,  1.01s/it]

Failed to parse URL at index 8673, URL: https://www.pittsburghsymphony.org/pso_home/press-room


 59%|█████▉    | 8676/14621 [3:48:02<1:54:43,  1.16s/it]

Failed to parse URL at index 8675, URL: https://www.pittsburghsymphony.org/pso_home/web/reserving-heinz-hall


 59%|█████▉    | 8677/14621 [3:48:03<1:29:16,  1.11it/s]

Failed to parse URL at index 8676, URL: https://www.pittsburghsymphony.org/pso_home/web/public-disclosure-documents


 59%|█████▉    | 8678/14621 [3:48:03<1:11:22,  1.39it/s]

Failed to parse URL at index 8677, URL: https://www.pittsburghsymphony.org/pso_home/web/about-landing/tel:4123924900


 59%|█████▉    | 8679/14621 [3:48:03<58:56,  1.68it/s]  

Failed to parse URL at index 8678, URL: https://www.pittsburghsymphony.org/pso_home/web/privacy-policy


 59%|█████▉    | 8680/14621 [3:48:03<50:15,  1.97it/s]

Failed to parse URL at index 8679, URL: https://www.pittsburghsymphony.org/pso_home/web/terms-and-conditions


 59%|█████▉    | 8682/14621 [3:48:04<47:13,  2.10it/s]

Failed to parse URL at index 8681, URL: https://twitter.com/manfredhoneck


 59%|█████▉    | 8683/14621 [3:48:05<50:08,  1.97it/s]

Failed to parse URL at index 8682, URL: https://www.instagram.com/manfredhoneck/


 59%|█████▉    | 8684/14621 [3:48:05<44:09,  2.24it/s]

Failed to parse URL at index 8683, URL: https://www.pittsburghsymphony.org/pso_home/biographies/pso-conductors/tel:4123924900


 59%|█████▉    | 8686/14621 [3:48:06<39:56,  2.48it/s]

Failed to parse URL at index 8684, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/mccarroll-david
Failed to parse URL at index 8685, URL: https://pittsburghsymphony.org/pso_home/biographies/campagna-justine


 59%|█████▉    | 8688/14621 [3:48:06<29:38,  3.34it/s]

Failed to parse URL at index 8686, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/naroff-dylan
Failed to parse URL at index 8687, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/blumenthal-kelsey


 59%|█████▉    | 8689/14621 [3:48:07<29:36,  3.34it/s]

Failed to parse URL at index 8688, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/chen-livingston-ellen


 59%|█████▉    | 8691/14621 [3:48:07<26:37,  3.71it/s]

Failed to parse URL at index 8689, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/cheng-irene
Failed to parse URL at index 8690, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/clendenning-sarah


 59%|█████▉    | 8693/14621 [3:48:08<25:13,  3.92it/s]

Failed to parse URL at index 8691, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/peters-fujito-alison
Failed to parse URL at index 8692, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/krechkovsky-marta


 59%|█████▉    | 8694/14621 [3:48:08<26:30,  3.73it/s]

Failed to parse URL at index 8693, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/orchard-jennifer


 59%|█████▉    | 8695/14621 [3:48:08<27:25,  3.60it/s]

Failed to parse URL at index 8694, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/park-susanne


 59%|█████▉    | 8696/14621 [3:48:09<28:02,  3.52it/s]

Failed to parse URL at index 8695, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/yoder-kristina


 59%|█████▉    | 8697/14621 [3:48:09<28:28,  3.47it/s]

Failed to parse URL at index 8696, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/black-jeremy


 59%|█████▉    | 8698/14621 [3:48:09<28:54,  3.41it/s]

Failed to parse URL at index 8697, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/lev-louis


 59%|█████▉    | 8699/14621 [3:48:10<29:05,  3.39it/s]

Failed to parse URL at index 8698, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/oboyle-dennis


 60%|█████▉    | 8700/14621 [3:48:10<29:18,  3.37it/s]

Failed to parse URL at index 8699, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/motchalov-laura


 60%|█████▉    | 8701/14621 [3:48:10<26:37,  3.71it/s]

Failed to parse URL at index 8700, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/fuller-andrew


 60%|█████▉    | 8703/14621 [3:48:11<25:20,  3.89it/s]

Failed to parse URL at index 8701, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/benet-hart-lorien
Failed to parse URL at index 8702, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/kim-yeokyung


 60%|█████▉    | 8704/14621 [3:48:11<23:44,  4.15it/s]

Failed to parse URL at index 8703, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/boxianzi-vivian-ling


 60%|█████▉    | 8706/14621 [3:48:11<23:47,  4.14it/s]

Failed to parse URL at index 8704, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/mahave-claudia
Failed to parse URL at index 8705, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/pantikian-cecee


 60%|█████▉    | 8707/14621 [3:48:11<22:44,  4.33it/s]

Failed to parse URL at index 8706, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/regi-pappa


 60%|█████▉    | 8708/14621 [3:48:12<22:20,  4.41it/s]

Failed to parse URL at index 8707, URL: https://pittsburghsymphony.org/biography/carolyn-semes


 60%|█████▉    | 8709/14621 [3:48:12<22:00,  4.48it/s]

Failed to parse URL at index 8708, URL: https://pittsburghsymphony.org/biography/yingchen-zhang


 60%|█████▉    | 8710/14621 [3:48:12<24:31,  4.02it/s]

Failed to parse URL at index 8709, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/mead-chamis-tatjana


 60%|█████▉    | 8711/14621 [3:48:13<26:17,  3.75it/s]

Failed to parse URL at index 8710, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/vasquez-joen


 60%|█████▉    | 8712/14621 [3:48:13<24:42,  3.99it/s]

Failed to parse URL at index 8711, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/gingras-roy-marylene


 60%|█████▉    | 8713/14621 [3:48:13<23:28,  4.20it/s]

Failed to parse URL at index 8712, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/fuller-laura


 60%|█████▉    | 8714/14621 [3:48:13<22:41,  4.34it/s]

Failed to parse URL at index 8713, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/juhl-sean


 60%|█████▉    | 8715/14621 [3:48:13<25:03,  3.93it/s]

Failed to parse URL at index 8714, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/laraby-goldwasser-erina


 60%|█████▉    | 8716/14621 [3:48:14<26:30,  3.71it/s]

Failed to parse URL at index 8715, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/mossburg-aaron


 60%|█████▉    | 8718/14621 [3:48:14<25:03,  3.93it/s]

Failed to parse URL at index 8716, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/tretick-stephanie
Failed to parse URL at index 8717, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/wickesberg-andrew


 60%|█████▉    | 8719/14621 [3:48:14<23:30,  4.18it/s]

Failed to parse URL at index 8718, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/martindale-williams-anne


 60%|█████▉    | 8720/14621 [3:48:15<25:17,  3.89it/s]

Failed to parse URL at index 8719, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/dale-jeong


 60%|█████▉    | 8721/14621 [3:48:15<23:48,  4.13it/s]

Failed to parse URL at index 8720, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/liu-adam


 60%|█████▉    | 8722/14621 [3:48:15<22:46,  4.32it/s]

Failed to parse URL at index 8721, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/istomin-mikhail


 60%|█████▉    | 8723/14621 [3:48:15<22:05,  4.45it/s]

Failed to parse URL at index 8722, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/banerdt-bronwyn


 60%|█████▉    | 8724/14621 [3:48:16<21:36,  4.55it/s]

Failed to parse URL at index 8723, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/debruyn-michael


 60%|█████▉    | 8725/14621 [3:48:16<21:18,  4.61it/s]

Failed to parse URL at index 8724, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/lee-alexandra


 60%|█████▉    | 8727/14621 [3:48:16<22:37,  4.34it/s]

Failed to parse URL at index 8725, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/yun-ya-lo
Failed to parse URL at index 8726, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/powers-charlie


 60%|█████▉    | 8728/14621 [3:48:17<21:43,  4.52it/s]

Failed to parse URL at index 8727, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/shivone-karissa


 60%|█████▉    | 8729/14621 [3:48:17<21:21,  4.60it/s]

Failed to parse URL at index 8728, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/myers-nicholas


 60%|█████▉    | 8730/14621 [3:48:17<21:02,  4.67it/s]

Failed to parse URL at index 8729, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/mclean-brandon


 60%|█████▉    | 8731/14621 [3:48:17<22:24,  4.38it/s]

Failed to parse URL at index 8730, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/campagna-joseph


 60%|█████▉    | 8732/14621 [3:48:18<24:42,  3.97it/s]

Failed to parse URL at index 8731, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/grubbs-jeffrey


 60%|█████▉    | 8733/14621 [3:48:18<26:05,  3.76it/s]

Failed to parse URL at index 8732, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/guild-peter


 60%|█████▉    | 8734/14621 [3:48:18<27:13,  3.60it/s]

Failed to parse URL at index 8733, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/howard-micah


 60%|█████▉    | 8735/14621 [3:48:18<27:59,  3.50it/s]

Failed to parse URL at index 8734, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/moore-john


 60%|█████▉    | 8736/14621 [3:48:19<28:27,  3.45it/s]

Failed to parse URL at index 8735, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/white-aaron


 60%|█████▉    | 8737/14621 [3:48:19<25:58,  3.77it/s]

Failed to parse URL at index 8736, URL: https://pittsburghsymphony.org/biography/drew-collins?_gl=1*18m9dlf*_ga*MTY0Mjk5MzY1Mi4xNjk0MTkwMjgz*_ga_KS0GNGE5TT*MTY5NzgyMjg1My41Mi4xLjE2OTc4MjU4ODcuMTIuMC4w


 60%|█████▉    | 8738/14621 [3:48:19<27:19,  3.59it/s]

Failed to parse URL at index 8737, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/van-hoesen-gretchen


 60%|█████▉    | 8739/14621 [3:48:19<25:05,  3.91it/s]

Failed to parse URL at index 8738, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/mcghee-lorna


 60%|█████▉    | 8740/14621 [3:48:20<23:39,  4.14it/s]

Failed to parse URL at index 8739, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/steele-jennifer


 60%|█████▉    | 8741/14621 [3:48:20<22:43,  4.31it/s]

Failed to parse URL at index 8740, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/kenny-rhian?_gl=1*3n6m9o*_ga*MTg5NzYxMzA4NS4xNjYwNTc2NTM5*_ga_KS0GNGE5TT*MTY2NTY4MDI0OS4xMTYuMS4xNjY1NjgyMTI5LjU3LjAuMA..


 60%|█████▉    | 8743/14621 [3:48:20<23:15,  4.21it/s]

Failed to parse URL at index 8741, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/koledo-dealmeida-cynthia
Failed to parse URL at index 8742, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/blair-max


 60%|█████▉    | 8744/14621 [3:48:21<22:14,  4.40it/s]

Failed to parse URL at index 8743, URL: https://pittsburghsymphony.org/biography/samuel-nemec


 60%|█████▉    | 8746/14621 [3:48:21<22:54,  4.27it/s]

Failed to parse URL at index 8744, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/rusinek-michael
Failed to parse URL at index 8745, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/luperi-victoria


 60%|█████▉    | 8747/14621 [3:48:21<25:05,  3.90it/s]

Failed to parse URL at index 8746, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/samuels-ron


 60%|█████▉    | 8748/14621 [3:48:22<23:39,  4.14it/s]

Failed to parse URL at index 8747, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/howell-jack


 60%|█████▉    | 8749/14621 [3:48:22<25:33,  3.83it/s]

Failed to parse URL at index 8748, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/sogg-david


 60%|█████▉    | 8750/14621 [3:48:22<26:55,  3.63it/s]

Failed to parse URL at index 8749, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/pandolfi-philip


 60%|█████▉    | 8751/14621 [3:48:23<27:38,  3.54it/s]

Failed to parse URL at index 8750, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/rodgers-james
Failed to parse URL at index 8751, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/caballero-william


 60%|█████▉    | 8753/14621 [3:48:23<23:41,  4.13it/s]

Failed to parse URL at index 8752, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/kostyniak-stephen


 60%|█████▉    | 8754/14621 [3:48:23<22:35,  4.33it/s]

Failed to parse URL at index 8753, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/smith-zachary


 60%|█████▉    | 8755/14621 [3:48:23<21:52,  4.47it/s]

Failed to parse URL at index 8754, URL: https://pittsburghsymphony.org/biography/michelle-hembree


 60%|█████▉    | 8756/14621 [3:48:24<21:24,  4.57it/s]

Failed to parse URL at index 8755, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/houghton-mark


 60%|█████▉    | 8757/14621 [3:48:24<24:03,  4.06it/s]

Failed to parse URL at index 8756, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/lauver-robert


 60%|█████▉    | 8758/14621 [3:48:24<23:05,  4.23it/s]

Failed to parse URL at index 8757, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/wilkinson-micah


 60%|█████▉    | 8759/14621 [3:48:24<22:17,  4.38it/s]

Failed to parse URL at index 8758, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/conrad-jones


 60%|█████▉    | 8760/14621 [3:48:25<24:36,  3.97it/s]

Failed to parse URL at index 8759, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/berntsen-neal


 60%|█████▉    | 8761/14621 [3:48:25<26:20,  3.71it/s]

Failed to parse URL at index 8760, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/winkler-chad


 60%|█████▉    | 8762/14621 [3:48:25<24:41,  3.96it/s]

Failed to parse URL at index 8761, URL: https://pittsburghsymphony.org/biography/joshua-carr


 60%|█████▉    | 8763/14621 [3:48:25<26:19,  3.71it/s]

Failed to parse URL at index 8762, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/sullivan-peter


 60%|█████▉    | 8764/14621 [3:48:26<24:43,  3.95it/s]

Failed to parse URL at index 8763, URL: https://pittsburghsymphony.org/biography/douglas-rosenthal


 60%|█████▉    | 8765/14621 [3:48:26<26:11,  3.73it/s]

Failed to parse URL at index 8764, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/nova-james


 60%|█████▉    | 8766/14621 [3:48:26<24:19,  4.01it/s]

Failed to parse URL at index 8765, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/dee-jeffrey


 60%|█████▉    | 8767/14621 [3:48:26<26:08,  3.73it/s]

Failed to parse URL at index 8766, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/knox-craig


 60%|█████▉    | 8768/14621 [3:48:27<24:16,  4.02it/s]

Failed to parse URL at index 8767, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/benoit-james


 60%|█████▉    | 8769/14621 [3:48:27<26:00,  3.75it/s]

Failed to parse URL at index 8768, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/allen-christopher


 60%|█████▉    | 8770/14621 [3:48:27<26:57,  3.62it/s]

Failed to parse URL at index 8769, URL: https://www.pittsburghsymphony.org/pso_home/biographies/musicians/branson-jeremy


 60%|█████▉    | 8771/14621 [3:48:27<24:55,  3.91it/s]

Failed to parse URL at index 8770, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/ojeda-rodrigo


 60%|█████▉    | 8772/14621 [3:48:28<23:30,  4.15it/s]

Failed to parse URL at index 8771, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/gedris-lisa


 60%|██████    | 8773/14621 [3:48:28<22:29,  4.33it/s]

Failed to parse URL at index 8772, URL: https://pittsburghsymphony.org/pso_home/biographies/musicians/sheryl-hadeka


 60%|██████    | 8774/14621 [3:48:28<24:46,  3.93it/s]

Failed to parse URL at index 8773, URL: https://www.pittsburghsymphony.org/pso_home/web/tel:4123924900


 60%|██████    | 8775/14621 [3:48:28<26:19,  3.70it/s]

Failed to parse URL at index 8774, URL: https://www.pittsburghsymphony.org/pso_home/web/subscriptions/why-subscribe-24-25/24-25-fiddlesticks-musical-exploration


 60%|██████    | 8776/14621 [3:48:29<27:10,  3.58it/s]

Failed to parse URL at index 8775, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/schooltime-programs


 60%|██████    | 8777/14621 [3:48:29<27:46,  3.51it/s]

Failed to parse URL at index 8776, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/student-side-by-side-program


 60%|██████    | 8779/14621 [3:48:30<29:56,  3.25it/s]

Failed to parse URL at index 8777, URL: http://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/student-side-by-side-program
Failed to parse URL at index 8778, URL: https://pittsburghsymphony.org/pso_home/web/community-landing/learning-and-engagement-volunteer-corps


 60%|██████    | 8780/14621 [3:48:30<29:42,  3.28it/s]

Failed to parse URL at index 8779, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/tel:4123924900


 60%|██████    | 8781/14621 [3:48:30<29:44,  3.27it/s]

Failed to parse URL at index 8780, URL: https://www.pittsburghsymphony.org/package/13405


 60%|██████    | 8782/14621 [3:48:31<29:51,  3.26it/s]

Failed to parse URL at index 8781, URL: https://www.pittsburghsymphony.org/package/renew/27427


 60%|██████    | 8783/14621 [3:48:31<29:37,  3.28it/s]

Failed to parse URL at index 8782, URL: https://www.pittsburghsymphony.org/production/94087/fiddlesticks-natures-soundtrack


 60%|██████    | 8784/14621 [3:48:31<29:33,  3.29it/s]

Failed to parse URL at index 8783, URL: https://www.pittsburghsymphony.org/production/94088/fiddlesticks-the-sounds-of-invention


 60%|██████    | 8785/14621 [3:48:32<29:36,  3.29it/s]

Failed to parse URL at index 8784, URL: https://www.pittsburghsymphony.org/production/94089/fiddlesticks-imagine-that


 60%|██████    | 8786/14621 [3:48:32<29:39,  3.28it/s]

Failed to parse URL at index 8785, URL: https://www.pittsburghsymphony.org/accessibility/wheelchair_seating?org=2


 60%|██████    | 8787/14621 [3:48:32<29:42,  3.27it/s]

Failed to parse URL at index 8786, URL: https://www.pittsburghsymphony.org/accessibility/sensory_friendly?org=2


 60%|██████    | 8788/14621 [3:48:33<29:30,  3.29it/s]

Failed to parse URL at index 8787, URL: https://www.pittsburghsymphony.org/accessibility/large_print?org=2


 60%|██████    | 8789/14621 [3:48:33<29:35,  3.28it/s]

Failed to parse URL at index 8788, URL: https://www.pittsburghsymphony.org/accessibility/sign_language?org=2


 60%|██████    | 8790/14621 [3:48:33<29:30,  3.29it/s]

Failed to parse URL at index 8789, URL: https://www.pittsburghsymphony.org/accessibility/braille?org=2


 60%|██████    | 8791/14621 [3:48:33<29:22,  3.31it/s]

Failed to parse URL at index 8790, URL: https://www.pittsburghsymphony.org/pso_home/accessibility/dauler-hearing-loop


 60%|██████    | 8792/14621 [3:48:34<29:26,  3.30it/s]

Failed to parse URL at index 8791, URL: https://www.pittsburghsymphony.org/pso_home/web/subscriptions/why-subscribe-24-25/tel:4123924900


 60%|██████    | 8793/14621 [3:48:34<29:29,  3.29it/s]

Failed to parse URL at index 8792, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/schooltime-concerts#maincontent


 60%|██████    | 8801/14621 [3:48:43<1:23:28,  1.16it/s]

Failed to parse URL at index 8800, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/schooltime-programs/schooltime-resources


 60%|██████    | 8802/14621 [3:48:43<1:07:41,  1.43it/s]

Failed to parse URL at index 8801, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/schooltime-concerts/schooltime-know-before-you-go


 60%|██████    | 8803/14621 [3:48:43<56:26,  1.72it/s]  

Failed to parse URL at index 8802, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/schooltime-concerts/schooltime-transportation


 60%|██████    | 8804/14621 [3:48:43<48:38,  1.99it/s]

Failed to parse URL at index 8803, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/tel:4123924900


 60%|██████    | 8805/14621 [3:48:44<43:01,  2.25it/s]

Failed to parse URL at index 8804, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/schooltime-concerts/digital-schooltime#maincontent


 60%|██████    | 8806/14621 [3:48:44<38:56,  2.49it/s]

Failed to parse URL at index 8805, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/schooltime-concerts/digital-schooltime---meet-the-orchestra-registration


 60%|██████    | 8807/14621 [3:48:44<36:05,  2.69it/s]

Failed to parse URL at index 8806, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/schooltime-concerts/digital-schooltime---what-makes-jazz-registration


 60%|██████    | 8808/14621 [3:48:45<34:04,  2.84it/s]

Failed to parse URL at index 8807, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/schooltime-concerts/digital-schooltime---exploring-jazz-with-byron-registration


 60%|██████    | 8809/14621 [3:48:45<32:38,  2.97it/s]

Failed to parse URL at index 8808, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/schooltime-concerts/digital-schooltime---origin-stories-registration


 60%|██████    | 8810/14621 [3:48:45<31:39,  3.06it/s]

Failed to parse URL at index 8809, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/schooltime-concerts/digital-schooltime---lift-every-voice-registration


 60%|██████    | 8811/14621 [3:48:46<30:56,  3.13it/s]

Failed to parse URL at index 8810, URL: https://www.pittsburghsymphony.org/pso_home/web/community-landing/learning-programs/schooltime-concerts/tel:4123924900


 60%|██████    | 8819/14621 [3:48:58<3:23:50,  2.11s/it]

Failed to parse URL at index 8818, URL: https://www.post-gazette.com/life/dining/2022/09/22/sultan-doner-gyro-pittsburgh/stories/202209130086


 60%|██████    | 8824/14621 [3:49:05<2:06:27,  1.31s/it]

Failed to parse URL at index 8823, URL: https://www.pittsburghsymphony.org/pso_home/web/subscriptions/why-subscribe


 60%|██████    | 8825/14621 [3:49:06<1:42:31,  1.06s/it]

Failed to parse URL at index 8824, URL: https://www.pittsburghsymphony.org/pso_home/web/give-landing/corporate-partnerships/tel:4123924900


 61%|██████    | 8903/14621 [3:50:07<1:18:27,  1.21it/s]

Failed to parse URL at index 8902, URL: https://www.twitter.com/bandsintown


 61%|██████    | 8904/14621 [3:50:08<1:27:17,  1.09it/s]

Failed to parse URL at index 8903, URL: https://instagram.com/bandsintown


 61%|██████    | 8914/14621 [3:50:18<1:20:41,  1.18it/s]

Failed to parse URL at index 8913, URL: http://www.ticketweb.com/venue/club-cafe-pittsburgh-pa/23219?pl=opus


 61%|██████    | 8926/14621 [3:50:34<1:30:03,  1.05it/s]

Failed to parse URL at index 8925, URL: http://cattivopgh.com/


 61%|██████    | 8942/14621 [3:50:53<1:42:39,  1.08s/it]

Failed to parse URL at index 8941, URL: http://www.instagram.com/poetrypgh/


 61%|██████    | 8945/14621 [3:50:56<1:49:29,  1.16s/it]

Failed to parse URL at index 8944, URL: http://jergels.com/


 61%|██████    | 8954/14621 [3:51:04<1:11:27,  1.32it/s]

Failed to parse URL at index 8953, URL: https://www.enclavepgh.com/upcomingevents


 61%|██████▏   | 8971/14621 [3:51:23<1:14:15,  1.27it/s]

Failed to parse URL at index 8970, URL: https://www.instagram.com/theundergroundpgh/


 61%|██████▏   | 8979/14621 [3:51:35<2:02:34,  1.30s/it]

Failed to parse URL at index 8978, URL: http://pittsburghsymphony.org/venues/heinz-hall


 61%|██████▏   | 8980/14621 [3:51:36<1:59:44,  1.27s/it]

Failed to parse URL at index 8979, URL: http://trustarts.org/pct_home/visit/facilities/benedum


 61%|██████▏   | 8981/14621 [3:51:37<1:42:28,  1.09s/it]

Failed to parse URL at index 8980, URL: http://trustarts.org/pct_home/visit/facilities/byham


 61%|██████▏   | 8983/14621 [3:51:40<1:56:51,  1.24s/it]

Failed to parse URL at index 8982, URL: http://trustarts.org/pct_home/visit/facilities/cabaret


 62%|██████▏   | 9007/14621 [3:52:06<1:49:52,  1.17s/it]

Failed to parse URL at index 9006, URL: https://www.visitpittsburgh.com/directory/hard-rock-cafe/


 62%|██████▏   | 9056/14621 [3:53:04<1:56:52,  1.26s/it]

Failed to parse URL at index 9055, URL: http://www.pittsburghlive.com/x/pittsburghtrib/s_592946.html


 62%|██████▏   | 9059/14621 [3:53:11<2:50:52,  1.84s/it]

Failed to parse URL at index 9058, URL: http://www.pittsburghopera.org/pages/green-initiative


 62%|██████▏   | 9071/14621 [3:53:25<1:46:23,  1.15s/it]

Failed to parse URL at index 9071, URL: https://www.nytimes.com/2017/11/07/arts/music/metropolitan-opera-high-note-exterminating-angel.html


 62%|██████▏   | 9073/14621 [3:53:27<1:38:22,  1.06s/it]

Failed to parse URL at index 9072, URL: https://www.pittsburghopera.org/about/pittsburgh-opera-headquarters


 62%|██████▏   | 9077/14621 [3:53:33<1:39:40,  1.08s/it]

Failed to parse URL at index 9076, URL: http://www.wolvertonartists.com/artists/walker/walker_bio.php


 62%|██████▏   | 9081/14621 [3:53:38<1:52:14,  1.22s/it]

Failed to parse URL at index 9080, URL: https://en.wikipedia.org/w/index.php?title=Theo_Alc%C3%A1ntara&action=edit&redlink=1


 62%|██████▏   | 9086/14621 [3:53:44<2:04:22,  1.35s/it]

Failed to parse URL at index 9085, URL: http://www.pittsburghopera.org/company/history.shtml?search=history=F000


 62%|██████▏   | 9087/14621 [3:53:50<3:57:26,  2.57s/it]

Failed to parse URL at index 9086, URL: http://www.post-gazette.com/magazine/19990504opera6.asp


 62%|██████▏   | 9088/14621 [3:53:55<5:05:10,  3.31s/it]

Failed to parse URL at index 9087, URL: http://www.post-gazette.com/pg/05282/584613-148.stm


 62%|██████▏   | 9089/14621 [3:53:55<3:39:56,  2.39s/it]

Failed to parse URL at index 9088, URL: http://www.pittsburghlive.com/x/pittsburghtrib/living/arts/opera/s_499095.html


 62%|██████▏   | 9090/14621 [3:53:55<2:40:17,  1.74s/it]

Failed to parse URL at index 9089, URL: http://www.pittsburghlive.com/x/pittsburghtrib/living/arts/opera/s_537217.html


 62%|██████▏   | 9091/14621 [3:54:00<4:10:47,  2.72s/it]

Failed to parse URL at index 9090, URL: http://www.post-gazette.com/pg/07302/829345-388.stm


 62%|██████▏   | 9092/14621 [3:54:05<5:14:35,  3.41s/it]

Failed to parse URL at index 9091, URL: http://www.post-gazette.com/pg/06133/689693-42.stm


 62%|██████▏   | 9094/14621 [3:54:08<3:21:44,  2.19s/it]

Failed to parse URL at index 9093, URL: http://www.pittsburghlive.com/x/pittsburghtrib/living/arts/opera/s_474523.html


 62%|██████▏   | 9095/14621 [3:54:13<4:39:48,  3.04s/it]

Failed to parse URL at index 9094, URL: http://www.post-gazette.com/pg/06285/729338-42.stm


 62%|██████▏   | 9096/14621 [3:54:18<5:34:45,  3.64s/it]

Failed to parse URL at index 9095, URL: http://www.post-gazette.com/pg/08093/869798-388.stm


 62%|██████▏   | 9110/14621 [3:54:34<2:09:49,  1.41s/it]

Failed to parse URL at index 9109, URL: https://opera.culturaldistrict.org/donate-now/give-to-the-pittsburgh-opera?_ga=2.211692373.1108986527.1715015278-1076642171.1704821445


 62%|██████▏   | 9111/14621 [3:54:34<1:36:44,  1.05s/it]

Failed to parse URL at index 9110, URL: https://opera.culturaldistrict.org/order_history/show?_ga=2.237858368.1108986527.1715015278-1076642171.1704821445


 63%|██████▎   | 9147/14621 [3:55:02<1:10:34,  1.29it/s]

Failed to parse URL at index 9147, URL: https://pittsburghoperalegacy.org/


 64%|██████▎   | 9318/14621 [3:56:25<1:26:13,  1.03it/s]

Failed to parse URL at index 9317, URL: https://trustarts.org/
Failed to parse URL at index 9318, URL: https://www.guidestar.org/profile/25-1073139
Failed to parse URL at index 9319, URL: https://www.guidestar.org/


 64%|██████▍   | 9325/14621 [3:56:30<56:29,  1.56it/s]  

Failed to parse URL at index 9324, URL: https://www.linkedin.com/company/pittsburgh-opera/


 64%|██████▍   | 9327/14621 [3:56:31<57:01,  1.55it/s]

Failed to parse URL at index 9326, URL: https://www.instagram.com/pittsburghopera/


 64%|██████▍   | 9328/14621 [3:56:31<51:35,  1.71it/s]

Failed to parse URL at index 9327, URL: https://twitter.com/#!/PittsburghOpera


 64%|██████▍   | 9334/14621 [3:56:37<1:27:49,  1.00it/s]

Failed to parse URL at index 9333, URL: https://opera.culturaldistrict.org/po_home?_ga=2.65669100.396123570.1713560044-719496185.1710961687


 64%|██████▍   | 9336/14621 [3:56:37<55:57,  1.57it/s]  

Failed to parse URL at index 9334, URL: https://opera.culturaldistrict.org/donate-now/give-to-the-pittsburgh-opera


 64%|██████▍   | 9360/14621 [3:57:00<1:19:23,  1.10it/s]

Failed to parse URL at index 9359, URL: http://noahbaetge.com/


 64%|██████▍   | 9366/14621 [3:57:06<1:18:16,  1.12it/s]

Failed to parse URL at index 9365, URL: http://hrartistsmanagement.com/brandt.html


 64%|██████▍   | 9377/14621 [3:57:17<2:51:13,  1.96s/it]

Failed to parse URL at index 9376, URL: http://www.stepheneisenhard.com/default.htm


 64%|██████▍   | 9389/14621 [3:57:29<1:38:48,  1.13s/it]

Failed to parse URL at index 9388, URL: https://www.tapestryoperock.com/holli-harrison


 64%|██████▍   | 9398/14621 [3:57:39<1:24:11,  1.03it/s]

Failed to parse URL at index 9397, URL: http://www.shannonkesslerdooley.com/


 64%|██████▍   | 9423/14621 [3:58:11<1:24:38,  1.02it/s]

Failed to parse URL at index 9422, URL: http://claudiarosenthal.com/


 64%|██████▍   | 9424/14621 [3:58:12<1:29:27,  1.03s/it]

Failed to parse URL at index 9423, URL: https://www.arsis-artists.com/en/artists/angel-romero


 64%|██████▍   | 9425/14621 [3:58:12<1:20:36,  1.07it/s]

Failed to parse URL at index 9424, URL: http://www.matthewscollin.com/


 64%|██████▍   | 9428/14621 [3:58:15<1:06:43,  1.30it/s]

Failed to parse URL at index 9427, URL: http://kevin-short.com/index.php


 65%|██████▍   | 9431/14621 [3:58:20<1:40:49,  1.17s/it]

Failed to parse URL at index 9430, URL: http://www.phsulzberger.com/


 65%|██████▍   | 9435/14621 [3:58:23<1:29:06,  1.03s/it]

Failed to parse URL at index 9434, URL: http://philiptorre.com/


 65%|██████▍   | 9490/14621 [3:58:59<1:28:51,  1.04s/it]

Failed to parse URL at index 9489, URL: https://operaamerica.org/content/about/PressRoom/2018/04102018.pdf


 65%|██████▍   | 9491/14621 [3:59:06<2:32:09,  1.78s/it]

Failed to parse URL at index 9490, URL: http://www.getty.edu/foundation/


 65%|██████▍   | 9492/14621 [3:59:07<2:19:35,  1.63s/it]

Failed to parse URL at index 9491, URL: https://hillmanfamilyfoundations.org/foundations/hillman-foundation/


 65%|██████▍   | 9494/14621 [3:59:18<4:39:04,  3.27s/it]

Failed to parse URL at index 9493, URL: https://learn.instantencore.com/livenote


 65%|██████▍   | 9495/14621 [3:59:18<3:48:09,  2.67s/it]

Failed to parse URL at index 9494, URL: https://www.philorch.org/#/


 65%|██████▍   | 9500/14621 [3:59:24<1:59:46,  1.40s/it]

Failed to parse URL at index 9499, URL: https://opera.culturaldistrict.org/production/92654/tosca


 65%|██████▌   | 9508/14621 [3:59:32<1:12:08,  1.18it/s]

Failed to parse URL at index 9506, URL: http://parkpgh.org/


 65%|██████▌   | 9510/14621 [3:59:41<3:16:50,  2.31s/it]

Failed to parse URL at index 9509, URL: https://trustarts.org/pct_home/visit/welcome-to-the-cultural-district/safety-security


 65%|██████▌   | 9511/14621 [3:59:41<2:23:27,  1.68s/it]

Failed to parse URL at index 9510, URL: https://opera.culturaldistrict.org/production/92661/cavalleria-rusticana-slash-pagliacci


 65%|██████▌   | 9515/14621 [3:59:44<1:30:15,  1.06s/it]

Failed to parse URL at index 9514, URL: https://piperartists.com/portfolio/ricardo-jose-rivera/


 65%|██████▌   | 9517/14621 [3:59:45<57:42,  1.47it/s]  

Failed to parse URL at index 9516, URL: https://opera.culturaldistrict.org/production/92675/armida


 65%|██████▌   | 9518/14621 [3:59:45<46:19,  1.84it/s]

Failed to parse URL at index 9517, URL: https://opera.culturaldistrict.org/production/92666/madama-butterfly


 65%|██████▌   | 9523/14621 [3:59:52<2:13:43,  1.57s/it]

Failed to parse URL at index 9522, URL: https://www.truccomanagement.com/nozomi-kato


 65%|██████▌   | 9529/14621 [3:59:56<1:11:24,  1.19it/s]

Failed to parse URL at index 9528, URL: https://opera.culturaldistrict.org/production/92683/woman-with-eyes-closed


 65%|██████▌   | 9549/14621 [4:00:21<2:36:20,  1.85s/it]

Failed to parse URL at index 9548, URL: https://twitter.com/PGHCityPaper


 65%|██████▌   | 9551/14621 [4:00:22<1:55:35,  1.37s/it]

Failed to parse URL at index 9550, URL: https://instagram.com/pghcitypaper/
Failed to parse URL at index 9551, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?v=d


 65%|██████▌   | 9555/14621 [4:00:28<2:10:27,  1.55s/it]

Failed to parse URL at index 9555, URL: https://www.pghcitypaper.com/
Failed to parse URL at index 9556, URL: https://www.pghcitypaper.com/pittsburgh/Newsletter
Failed to parse URL at index 9557, URL: https://www.pghcitypaper.com/pittsburgh/PittsburghCityPapersMembershipCampaign/Page


 65%|██████▌   | 9564/14621 [4:00:31<40:30,  2.08it/s]  

Failed to parse URL at index 9559, URL: https://www.pghcitypaper.com/pittsburgh/ArticleArchives
Failed to parse URL at index 9560, URL: https://www.pghcitypaper.com/pittsburgh/SlideshowArchives
Failed to parse URL at index 9561, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch
Failed to parse URL at index 9562, URL: https://www.pghcitypaper.com/pittsburgh/LocationSearch
Failed to parse URL at index 9563, URL: https://www.pghcitypaper.com/pittsburgh/Home
Failed to parse URL at index 9564, URL: https://www.pghcitypaper.com/news
Failed to parse URL at index 9565, URL: https://www.pghcitypaper.com/category/community-profile-18792262
Failed to parse URL at index 9566, URL: https://www.pghcitypaper.com/category/environment-22045756


 65%|██████▌   | 9576/14621 [4:00:31<13:54,  6.05it/s]

Failed to parse URL at index 9567, URL: https://www.pghcitypaper.com/category/health-22045736
Failed to parse URL at index 9568, URL: https://www.pghcitypaper.com/category/infrastructure-22045766
Failed to parse URL at index 9569, URL: https://www.pghcitypaper.com/category/labor-22045776
Failed to parse URL at index 9570, URL: https://www.pghcitypaper.com/category/lgbtq-18791966
Failed to parse URL at index 9571, URL: https://www.pghcitypaper.com/category/politics-22045746
Failed to parse URL at index 9572, URL: https://www.pghcitypaper.com/category/social-justice-22051864
Failed to parse URL at index 9573, URL: https://www.pghcitypaper.com/category/weed-21373633
Failed to parse URL at index 9574, URL: https://www.pghcitypaper.com/food-drink
Failed to parse URL at index 9575, URL: https://www.pghcitypaper.com/category/food-2320126
Failed to parse URL at index 9576, URL: https://www.pghcitypaper.com/category/drink-1380413
Failed to parse URL at index 9577, URL: https://www.pghcitypaper.

 66%|██████▌   | 9588/14621 [4:00:31<06:39, 12.59it/s]

Failed to parse URL at index 9579, URL: https://www.pghcitypaper.com/pittsburgh/ArticleArchives?category=19719084
Failed to parse URL at index 9580, URL: https://www.pghcitypaper.com/category/literary-arts-1332595
Failed to parse URL at index 9581, URL: https://www.pghcitypaper.com/category/music-1332586
Failed to parse URL at index 9582, URL: https://www.pghcitypaper.com/pittsburgh/ArticleArchives?category=22112083
Failed to parse URL at index 9583, URL: https://www.pghcitypaper.com/category/screen-1332592
Failed to parse URL at index 9584, URL: https://www.pghcitypaper.com/category/theater-1332590
Failed to parse URL at index 9585, URL: https://www.pghcitypaper.com/category/visual-art-15401462
Failed to parse URL at index 9586, URL: https://www.pghcitypaper.com/columns
Failed to parse URL at index 9587, URL: https://www.pghcitypaper.com/category/opinion-1787522


 66%|██████▌   | 9599/14621 [4:00:31<04:07, 20.29it/s]

Failed to parse URL at index 9588, URL: https://www.pghcitypaper.com/pittsburgh/LynnCullenLive/Page
Failed to parse URL at index 9589, URL: https://www.pghcitypaper.com/category/pillow-talk-with-jessie-sage-25432809
Failed to parse URL at index 9590, URL: https://www.pghcitypaper.com/category/affordable-ish-housing-25087355
Failed to parse URL at index 9591, URL: https://www.pghcitypaper.com/category/free-will-astrology-6520482
Failed to parse URL at index 9592, URL: https://www.pghcitypaper.com/pittsburgh/ArticleArchives?category=20388759
Failed to parse URL at index 9593, URL: https://www.pghcitypaper.com/pittsburgh/SlideshowArchives?keywords=
Failed to parse URL at index 9594, URL: https://www.pghcitypaper.com/category/this-weeks-top-events-20785992
Failed to parse URL at index 9595, URL: https://www.pghcitypaper.com/pittsburgh/Events/AddEvent
Failed to parse URL at index 9596, URL: https://www.pghcitypaper.com/bestof
Failed to parse URL at index 9597, URL: https://www.pghcitypaper.

 66%|██████▌   | 9609/14621 [4:00:32<02:56, 28.43it/s]

Failed to parse URL at index 9599, URL: https://www.pghcitypaper.com/specials-guides
Failed to parse URL at index 9600, URL: https://www.pghcitypaper.com/guide/shop-local
Failed to parse URL at index 9601, URL: https://www.pghcitypaper.com/category/pittsburghs-people-of-the-year-16399436
Failed to parse URL at index 9602, URL: https://www.pghcitypaper.com/guide/give-local
Failed to parse URL at index 9603, URL: https://www.pghcitypaper.com/category/election-guide-14973309
Failed to parse URL at index 9604, URL: https://www.pghcitypaper.com/pittsburgh/2022-winter-guide/Content?oid=20992964
Failed to parse URL at index 9605, URL: https://www.pghcitypaper.com/category/spring-guide-14191717
Failed to parse URL at index 9606, URL: https://www.pghcitypaper.com/specials-guides/pittsburgh-city-papers-2023-summer-guide-24022837
Failed to parse URL at index 9607, URL: https://www.pghcitypaper.com/category/fall-guide-15817790
Failed to parse URL at index 9608, URL: https://www.pghcitypaper.com/pi

 66%|██████▌   | 9614/14621 [4:00:32<03:37, 23.04it/s]

Failed to parse URL at index 9610, URL: https://www.pghcitypaper.com/blogs/Blogh/
Failed to parse URL at index 9611, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?feature=Promoted
Failed to parse URL at index 9612, URL: https://www.pghcitypaper.com/pittsburgh/jazz-at-emmanuel/Event?oid=26830987
Failed to parse URL at index 9613, URL: https://www.pghcitypaper.com/pittsburgh/norberts-haunted-festival/Event?oid=26562450
Failed to parse URL at index 9614, URL: https://www.pghcitypaper.com/pittsburgh/wyep-hellbender-ball-night-of-the-living-hellbender/Event?oid=26574798
Failed to parse URL at index 9615, URL: https://www.pghcitypaper.com/pittsburgh/emmanuel-episcopal-church/Location?oid=24853566
Failed to parse URL at index 9616, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=1354773
Failed to parse URL at index 9617, URL: https://www.pghcitypaper.com/pittsburgh/halloween-season-of-the-sketch/Event?oid=26694744
Failed to parse URL at index 9618, URL: https

 66%|██████▌   | 9620/14621 [4:00:33<07:57, 10.48it/s]

Failed to parse URL at index 9620, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=1354754
Failed to parse URL at index 9621, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=1354756
Failed to parse URL at index 9622, URL: https://www.pghcitypaper.com/pittsburgh/ritual-sound-movement-healing-art-wellness-workshop-and-quarterly-immersion/Event?oid=26756233
Failed to parse URL at index 9623, URL: https://www.pghcitypaper.com/pittsburgh/wightman-school/Location?oid=1423258


 66%|██████▌   | 9631/14621 [4:00:37<15:25,  5.39it/s]

Failed to parse URL at index 9625, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=1354759
Failed to parse URL at index 9626, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=1354775
Failed to parse URL at index 9627, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=1354760
Failed to parse URL at index 9628, URL: https://www.pghcitypaper.com/pittsburgh/pittsburgh-pride-bowling/Event?oid=1693328
Failed to parse URL at index 9629, URL: https://www.pghcitypaper.com/pittsburgh/forward-lanes/Location?oid=1424708
Failed to parse URL at index 9630, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=1354765
Failed to parse URL at index 9631, URL: https://www.pghcitypaper.com/pittsburgh/4th-annual-paws-against-domestic-violence-pet-walk-for-crisis-center-north/Event?oid=26556351
Failed to parse URL at index 9632, URL: https://www.pghcitypaper.com/pittsburgh/north-park-harmar-pavilion/Location?oid=14515645


 66%|██████▌   | 9638/14621 [4:00:38<14:13,  5.84it/s]

Failed to parse URL at index 9634, URL: https://www.pghcitypaper.com/pittsburgh/neighborhood-flea-may-12th-mothers-day-edition/Event?oid=25810570
Failed to parse URL at index 9635, URL: https://www.pghcitypaper.com/pittsburgh/south-side-works/Location?oid=1946641
Failed to parse URL at index 9636, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=7668655
Failed to parse URL at index 9637, URL: https://www.pghcitypaper.com/pittsburgh/the-dollar-sale/Event?oid=26718105
Failed to parse URL at index 9638, URL: https://www.pghcitypaper.com/pittsburgh/jerrys-records/Location?oid=26718104
Failed to parse URL at index 9639, URL: https://www.pghcitypaper.com/pittsburgh/sunday-tailgate-party/Event?oid=26662572
Failed to parse URL at index 9640, URL: https://www.pghcitypaper.com/pittsburgh/hazelwood-brew-house/Location?oid=26628744
Failed to parse URL at index 9641, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=8976382
Failed to parse URL at index 964

 66%|██████▌   | 9644/14621 [4:00:38<09:37,  8.62it/s]

Failed to parse URL at index 9643, URL: https://www.pghcitypaper.com/pittsburgh/powdermill-nature-reserve/Location?oid=25445697


 66%|██████▌   | 9648/14621 [4:00:42<28:49,  2.88it/s]

Failed to parse URL at index 9645, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=1354763
Failed to parse URL at index 9646, URL: https://www.pghcitypaper.com/pittsburgh/roll-to-the-polls-pop-up/Event?oid=26823537
Failed to parse URL at index 9647, URL: https://www.pghcitypaper.com/pittsburgh/roll-to-the-polls-pop-up/Location?oid=26823536


 66%|██████▌   | 9651/14621 [4:00:44<33:32,  2.47it/s]

Failed to parse URL at index 9649, URL: https://www.pghcitypaper.com/pittsburgh/international-poetry-forum-presents-terrance-hayes/Event?oid=26772135
Failed to parse URL at index 9650, URL: https://www.pghcitypaper.com/pittsburgh/city-of-asylum-alphabet-city/Location?oid=1950118


 66%|██████▌   | 9659/14621 [4:00:46<22:34,  3.66it/s]

Failed to parse URL at index 9652, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=1354762
Failed to parse URL at index 9653, URL: https://www.pghcitypaper.com/pittsburgh/tanger-outlets-pittsburgh-hurricane-relieve-supply-drive/Event?oid=26821696
Failed to parse URL at index 9654, URL: https://www.pghcitypaper.com/pittsburgh/tanger-outlets-pittsburgh/Location?oid=24700044
Failed to parse URL at index 9655, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=1354767
Failed to parse URL at index 9656, URL: https://www.pghcitypaper.com/pittsburgh/steelers-watch-party-with-former-steeler-jt-thomas/Event?oid=26760614
Failed to parse URL at index 9657, URL: https://www.pghcitypaper.com/pittsburgh/city-kitchen/Location?oid=26605096
Failed to parse URL at index 9658, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=9149005
Failed to parse URL at index 9659, URL: https://www.pghcitypaper.com/pittsburgh/neon-brush-sip-and-paint-work

 66%|██████▌   | 9662/14621 [4:00:47<27:49,  2.97it/s]

Failed to parse URL at index 9662, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=13062393
Failed to parse URL at index 9663, URL: https://www.pghcitypaper.com/pittsburgh/authentic-flamenco-presents-el-yiyo/Event?oid=26599079
Failed to parse URL at index 9664, URL: https://www.pghcitypaper.com/pittsburgh/andrew-carnegie-free-library-music-hall/Location?oid=1423567


 66%|██████▌   | 9666/14621 [4:00:48<26:43,  3.09it/s]

Failed to parse URL at index 9666, URL: https://www.pghcitypaper.com/pittsburgh/guttermouth-rising-seas-midnight-lights/Event?oid=26741452
Failed to parse URL at index 9667, URL: https://www.pghcitypaper.com/pittsburgh/preserving-underground/Location?oid=25512609


 66%|██████▌   | 9669/14621 [4:00:49<24:02,  3.43it/s]

Failed to parse URL at index 9668, URL: https://www.etix.com/ticket/p/42112549/guttermouth-new-kensington-preserving?partner_id=100
Failed to parse URL at index 9669, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=7668659
Failed to parse URL at index 9670, URL: https://www.pghcitypaper.com/pittsburgh/beyond-the-veil-with-the-psychic-vincent-sisters-at-kingfly-spirits-distillery-pittsburgh/Event?oid=26657093
Failed to parse URL at index 9671, URL: https://www.pghcitypaper.com/pittsburgh/kingfly-spirits/Location?oid=13610063


 66%|██████▌   | 9677/14621 [4:00:53<30:07,  2.73it/s]

Failed to parse URL at index 9673, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=1354753
Failed to parse URL at index 9674, URL: https://www.pghcitypaper.com/pittsburgh/beyond-the-veil-with-the-psychic-vincent-sisters-at-kingfly-spirits-distillery/Event?oid=26656873
Failed to parse URL at index 9675, URL: https://www.pghcitypaper.com/pittsburgh/beyond-the-veil-with-the-psychic-vincent-sisters-at-kingfly-spirits-distillery-pittsburgh/Event?oid=26657099
Failed to parse URL at index 9676, URL: https://www.pghcitypaper.com/pittsburgh/pittsburgh-moving-picture-festival-thriller-picture-show-2024/Event?oid=26782145
Failed to parse URL at index 9677, URL: https://www.pghcitypaper.com/pittsburgh/the-lindsay-theater-and-cultural-center/Location?oid=26158202


 66%|██████▌   | 9681/14621 [4:00:55<28:45,  2.86it/s]

Failed to parse URL at index 9678, URL: https://ticketing.useast.veezi.com/purchase/24087?siteToken=6avxdqr7sn7fr75807a5k307jm
Failed to parse URL at index 9679, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=6098067
Failed to parse URL at index 9680, URL: https://www.pghcitypaper.com/pittsburgh/the-revolutionists-by-lauren-gunderson-r-act/Event?oid=26660596
Failed to parse URL at index 9681, URL: https://www.pghcitypaper.com/pittsburgh/segriff-stage/Location?oid=26153416


 66%|██████▌   | 9683/14621 [4:00:56<37:42,  2.18it/s]

Failed to parse URL at index 9683, URL: https://www.pghcitypaper.com/pittsburgh/gilbert-and-sullivans-patience/Event?oid=26706928
Failed to parse URL at index 9684, URL: https://www.pghcitypaper.com/pittsburgh/greater-pittsburgh-masonic-center/Location?oid=1423424


 66%|██████▌   | 9686/14621 [4:00:57<34:10,  2.41it/s]

Failed to parse URL at index 9686, URL: https://www.pghcitypaper.com/pittsburgh/the-trees/Event?oid=26691518
Failed to parse URL at index 9687, URL: https://www.pghcitypaper.com/pittsburgh/pittsburgh-playhouse-of-point-park-university/Location?oid=15032237


 66%|██████▋   | 9689/14621 [4:00:58<31:34,  2.60it/s]

Failed to parse URL at index 9688, URL: https://playhouse.culturaldistrict.org/production/95451/list_performances
Failed to parse URL at index 9689, URL: https://www.pghcitypaper.com/pittsburgh/smokin-betties-burlesque-presents-return-of-the-living-betties/Event?oid=26586560
Failed to parse URL at index 9690, URL: https://www.pghcitypaper.com/pittsburgh/club-cafe/Location?oid=1422467


 66%|██████▋   | 9698/14621 [4:00:59<13:32,  6.06it/s]

Failed to parse URL at index 9691, URL: https://www.ticketweb.com/event/smokin-betties-burlesque-presents-return-club-cafe-tickets/13865573
Failed to parse URL at index 9692, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=12868660
Failed to parse URL at index 9693, URL: https://www.pghcitypaper.com/pittsburgh/pittsburgh-opera-presents-tosca/Event?oid=26707737
Failed to parse URL at index 9694, URL: https://www.pghcitypaper.com/pittsburgh/benedum-center/Location?oid=2713874
Failed to parse URL at index 9695, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=7668653
Failed to parse URL at index 9696, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?eventCategory=1354778
Failed to parse URL at index 9697, URL: https://www.pghcitypaper.com/pittsburgh/evil-dead-the-musical/Event?oid=26464457
Failed to parse URL at index 9698, URL: https://www.pghcitypaper.com/pittsburgh/pittsburgh-musical-theater/Location?oid=4110384


 66%|██████▋   | 9701/14621 [4:01:01<28:22,  2.89it/s]

Failed to parse URL at index 9699, URL: https://pittsburghmusicals.com/evildeadtickets
Failed to parse URL at index 9700, URL: https://www.pghcitypaper.com/pittsburgh/the-neighborhood-flea/Event?oid=1917607
Failed to parse URL at index 9701, URL: https://www.pghcitypaper.com/pittsburgh/strip-district/Location?oid=1800304
Failed to parse URL at index 9702, URL: https://www.pghcitypaper.com/pittsburgh/content-writing-service-affordable-price-cheap-content-writers/Event?oid=17726664
Failed to parse URL at index 9703, URL: https://www.pghcitypaper.com/pittsburgh/pittsburgh-writing-exchange/Location?oid=1481777


 66%|██████▋   | 9713/14621 [4:01:03<14:03,  5.82it/s]

Failed to parse URL at index 9705, URL: https://www.pghcitypaper.com/pittsburgh/pflag-pittsburgh/Event?oid=1848211
Failed to parse URL at index 9706, URL: https://www.pghcitypaper.com/pittsburgh/third-presbyterian-church/Location?oid=1423314
Failed to parse URL at index 9707, URL: https://www.pghcitypaper.com/pittsburgh/arabic-for-beginners/Event?oid=1864210
Failed to parse URL at index 9708, URL: https://www.pghcitypaper.com/pittsburgh/carnegie-library-oakland/Location?oid=1423155
Failed to parse URL at index 9709, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?page=2&v=d
Failed to parse URL at index 9710, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?page=3&v=d
Failed to parse URL at index 9711, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?page=4&v=d
Failed to parse URL at index 9712, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?page=5&v=d
Failed to parse URL at index 9713, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?page=6&v=

 66%|██████▋   | 9717/14621 [4:01:03<10:26,  7.83it/s]

Failed to parse URL at index 9714, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?page=7&v=d
Failed to parse URL at index 9715, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?page=8&v=d
Failed to parse URL at index 9716, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?page=9&v=d
Failed to parse URL at index 9717, URL: https://www.pghcitypaper.com/pittsburgh/EventSearch?page=10&v=d


 66%|██████▋   | 9720/14621 [4:01:06<26:40,  3.06it/s]

Failed to parse URL at index 9719, URL: https://www.pghcitypaper.com/pittsburgh/IssueArchives?issue=26817150


 67%|██████▋   | 9725/14621 [4:01:06<19:45,  4.13it/s]

Failed to parse URL at index 9722, URL: https://www.pghcitypaper.com/pittsburgh/barack-obama-gives-impassioned-speech-in-support-of-kamala-harris-on-pitts-campus/Slideshow/26833027
Failed to parse URL at index 9723, URL: https://www.pghcitypaper.com/news/crustys-quarry-in-volant-pa-boasts-some-unique-wildlife-26827450
Failed to parse URL at index 9724, URL: https://www.pghcitypaper.com/best-of/2024/intro/best-of-pittsburgh-2024-readers-poll-26661082
Failed to parse URL at index 9725, URL: https://www.pghcitypaper.com/news/western-psych-nurses-vote-to-strike-claiming-upmc-has-undermined-mental-healthcare-26822139
Failed to parse URL at index 9726, URL: https://www.pghcitypaper.com/news/harolds-haunt-celebrates-two-years-of-ghosts-fun-and-inclusion-in-millvale-26828665
Failed to parse URL at index 9727, URL: https://www.pghcitypaper.com/pittsburgh/MovieTimes


 67%|██████▋   | 9730/14621 [4:01:06<12:20,  6.60it/s]

Failed to parse URL at index 9728, URL: https://www.pghcitypaper.com/pittsburgh/AboutUs/Page
Failed to parse URL at index 9729, URL: https://www.pghcitypaper.com/pittsburgh/ContactUs/Page
Failed to parse URL at index 9730, URL: https://www.pghcitypaper.com/pittsburgh/Employment/Page
Failed to parse URL at index 9731, URL: https://www.pghcitypaper.com/pittsburgh/Syndication
Failed to parse URL at index 9732, URL: https://www.pghcitypaper.com/pittsburgh/AdvertiseContact/Page
Failed to parse URL at index 9733, URL: https://www.pghcitypaper.com/category/now-hiring-20388759


 67%|██████▋   | 9735/14621 [4:01:07<09:09,  8.88it/s]

Failed to parse URL at index 9735, URL: https://www.pghcitypaper.com/category/sponsored-content-16477698
Failed to parse URL at index 9736, URL: https://www.pghcitypaper.com/pittsburgh/iPittsburghCityPaperiPitchGuidelines/Page
Failed to parse URL at index 9737, URL: https://www.pghcitypaper.com/pittsburgh/CommentsPrivacyPolicy/Page


 67%|██████▋   | 9817/14621 [4:03:45<2:42:23,  2.03s/it]

Failed to parse URL at index 9816, URL: https://downtownpittsburgh.com/events/?n=1&y=2025&cat=0


 67%|██████▋   | 9818/14621 [4:03:51<3:55:21,  2.94s/it]

Failed to parse URL at index 9817, URL: https://downtownpittsburgh.com/events/?n=12&y=2024


 67%|██████▋   | 9842/14621 [4:04:51<1:55:15,  1.45s/it]

Failed to parse URL at index 9841, URL: https://twitter.com/DowntownPitt


 67%|██████▋   | 9843/14621 [4:04:51<1:36:56,  1.22s/it]

Failed to parse URL at index 9842, URL: https://www.instagram.com/downtownpitt/


 67%|██████▋   | 9846/14621 [4:05:02<3:44:43,  2.82s/it]

Failed to parse URL at index 9845, URL: https://downtownpittsburgh.com/events/?n=12&y=2024&cat=0


 67%|██████▋   | 9847/14621 [4:05:07<4:51:31,  3.66s/it]

Failed to parse URL at index 9846, URL: https://downtownpittsburgh.com/events/?n=11&y=2024


 68%|██████▊   | 9880/14621 [4:06:28<2:32:06,  1.92s/it]

# Dynamic crawling for the events data

In [None]:
# Install webdriver_manager (provides the ChromeDriverManager imported by the crawler cell)
!pip install webdriver_manager
# Update package list and install necessary dependencies
!apt-get update
!apt-get install -y wget unzip libvulkan1

# Download and install Google Chrome
!wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
!dpkg -i google-chrome-stable_current_amd64.deb
# Let apt install any dependencies the dpkg step above left unresolved
!apt-get install -f -y

# Install xvfb for virtual framebuffer support
!apt-get install -y xvfb

# Install selenium and chromedriver-autoinstaller
!pip install selenium chromedriver-autoinstaller


Hit:1 http://deb.debian.org/debian buster InRelease
Hit:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:3 http://deb.debian.org/debian buster-updates InRelease
Get:4 https://dl.google.com/linux/chrome/deb stable InRelease [1,825 B]
Hit:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:6 http://deb.debian.org/debian-security buster/updates InRelease
Get:7 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:8 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:9 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:10 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:11 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:12 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:13 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Hit:14 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Get:15 https:/

In [None]:
import csv
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
from tqdm import tqdm
import time
# Automatically install the correct version of ChromeDriver
import chromedriver_autoinstaller
chromedriver_autoinstaller.install()

# Function to fetch the page content using Selenium
def fetch_page_text_selenium(url, driver):
    """Load ``url`` in the given Selenium driver and return its visible text.

    Args:
        url: Address of the page to open.
        driver: Selenium WebDriver used to load the page.

    Returns:
        The page text with one line per text node (whitespace stripped), or
        ``None`` if anything goes wrong while loading or parsing; the error is
        printed rather than raised so a crawl can continue with the next URL.
    """
    try:
        driver.get(url)

        # Give dynamically rendered pages up to 10 seconds to expose a <body>.
        waiter = WebDriverWait(driver, 10)
        waiter.until(EC.presence_of_element_located((By.TAG_NAME, "body")))

        # Hand the rendered HTML to BeautifulSoup and flatten it to plain text.
        rendered_html = driver.page_source
        parsed = BeautifulSoup(rendered_html, 'html.parser')
        return parsed.get_text(separator='\n', strip=True)
    except Exception as err:
        print(f"Error fetching {url}: {err}")
        return None

# Function to read URLs from CSV and crawl each one
def crawl_urls_from_csv(csv_file_path, url_column_name, driver,
                        output_dir="./data/scraped/events_test"):
    """Crawl every URL listed in a CSV column and save each page's text.

    Args:
        csv_file_path: Path to a UTF-8 CSV file with a header row.
        url_column_name: Name of the column holding the URLs.
        driver: Selenium WebDriver passed through to
            ``fetch_page_text_selenium``.
        output_dir: Directory receiving one ``<row index>.txt`` file per
            successfully fetched page. Created if it does not exist.

    Raises:
        KeyError: If ``url_column_name`` is not a column of the CSV.
        OSError: If the CSV cannot be read or an output file cannot be written.
    """
    import os  # local import: this cell's import list does not bring in os

    # Read all URLs up front so the CSV handle is not held open for the whole crawl.
    with open(csv_file_path, newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        urls = [row[url_column_name] for row in reader]

    # Without this the first write fails with FileNotFoundError on a fresh checkout.
    os.makedirs(output_dir, exist_ok=True)

    # Use tqdm to show progress
    for index, url in enumerate(tqdm(urls, desc="Crawling URLs")):
        text = fetch_page_text_selenium(url, driver)
        if text:
            # The file name is the row index, so skipped pages leave gaps in the numbering.
            output_file = os.path.join(output_dir, f"{index}.txt")
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(text)
            print(f"Saved content to {output_file}")

if __name__ == "__main__":
    # Input CSV and the column that holds the event URLs.
    csv_file_path = './data/raw/csv_data/events_after_10_27.csv'
    url_column_name = 'Source URL'

    # Initialize the Chrome WebDriver
    service = Service(executable_path=r'/usr/bin/chromedriver')
    options = webdriver.ChromeOptions()
    # Headless mode is requested via the command-line switch only; the
    # `options.headless` setter is deprecated/removed in recent Selenium
    # releases and was redundant with this argument.
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    driver = webdriver.Chrome(service=service, options=options)

    try:
        # Start crawling the URLs from the CSV
        crawl_urls_from_csv(csv_file_path, url_column_name, driver)
    except Exception as e:
        # Report the failure instead of leaving a traceback in the notebook output.
        print(f"An error occurred during crawling: {e}")
    finally:
        # Ensure the WebDriver is properly closed
        driver.quit()

Crawling URLs:   2%|▎         | 1/40 [00:02<01:30,  2.31s/it]

Saved content to ./data/scraped/events_test/0.txt


Crawling URLs:   5%|▌         | 2/40 [00:04<01:33,  2.47s/it]

Saved content to ./data/scraped/events_test/1.txt


Crawling URLs:   8%|▊         | 3/40 [00:06<01:24,  2.28s/it]

Saved content to ./data/scraped/events_test/2.txt


Crawling URLs:  10%|█         | 4/40 [00:08<01:08,  1.90s/it]

Saved content to ./data/scraped/events_test/3.txt


Crawling URLs:  12%|█▎        | 5/40 [00:11<01:17,  2.22s/it]

Saved content to ./data/scraped/events_test/4.txt


Crawling URLs:  15%|█▌        | 6/40 [00:14<01:25,  2.52s/it]

Saved content to ./data/scraped/events_test/5.txt


Crawling URLs:  18%|█▊        | 7/40 [00:14<01:00,  1.83s/it]

Saved content to ./data/scraped/events_test/6.txt


Crawling URLs:  20%|██        | 8/40 [00:16<00:58,  1.84s/it]

Saved content to ./data/scraped/events_test/7.txt


Crawling URLs:  22%|██▎       | 9/40 [00:18<00:58,  1.89s/it]

Saved content to ./data/scraped/events_test/8.txt


Crawling URLs:  25%|██▌       | 10/40 [00:21<01:06,  2.21s/it]

Saved content to ./data/scraped/events_test/9.txt


Crawling URLs:  28%|██▊       | 11/40 [00:23<01:00,  2.08s/it]

Saved content to ./data/scraped/events_test/10.txt


Crawling URLs:  30%|███       | 12/40 [00:24<00:53,  1.91s/it]

Saved content to ./data/scraped/events_test/11.txt


Crawling URLs:  32%|███▎      | 13/40 [00:26<00:52,  1.94s/it]

Saved content to ./data/scraped/events_test/12.txt


Crawling URLs:  35%|███▌      | 14/40 [00:28<00:47,  1.84s/it]

Saved content to ./data/scraped/events_test/13.txt


Crawling URLs:  38%|███▊      | 15/40 [00:30<00:45,  1.82s/it]

Saved content to ./data/scraped/events_test/14.txt


Crawling URLs:  40%|████      | 16/40 [00:31<00:42,  1.76s/it]

Saved content to ./data/scraped/events_test/15.txt


Crawling URLs:  42%|████▎     | 17/40 [00:33<00:39,  1.73s/it]

Saved content to ./data/scraped/events_test/16.txt


Crawling URLs:  45%|████▌     | 18/40 [00:34<00:33,  1.53s/it]

Saved content to ./data/scraped/events_test/17.txt


Crawling URLs:  48%|████▊     | 19/40 [00:35<00:29,  1.43s/it]

Saved content to ./data/scraped/events_test/18.txt


Crawling URLs:  50%|█████     | 20/40 [00:36<00:25,  1.27s/it]

Saved content to ./data/scraped/events_test/19.txt


Crawling URLs:  52%|█████▎    | 21/40 [00:40<00:38,  2.05s/it]

Saved content to ./data/scraped/events_test/20.txt


Crawling URLs:  55%|█████▌    | 22/40 [00:47<01:06,  3.72s/it]

Saved content to ./data/scraped/events_test/21.txt


Crawling URLs:  57%|█████▊    | 23/40 [00:55<01:21,  4.77s/it]

Saved content to ./data/scraped/events_test/22.txt


Crawling URLs:  60%|██████    | 24/40 [01:03<01:33,  5.83s/it]

Saved content to ./data/scraped/events_test/23.txt


Crawling URLs:  62%|██████▎   | 25/40 [01:11<01:39,  6.61s/it]

Saved content to ./data/scraped/events_test/24.txt


Crawling URLs:  95%|█████████▌| 38/40 [01:23<00:05,  2.88s/it]

Saved content to ./data/scraped/events_test/37.txt


Crawling URLs:  98%|█████████▊| 39/40 [01:26<00:03,  3.08s/it]

Saved content to ./data/scraped/events_test/38.txt


Crawling URLs: 100%|██████████| 40/40 [01:28<00:00,  2.22s/it]

Saved content to ./data/scraped/events_test/39.txt





# Data Sharding

In [None]:
import os
import re
from transformers import BartTokenizer
from tqdm import tqdm

# Load the tokenizer for the BART model
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")

# Clean the text
def clean_text(text):
    """Return ``text`` restricted to ASCII with whitespace normalised.

    Each run of non-ASCII characters becomes a single space, then every run of
    whitespace is collapsed to one space and the ends are trimmed.
    """
    ascii_only = re.sub(r'[^\x00-\x7F]+', ' ', text)
    words = ascii_only.split()
    return ' '.join(words)

# Read the content of a file
def read_file(file_path):
    """Return the entire content of the UTF-8 text file at ``file_path``."""
    with open(file_path, 'r', encoding='utf-8') as source:
        contents = source.read()
    return contents

# Split text into chunks of up to 1000 tokens (below the max_tokens limit, 1024 by default)
def split_into_token_chunks(text, max_tokens=1024, chunk_size=1000):
    """Split ``text`` into pieces that each decode from a bounded token count.

    The text is tokenised once with the module-level ``tokenizer``; the token
    sequence is then sliced into consecutive windows and every window is
    decoded back to a string.

    Args:
        text: The text to split.
        max_tokens: Hard upper bound on the number of tokens per chunk.
        chunk_size: Preferred number of tokens per chunk. The effective window
            is ``min(chunk_size, max_tokens)``, so the defaults reproduce the
            previous fixed window of 1000 tokens.
            NOTE(review): the 24-token headroom below 1024 presumably absorbs
            growth when chunks are re-tokenised downstream - confirm.

    Returns:
        A list of non-empty chunk strings. On any error (including a
        non-positive window) the error is printed and ``[]`` is returned.
    """
    try:
        # Cap the window by max_tokens so the limit is honoured by construction,
        # instead of slicing a fixed 1000 and failing when max_tokens is smaller.
        window = min(chunk_size, max_tokens)
        if window <= 0:
            raise ValueError(f"Chunk size must be positive, got {window}")

        # Tokenize the text (no truncation: the whole document is needed)
        tokens = tokenizer.encode(text, truncation=False, return_tensors="pt")[0]

        chunks = []
        for start in range(0, len(tokens), window):
            chunk_tokens = tokens[start:start + window]

            # Decode tokens back to text, dropping special tokens
            chunk_text = tokenizer.decode(chunk_tokens, skip_special_tokens=True)

            if chunk_text.strip():  # Skip empty chunks
                chunks.append(chunk_text)

        return chunks
    except Exception as e:
        # Best-effort: callers treat an empty list as "nothing usable in this file".
        print(f"Error tokenizing or splitting text: {e}")
        return []

# Save the processed text to a file
def save_processed_text(text, output_dir, file_name, chunk_index=None):
    """Write ``text`` as a UTF-8 file inside ``output_dir``.

    When ``chunk_index`` is given (including ``0``), the file is named
    ``<stem>_chunk_<chunk_index>.txt`` where ``<stem>`` is ``file_name``
    without its extension; otherwise ``file_name`` is used unchanged.
    """
    target_name = file_name
    if chunk_index is not None:
        stem, _extension = os.path.splitext(file_name)
        target_name = f"{stem}_chunk_{chunk_index}.txt"

    destination = os.path.join(output_dir, target_name)
    with open(destination, 'w', encoding='utf-8') as sink:
        sink.write(text)

# Process each text file in the directory
def process_directory(input_dir, output_dir):
    """Clean and shard every ``.txt`` file in ``input_dir`` into ``output_dir``.

    Each file is read, cleaned with ``clean_text`` and split with
    ``split_into_token_chunks``. A file that yields one chunk is saved under
    its original name; a file that yields several is saved as
    ``<stem>_chunk_<i>.txt`` files. Files that are empty, become empty after
    cleaning, or yield no chunks are reported and skipped.

    Args:
        input_dir: Directory containing the raw text files.
        output_dir: Directory receiving the processed files; created if missing.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(output_dir, exist_ok=True)

    # Iterate over all entries in the input directory
    for file_name in tqdm(os.listdir(input_dir)):
        if not file_name.endswith(".txt"):  # Only process .txt files
            continue

        # Read the file content
        text = read_file(os.path.join(input_dir, file_name))

        # Skip empty files
        if not text.strip():
            print(f"Skipping empty file: {file_name}")
            continue

        # Clean the text
        cleaned_text = clean_text(text)

        # Skip if cleaned text is empty (e.g. the file held only non-ASCII characters)
        if not cleaned_text.strip():
            print(f"Skipping file with no valid text: {file_name}")
            continue

        # Split the text into chunks bounded by 1024 tokens
        chunks = split_into_token_chunks(cleaned_text, max_tokens=1024)

        # Skip if no valid chunks are produced (also the tokenizer-error path)
        if not chunks:
            print(f"Skipping file with no valid chunks: {file_name}")
            continue

        if len(chunks) == 1:
            # A single chunk keeps the original file name
            save_processed_text(chunks[0], output_dir, file_name)
        else:
            # Multiple chunks each get a _chunk_<index> suffix
            for index, chunk in enumerate(chunks):
                save_processed_text(chunk, output_dir, file_name, chunk_index=index)

    print("All files processed.")

# Where the raw scraped text lives and where the cleaned shards are written
input_directory = './data/scraped/scraped_web_text_data'
output_directory = './data/scraped/cleaned_and_truncated_text_data'

# Make sure the destination exists before any shard is written
os.makedirs(output_directory, exist_ok=True)

# Clean and shard every text file from the input directory
process_directory(input_dir=input_directory, output_dir=output_directory)

  2%|▏         | 3/159 [00:00<00:28,  5.49it/s]

Skipping empty file: 6.txt
Skipping empty file: 2.txt
Skipping empty file: 4.txt
Skipping empty file: 16.txt
Skipping empty file: 15.txt
Skipping empty file: 85.txt
Skipping empty file: 86.txt
Skipping empty file: 87.txt
Skipping empty file: 88.txt
Skipping empty file: 89.txt
Skipping empty file: 91.txt
Skipping empty file: 90.txt
Skipping empty file: 92.txt
Skipping empty file: 94.txt
Skipping empty file: 93.txt
Skipping empty file: 95.txt
Skipping empty file: 96.txt
Skipping empty file: 97.txt
Skipping empty file: 98.txt
Skipping empty file: 120.txt
Skipping empty file: 151.txt
Skipping empty file: 152.txt


100%|██████████| 159/159 [00:16<00:00,  9.81it/s]

All files processed.





# Testing: using BART to summarize the input

In [None]:
import os
from transformers import pipeline, BartTokenizer
from tqdm import tqdm

# Load the pre-trained summarization model and tokenizer
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0)  # Use GPU (device=0) or CPU (device=-1)

# Read the content of a file
def read_file(file_path):
    """Read the UTF-8 text file at ``file_path`` and return its full content."""
    with open(file_path, 'r', encoding='utf-8') as shard:
        shard_text = shard.read()
    return shard_text

# Summarize a text chunk
def summarize_text(text, max_length=1024):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Args:
        text: The text to summarize.
        max_length: Passed to the pipeline as the summary's ``max_length``.

    Returns:
        The ``summary_text`` of the first pipeline result, or ``None`` when the
        text is blank or the pipeline fails; the error is printed, not raised.
    """
    try:
        # Blank input is reported through the same error path as pipeline failures.
        if len(text.strip()) == 0:
            raise ValueError("Empty text provided")

        generation_options = {
            "max_length": max_length,
            "min_length": 61,
            "do_sample": False,
            # Ask the pipeline to truncate over-long inputs (the shards are already size-limited).
            "truncation": True,
        }
        results = summarizer(text, **generation_options)
        first_result = results[0]
        return first_result['summary_text']
    except Exception as err:
        print(f"Error summarizing text: {err}")
        return None

# Process each shard file in the directory and create summaries
def process_directory(input_dir, output_dir, summary_output_dir, max_summary_length=1024):
    """Summarize every ``.txt`` shard in ``input_dir``.

    Args:
        input_dir: Directory containing the shard files.
        output_dir: Accepted but not used by this function.
        summary_output_dir: Directory receiving the summaries; created if it
            does not exist.
        max_summary_length: Forwarded to ``summarize_text`` as ``max_length``.

    Each summary is written to ``<file_name>_summary.txt``, where
    ``<file_name>`` still includes the shard's ``.txt`` extension
    (e.g. ``a.txt_summary.txt``). Blank shards and shards that fail to
    summarize are reported and skipped.
    """
    # Create the summary directory on first use
    if not os.path.exists(summary_output_dir):
        os.makedirs(summary_output_dir)

    for shard_name in tqdm(os.listdir(input_dir)):
        # Only .txt entries are shards
        if not shard_name.endswith(".txt"):
            continue

        shard_text = read_file(os.path.join(input_dir, shard_name))

        # Nothing to summarize in a blank shard
        if not shard_text.strip():
            print(f"Skipping empty file: {shard_name}")
            continue

        shard_summary = summarize_text(shard_text, max_length=max_summary_length)
        if not shard_summary:
            print(f"Failed to summarize {shard_name}")
            continue

        # Persist the summary next to the others
        destination = os.path.join(summary_output_dir, f"{shard_name}_summary.txt")
        with open(destination, 'w', encoding='utf-8') as sink:
            sink.write(shard_summary)

# Example usage: summarize the cleaned shards produced by the sharding step
input_directory = './data/scraped/cleaned_and_truncated_text_data'
summary_output_directory = './data/scraped/output_summary'

# The summary directory is passed for both directory arguments, as before.
process_directory(
    input_dir=input_directory,
    output_dir=summary_output_directory,
    summary_output_dir=summary_output_directory,
    max_summary_length=1024,  # Ensure this does not exceed the model's limit
)

Device set to use cuda:0
  0%|          | 0/473 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Your max_length is set to 1024, but your input_length is only 1000. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=500)
  0%|          | 1/473 [00:03<31:05,  3.95s/it]Your max_length is set to 1024, but your input_length is only 1003. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=501)
  0%|          | 2/473 [00:04<17:19,  2.21s/it]Your max_length is set to 1024, but your input_length is only 1003. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing ma