In [None]:
import requests
from bs4 import BeautifulSoup
import urllib.parse

def crawl_and_save_urls(base_url, output_file, exclude_url, timeout=30):
    """
    Crawl a website breadth-first from ``base_url`` and append each fetched URL to a file.

    Pages are fetched with ``requests`` and parsed with BeautifulSoup. Every
    ``<a href>`` on a fetched page is resolved against that page's own URL,
    stripped of its ``#fragment``, and queued if it is on-site, not excluded,
    and not already queued.

    Args:
        base_url (str): The starting URL. Only links whose absolute form starts
            with this string are followed.
        output_file (str): Path of the file that successfully fetched URLs are
            appended to, one per line.
        exclude_url (str): URLs containing this substring are skipped. An empty
            or None value disables the exclusion filter.
        timeout (float): Seconds to wait for each HTTP request before giving up.

    Raises:
        OSError: If ``output_file`` cannot be opened. Per-URL fetch, parse and
            write errors are printed and the crawl continues.

    Returns:
        None: The results are saved to the specified output file.
    """
    from collections import deque  # local import: FIFO queue with O(1) pops

    def _is_excluded(candidate):
        # `"" in candidate` is always True, so an empty filter must be ignored
        # rather than excluding every URL.
        return bool(exclude_url) and exclude_url in candidate

    start_url = urllib.parse.urldefrag(base_url)[0]
    # Every URL ever queued; guarantees each URL is fetched at most once, even
    # when it is linked from many pages or its fetch fails.
    seen_urls = {start_url}
    urls_to_visit = deque([start_url])

    # Open the file in append mode
    with open(output_file, 'a', encoding='utf-8') as file:
        while urls_to_visit:
            url = urls_to_visit.popleft()

            if _is_excluded(url):
                continue

            try:
                # Without a timeout a single stalled server hangs the whole crawl.
                response = requests.get(url, timeout=timeout)
                response.raise_for_status()  # Raise an error for bad responses
                soup = BeautifulSoup(response.text, 'html.parser')
            except requests.RequestException as e:
                print(f"Error fetching {url}: {e}")
                continue
            except Exception as e:
                print(f"Error parsing {url}: {e}")
                continue

            # Append the found URL to the file
            try:
                file.write(url + '\n')
                print(f"Added {url}")
            except Exception as e:
                print(f"Error writing {url} to file: {e}")
                continue

            # Relative links are relative to the page that contains them (after
            # redirects), not to the crawl's starting URL.
            page_url = response.url or url

            # Find all anchor tags and extract their href attributes
            for link in soup.find_all('a', href=True):
                try:
                    href = link['href']
                    absolute_url = urllib.parse.urljoin(page_url, href)
                    # "#section" variants point at the same document.
                    full_url = urllib.parse.urldefrag(absolute_url)[0]
                    if (
                        full_url.startswith(base_url)
                        and full_url not in seen_urls
                        and not _is_excluded(full_url)
                    ):
                        seen_urls.add(full_url)
                        urls_to_visit.append(full_url)
                except Exception as e:
                    print(f"Error processing link {link}: {e}")
                    continue

if __name__ == "__main__":
    # Crawl settings: the start page, the file that collects discovered URLs,
    # and the substring marking URLs that must be skipped.
    base_url, output_file, exclude_url = (
        "https://www.stjohns.edu/",
        "urls.txt",
        "https://www.givecampus.com/",
    )
    crawl_and_save_urls(
        base_url=base_url,
        output_file=output_file,
        exclude_url=exclude_url,
    )
    print(f"Crawling completed. URLs saved to {output_file}")


In [None]:
import os
import mimetypes
import requests
import nest_asyncio
import asyncio
from urllib.parse import urljoin
from playwright.async_api import async_playwright
from langchain.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader
from langchain.schema import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Apply the nest_asyncio patch
# NOTE(review): presumably required because the notebook kernel already runs an
# event loop while the entry point below calls asyncio.run() -- confirm.
nest_asyncio.apply()

# Load your environment file
# Must run before the os.environ lookup below so that PINECONE_INDEX (and,
# presumably, the OpenAI/Pinecone API keys read by the clients) is available.
load_dotenv()

# Pinecone index configuration
# index_name is None when PINECONE_INDEX is not set in the environment.
index_name = os.environ.get('PINECONE_INDEX')
embeddings = OpenAIEmbeddings()
# NOTE(review): `vectorstore` is not referenced by any function visible in this
# cell; uploads go through PineconeVectorStore.from_documents in process_batches.
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)

# Helper functions

def get_file_type(url):
    """
    Guess a MIME type from the extension found in a URL or filename.

    Args:
        url (str): The URL or filename to inspect.

    Returns:
        str | None: The guessed MIME type (e.g., 'application/pdf', 'text/html'),
        or None when ``mimetypes`` cannot infer one from the name.
    """
    # guess_type yields a (type, encoding) pair; only the type is of interest.
    guessed = mimetypes.guess_type(url)
    return guessed[0]

def download_file(url, path, timeout=30, verify=False):
    """
    Download a file from the given URL and save it to the specified path.

    The parent directory of ``path`` is created when it does not exist yet, so
    callers can target a fresh folder such as ``downloads/``.

    Args:
        url (str): The URL of the file to download.
        path (str): The local path to save the downloaded file.
        timeout (float): Seconds to wait for the server before giving up.
        verify (bool | str): Passed to ``requests.get(verify=...)``.
            SECURITY NOTE(review): the default stays False to preserve the
            original behavior, which disables TLS certificate verification.
            Pass True (or a CA bundle path) unless the target host really has
            a broken certificate chain.

    Returns:
        str: The file path if download is successful.
        None: If the download fails (the error is printed).
    """
    try:
        # A timeout keeps one unresponsive host from stalling the whole batch.
        response = requests.get(url, verify=verify, timeout=timeout)
        response.raise_for_status()

        # open() does not create missing directories.
        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)

        with open(path, "wb") as file:
            file.write(response.content)
        print(f"Downloaded: {url}")
        return path
    except Exception as e:
        # Best-effort by design: report the failure and let the caller move on.
        print(f"Failed to download {url}: {e}")
        return None

async def fetch_page_content(url, timeout_ms=30000, wait_until="load"):
    """
    Fetch the rendered HTML content of a webpage asynchronously with Playwright.

    A Chromium instance is launched for the call and closed again before
    returning, whether or not the navigation succeeded.

    Args:
        url (str): The URL of the webpage to fetch.
        timeout_ms (float): Navigation timeout in milliseconds, forwarded to
            ``page.goto``. The default matches the 30000 ms seen in the
            original run's timeout messages.
        wait_until (str): Navigation event to wait for, forwarded to
            ``page.goto``. The default "load" matches the original run; a
            less strict event such as "domcontentloaded" may reduce timeouts
            on slow pages.

    Returns:
        str: The HTML content of the webpage.
        None: If an error occurs while launching the browser or fetching.
    """
    async with async_playwright() as p:
        browser = None
        try:
            # Launch and page creation sit inside the try so that their
            # failures are reported as None too, as the contract promises.
            browser = await p.chromium.launch()
            page = await browser.new_page()
            await page.goto(url, timeout=timeout_ms, wait_until=wait_until)
            return await page.content()
        except Exception as e:
            print(f"Failed to fetch {url}: {e}")
            return None
        finally:
            # Single cleanup path for both the success and the failure case.
            if browser is not None:
                await browser.close()

def clean_html(raw_html):
    """
    Strip the markup from an HTML string and return its text.

    Args:
        raw_html (str): The raw HTML string to clean.

    Returns:
        str: The text nodes of the document, each stripped of surrounding
        whitespace and joined with newlines.
    """
    parsed = BeautifulSoup(raw_html, features="html.parser")
    plain_text = parsed.get_text("\n", strip=True)
    return plain_text

async def load_documents_async(url):
    """
    Load documents from a URL based on the file type guessed from the URL.

    - PDF URLs are passed straight to ``PyPDFLoader``.
    - Word documents (.docx / .doc) are downloaded to a temporary local file
      first, because ``UnstructuredWordDocumentLoader`` reads from the local
      filesystem; handing it a URL fails with "no such file or directory" /
      "does not exist".
    - Anything else is treated as a web page: it is rendered with
      ``fetch_page_content`` and reduced to plain text with ``clean_html``.

    Args:
        url (str): The URL of the document or webpage.

    Returns:
        list: A list of Document objects with page content and metadata.
        Empty when the page or file could not be fetched.
    """
    # Local imports keep this block self-contained.
    import tempfile
    from urllib.parse import urlparse

    file_type = get_file_type(url)

    if file_type == "application/pdf":
        return PyPDFLoader(url).load()

    word_types = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/msword",
    )
    if file_type in word_types:
        # Keep the original extension on the temp file; fall back to .docx.
        suffix = os.path.splitext(urlparse(url).path)[1] or ".docx"
        handle, local_path = tempfile.mkstemp(suffix=suffix)
        os.close(handle)  # download_file reopens the path itself
        try:
            if not download_file(url, local_path):
                return []
            documents = UnstructuredWordDocumentLoader(local_path).load()
        finally:
            if os.path.exists(local_path):
                os.remove(local_path)
        # Record the real origin instead of the throwaway temp path.
        for document in documents:
            document.metadata["source"] = url
        return documents

    content = await fetch_page_content(url)
    if not content:
        return []
    metadata = {"source": url}  # Only include URL as metadata
    return [Document(page_content=clean_html(content), metadata=metadata)]

# MIME types that load_documents_async hands to a file loader instead of the browser.
_FILE_LOADER_MIME_TYPES = (
    "application/pdf",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/msword",
)

# Extensions of linked files that are downloaded from a crawled page.
_LINKED_FILE_EXTENSIONS = (".pdf", ".docx")


async def _load_url_with_html(url):
    """Return ``(documents, raw_html)`` for a URL; ``raw_html`` is None for non-HTML URLs."""
    if get_file_type(url) in _FILE_LOADER_MIME_TYPES:
        return await load_documents_async(url), None

    # Web pages are fetched here (same steps as load_documents_async's page
    # branch) so the raw HTML is still available for link extraction.
    raw_html = await fetch_page_content(url)
    if not raw_html:
        return [], None
    page_document = Document(page_content=clean_html(raw_html), metadata={"source": url})
    return [page_document], raw_html


def _load_linked_files(page_url, raw_html, download_dir="downloads"):
    """Download and load every .pdf/.docx file linked from ``raw_html``."""
    from urllib.parse import urlparse  # local import keeps the block self-contained

    linked_documents = []
    handled_file_urls = set()
    soup = BeautifulSoup(raw_html, "html.parser")

    for link in soup.find_all("a", href=True):
        href = link["href"]
        # Skip unwanted links
        if "/email-protection#" in href:
            print(f"Skipping link containing '/email-protection#': {href}")
            continue

        # urljoin leaves absolute hrefs untouched and resolves relative ones.
        file_url = urljoin(page_url, href)
        # Inspect only the URL path so query strings and letter case do not
        # hide the extension.
        url_path = urlparse(file_url).path
        extension = os.path.splitext(url_path)[1].lower()
        if extension not in _LINKED_FILE_EXTENSIONS or file_url in handled_file_urls:
            continue
        handled_file_urls.add(file_url)

        os.makedirs(download_dir, exist_ok=True)
        file_path = os.path.join(download_dir, os.path.basename(url_path))
        if not download_file(file_url, file_path):
            continue

        try:
            if extension == ".pdf":
                file_documents = PyPDFLoader(file_path).load()
            else:
                file_documents = UnstructuredWordDocumentLoader(file_path).load()
        except Exception as e:
            # One unreadable attachment must not discard the rest of the page.
            print(f"Error loading linked file {file_url}: {e}")
            continue

        # Point the metadata at the original URL rather than the local copy.
        for document in file_documents:
            document.metadata["source"] = file_url
        linked_documents.extend(file_documents)

    return linked_documents


async def process_url_batch_async(url_batch):
    """
    Process a batch of URLs asynchronously, extracting documents and their linked files.

    Each URL is loaded into Document objects. For web pages, the raw HTML is
    additionally scanned for links to .pdf/.docx files, which are downloaded
    into ``downloads/`` and loaded as well. Previously this scan never ran: it
    was guarded by ``"page_content" in doc.metadata`` (the metadata only holds
    "source") and parsed the already cleaned text, which contains no anchors.

    Args:
        url_batch (list): A list of URLs to process.

    Returns:
        list: A list of all Document objects processed from the URLs. A URL
        whose processing raises is reported with a printed message and skipped.
    """
    all_documents = []
    for url in url_batch:
        try:
            documents, raw_html = await _load_url_with_html(url)
            all_documents.extend(documents)

            # Process linked files in web pages
            if raw_html:
                all_documents.extend(_load_linked_files(url, raw_html))
        except Exception as e:
            print(f"Error processing {url}: {e}")
    return all_documents

def deduplicate_documents(docs):
    """
    Drop documents whose page content has already been seen.

    Args:
        docs (list): A list of Document objects.

    Returns:
        list: The first document for each distinct ``page_content``, in the
        order those first occurrences appear in ``docs``.
    """
    # Dicts keep insertion order, and setdefault stores only the first
    # document for a given content.
    first_by_content = {}
    for document in docs:
        first_by_content.setdefault(document.page_content, document)
    return list(first_by_content.values())

async def process_batches(urls, batch_size):
    """
    Process a large list of URLs in smaller batches asynchronously.

    For each batch the URLs are loaded into documents, de-duplicated by page
    content, split into overlapping chunks and uploaded to the Pinecone index
    named by the module-level ``index_name``. Upload errors are printed and the
    next batch is processed.

    Args:
        urls (list): The list of URLs to process.
        batch_size (int): The number of URLs to process per batch.

    Raises:
        ValueError: If ``batch_size`` is not positive.

    Returns:
        None
    """
    if batch_size <= 0:
        # range() rejects a zero step with an obscure message and silently
        # yields nothing for a negative one; fail clearly instead.
        raise ValueError("batch_size must be a positive integer")

    # The splitter is the same for every batch, so build it once.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=300)

    for batch_number, start in enumerate(range(0, len(urls), batch_size), start=1):
        print(f"Processing batch {batch_number}...")
        url_batch = urls[start:start + batch_size]

        # Process URL batch asynchronously
        all_documents = await process_url_batch_async(url_batch)

        # Deduplicate documents
        unique_docs = deduplicate_documents(all_documents)

        # Split documents into chunks
        docs = text_splitter.split_documents(unique_docs)
        if not docs:
            # Every fetch in the batch failed (or returned no text): there is
            # nothing to embed, so do not report a successful upload.
            print(f"Batch {batch_number} produced no documents; skipping upload.")
            continue

        # Upload chunks to Pinecone
        try:
            PineconeVectorStore.from_documents(
                docs, index_name=index_name, embedding=embeddings
            )
            print(f"Uploaded batch {batch_number} to Pinecone.")
        except Exception as e:
            print(f"Error uploading batch {batch_number}: {e}")

# Main script entry point
if __name__ == "__main__":
    # Read the crawler's output: one URL per line, blank lines ignored.
    with open("urls.txt", "r") as f:
        urls = [entry for entry in map(str.strip, f) if entry]

    # Embed and upload the pages to Pinecone, 100 URLs per batch.
    asyncio.run(process_batches(urls=urls, batch_size=100))



  from tqdm.autonotebook import tqdm


Processing batch 1...
Uploaded batch 1 to Pinecone.
Processing batch 2...
Failed to fetch https://www.stjohns.edu/alumni-and-friends/get-involved/events-and-programs: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/alumni-and-friends/get-involved/events-and-programs", waiting until "load"

Failed to fetch https://www.stjohns.edu/st-johns-university-alumni-friends/get-involved/virtual-book-remembrance: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/st-johns-university-alumni-friends/get-involved/virtual-book-remembrance", waiting until "load"

Failed to fetch https://www.stjohns.edu/alumni-and-friends/get-involved/christmas-puzzle: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/alumni-and-friends/get-involved/christmas-puzzle", waiting until "load"

Failed to fetch https://www.stjohns.edu/alumni-and-friends/alumni-benefits-and-resources/career-support: Page.goto: Timeout 3

Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)


Failed to fetch https://www.stjohns.edu/news-media/st-johns-media/associate-professor-criminal-justice-and-homeland-security-james-okeefe-phd-selecting-safe-place: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/news-media/st-johns-media/associate-professor-criminal-justice-and-homeland-security-james-okeefe-phd-selecting-safe-place", waiting until "load"

Failed to fetch https://www.stjohns.edu/offices-departments/art-and-design: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/offices-departments/art-and-design", waiting until "load"

Failed to fetch https://www.stjohns.edu/offices-departments?school=11: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/offices-departments?school=11", waiting until "load"

Failed to fetch https://www.stjohns.edu/offices-departments/arts-and-humanities: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/offi

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 28 0 (offset 0)
Ignoring wrong pointing object 33 0 (offset 0)
Ignoring wrong pointing object 44 0 (offset 0)
Ignoring wrong pointing object 49 0 (offset 0)
Ignoring wrong pointing object 58 0 (offset 0)


Uploaded batch 14 to Pinecone.
Processing batch 15...
Failed to fetch https://www.stjohns.edu/files/emergency-planning: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/emergency-planning
Call log:
navigating to "https://www.stjohns.edu/files/emergency-planning", waiting until "load"

Uploaded batch 15 to Pinecone.
Processing batch 16...
Failed to fetch https://www.stjohns.edu/files/queens-campus-map-may-2022: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/queens-campus-map-may-2022
Call log:
navigating to "https://www.stjohns.edu/files/queens-campus-map-may-2022", waiting until "load"

Uploaded batch 16 to Pinecone.
Processing batch 17...
Failed to fetch https://www.stjohns.edu/news-media/news/2024-10-09/tobin-college-business-hosts-deans-national-conference: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/news-media/news/2024-10-09/tobin-college-business-hosts-deans-national-conference", waiting until "load"

Uploaded ba

Ignoring wrong pointing object 5 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 21 0 (offset 0)
Ignoring wrong pointing object 32 0 (offset 0)
Ignoring wrong pointing object 40 0 (offset 0)
Ignoring wrong pointing object 42 0 (offset 0)
Ignoring wrong pointing object 48 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)


Error processing https://www.stjohns.edu/sites/default/files/uploads/pgy2_brochure_2017.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/uploads/pgy2_brochure_2017.docx'
Failed to fetch https://www.stjohns.edu/files/academic-honor-pledge: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/academic-honor-pledge
Call log:
navigating to "https://www.stjohns.edu/files/academic-honor-pledge", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/center-counseling-and-consultation-disability-services-graduate-assistant: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/center-counseling-and-consultation-disability-services-graduate-assistant
Call log:
navigating to "https://www.stjohns.edu/files/center-counseling-and-consultation-disability-services-graduate-assistant", waiting until "load"

Uploaded batch 33 to Pinecone.
Processing batch 34...
Failed to fetch https://www.stjohns.edu/files/2023-ferpa-waiver-release-form: Page.go

Ignoring wrong pointing object 7 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)


Failed to fetch https://www.stjohns.edu/files/gender-pronouns-faculty-guide-january-2020: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/gender-pronouns-faculty-guide-january-2020
Call log:
navigating to "https://www.stjohns.edu/files/gender-pronouns-faculty-guide-january-2020", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/20221003queervibesonlyflyer: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/20221003queervibesonlyflyer
Call log:
navigating to "https://www.stjohns.edu/files/20221003queervibesonlyflyer", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/lgbtq-grand-opening-flyer: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/lgbtq-grand-opening-flyer
Call log:
navigating to "https://www.stjohns.edu/files/lgbtq-grand-opening-flyer", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/no-straight-lines-rise-queer-comics: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/no-s

Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)


Failed to fetch https://www.stjohns.edu/files/march-2021-ma-public-history-internship-handbook: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/march-2021-ma-public-history-internship-handbook
Call log:
navigating to "https://www.stjohns.edu/files/march-2021-ma-public-history-internship-handbook", waiting until "load"

Uploaded batch 46 to Pinecone.
Processing batch 47...
Failed to fetch https://www.stjohns.edu/files/bachelor-artsjuris-doctor: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/bachelor-artsjuris-doctor
Call log:
navigating to "https://www.stjohns.edu/files/bachelor-artsjuris-doctor", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/master-arts-spanish: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/master-arts-spanish
Call log:
navigating to "https://www.stjohns.edu/files/master-arts-spanish", waiting until "load"

Failed to fetch https://www.stjohns.edu/academics/faculty/james-e-mccabe: Page.goto: Timeout 30000ms

Ignoring wrong pointing object 9 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)


Failed to fetch https://www.stjohns.edu/files/collegiate-science-and-technology-entry-program-cstep-application: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/collegiate-science-and-technology-entry-program-cstep-application
Call log:
navigating to "https://www.stjohns.edu/files/collegiate-science-and-technology-entry-program-cstep-application", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/mcnair-scholars-program-application: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/mcnair-scholars-program-application
Call log:
navigating to "https://www.stjohns.edu/files/mcnair-scholars-program-application", waiting until "load"

Failed to fetch https://www.stjohns.edu/node/407201: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/node/407201
Call log:
navigating to "https://www.stjohns.edu/node/407201", waiting until "load"

Uploaded batch 61 to Pinecone.
Processing batch 62...
Failed to fetch https://www.stjohns.edu/files/friends-library

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 28 0 (offset 0)
Ignoring wrong pointing object 30 0 (offset 0)
Ignoring wrong pointing object 42 0 (offset 0)
Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 25 0 (offset 0)
Ignoring wrong pointing object 28 0 (offset 0)
Ignoring wrong pointing object 30 0 (offset 0)
Ignoring wrong po

Failed to fetch https://www.stjohns.edu/files/full-ivbec-2022-schedule: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/full-ivbec-2022-schedule
Call log:
navigating to "https://www.stjohns.edu/files/full-ivbec-2022-schedule", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/ivbec2022-official-proceedings: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/ivbec2022-official-proceedings
Call log:
navigating to "https://www.stjohns.edu/files/ivbec2022-official-proceedings", waiting until "load"

Uploaded batch 64 to Pinecone.
Processing batch 65...
Uploaded batch 65 to Pinecone.
Processing batch 66...
Failed to fetch https://www.stjohns.edu/files/graduate-assistantships-and-doctoral-fellows-manual: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/graduate-assistantships-and-doctoral-fellows-manual
Call log:
navigating to "https://www.stjohns.edu/files/graduate-assistantships-and-doctoral-fellows-manual", waiting until "load"

Failed 

Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 15 0 (offset 0)


Failed to fetch https://www.stjohns.edu/files/december-31-2021-quarterly-budget-and-expenditure-reporting-heerf-i-ii-and-iii-a1-institutional-0: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/december-31-2021-quarterly-budget-and-expenditure-reporting-heerf-i-ii-and-iii-a1-institutional-0
Call log:
navigating to "https://www.stjohns.edu/files/december-31-2021-quarterly-budget-and-expenditure-reporting-heerf-i-ii-and-iii-a1-institutional-0", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/00282300heerfq32021101021: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/00282300heerfq32021101021
Call log:
navigating to "https://www.stjohns.edu/files/00282300heerfq32021101021", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/quarterly-budget-and-expenditure-reporting-heerf-i-ii-and-iii-a1-institutional-portion: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/quarterly-budget-and-expenditure-reporting-heerf-i-ii-and-i

Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)


Uploaded batch 67 to Pinecone.
Processing batch 68...
Failed to fetch https://www.stjohns.edu/academics/schools/peter-j-tobin-college-business/departments-centers-and-faculty-research/faculty: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/academics/schools/peter-j-tobin-college-business/departments-centers-and-faculty-research/faculty", waiting until "load"

Uploaded batch 68 to Pinecone.
Processing batch 69...


Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 13 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 27 0 (offset 0)
Ignoring wrong pointing object 53 0 (offset 0)
Ignoring wrong pointing object 55 0 (offset 0)
Ignoring wrong pointing object 67 0 (offset 0)
Ignoring wrong pointing object 71 0 (offset 0)
Ignoring wrong pointing object 13 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 35 0 (offset 0)
Ignoring wrong pointing object 66 0 (offset 0)


Uploaded batch 69 to Pinecone.
Processing batch 70...
Error processing https://www.stjohns.edu/sites/default/files/uploads/dr._may_webber_bullying_article.doc: The file https://www.stjohns.edu/sites/default/files/uploads/dr._may_webber_bullying_article.doc does not exist.


Ignoring wrong pointing object 14 0 (offset 0)


Uploaded batch 70 to Pinecone.
Processing batch 71...
Failed to fetch https://www.stjohns.edu/files/baptism-brochure-2020: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/baptism-brochure-2020
Call log:
navigating to "https://www.stjohns.edu/files/baptism-brochure-2020", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/sacrament-marriage-brochure-2020: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/sacrament-marriage-brochure-2020
Call log:
navigating to "https://www.stjohns.edu/files/sacrament-marriage-brochure-2020", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/m-shawn-copeland-biography: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/m-shawn-copeland-biography
Call log:
navigating to "https://www.stjohns.edu/files/m-shawn-copeland-biography", waiting until "load"



Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)


Failed to fetch https://www.stjohns.edu/files/vcsj-women-color-lecture-1-flyer: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/vcsj-women-color-lecture-1-flyer
Call log:
navigating to "https://www.stjohns.edu/files/vcsj-women-color-lecture-1-flyer", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/september-24-poverty-conference-flyer: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/september-24-poverty-conference-flyer
Call log:
navigating to "https://www.stjohns.edu/files/september-24-poverty-conference-flyer", waiting until "load"

Error processing https://www.stjohns.edu/sites/default/files/2024-02/Acculturation_Registration_Form_2024.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/2024-02/Acculturation_Registration_Form_2024.docx'
Error processing https://www.stjohns.edu/sites/default/files/2024-02/2024_Acculturation_Verification_Form.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/

Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 28 0 (offset 0)
Ignoring wrong pointing object 30 0 (offset 0)
Ignoring wrong pointing object 40 0 (offset 0)
Ignoring wrong pointing object 42 0 (offset 0)
Ignoring wrong pointing object 60 0 (offset 0)
Ignoring wrong pointing object 83 0 (offset 0)
Ignoring wrong pointing object 90 0 (offset 0)
Ignoring wrong pointing object 96 0 (offset 0)
Ignoring wrong pointing object 166 0 (offset 0)
Ignoring wrong pointing object 185 0 (offset 0)
Ignoring wrong pointing object 187 0 (offset 0)
Ignoring wrong pointing object 189 0 (offset 0)
Ignoring wrong pointing object 204 0 (offset 0)
Ignoring wrong pointing object 206 0 (offset 0)
Ignoring wrong pointing object 210 0 (offset 0)
Ignoring wrong pointing object 214 0 (offset 0)
Ignori

Failed to fetch https://www.stjohns.edu/files/real-final-fall-2020-bias-log: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/real-final-fall-2020-bias-log
Call log:
navigating to "https://www.stjohns.edu/files/real-final-fall-2020-bias-log", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/real-final-spring-2020-bias-log: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/real-final-spring-2020-bias-log
Call log:
navigating to "https://www.stjohns.edu/files/real-final-spring-2020-bias-log", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/real-final-fall-2019-bias-log: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/real-final-fall-2019-bias-log
Call log:
navigating to "https://www.stjohns.edu/files/real-final-fall-2019-bias-log", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/real-final-spring-2019-bias-log: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/real-final-spring-2019-

Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 22 0 (offset 0)


Failed to fetch https://www.stjohns.edu/files/communication-and-stress-2022-flyer: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/communication-and-stress-2022-flyer
Call log:
navigating to "https://www.stjohns.edu/files/communication-and-stress-2022-flyer", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/stress-awareness-month-flyer-2022: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/stress-awareness-month-flyer-2022
Call log:
navigating to "https://www.stjohns.edu/files/stress-awareness-month-flyer-2022", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/stress-management-preparedness-plan-flyer-2022: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/stress-management-preparedness-plan-flyer-2022
Call log:
navigating to "https://www.stjohns.edu/files/stress-management-preparedness-plan-flyer-2022", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/stress-management-service-highlight-flyer-

Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)
Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 13 0 (offset 0)
Ignoring wrong pointing object 15 0 (offset 0)
Ignoring wrong pointing object 17 0 (offset 0)
Ignoring wrong pointing object 19 0 (offset 0)
Ignoring wrong pointing object 21 0 (offset 0)
Ignoring wrong pointing object 23 0 (offset 0)
Ignoring wrong pointing object 25 0 (offset 0)
Ignoring wrong pointing object 27 0 (offset 0)
Ignoring wrong pointing object 30 0 (offset 0)
Ignoring wrong 

Failed to fetch https://www.stjohns.edu/files/managing-re-entry-nervousness: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/managing-re-entry-nervousness
Call log:
navigating to "https://www.stjohns.edu/files/managing-re-entry-nervousness", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/managing-virtual-and-site-teams: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/managing-virtual-and-site-teams
Call log:
navigating to "https://www.stjohns.edu/files/managing-virtual-and-site-teams", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/rebuilding-social-connections-post-pandemic: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/rebuilding-social-connections-post-pandemic
Call log:
navigating to "https://www.stjohns.edu/files/rebuilding-social-connections-post-pandemic", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/support-working-caregivers: Page.goto: net::ERR_ABORTED at https://www.stjo

Ignoring wrong pointing object 6 0 (offset 0)


Error processing https://www.stjohns.edu/sites/default/files/2019-04/residence_life_-_time_commitment_form.doc: The file https://www.stjohns.edu/sites/default/files/2019-04/residence_life_-_time_commitment_form.doc does not exist.
Failed to fetch https://www.stjohns.edu/files/design-and-construction-furniture-policy-august-2023: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/design-and-construction-furniture-policy-august-2023
Call log:
navigating to "https://www.stjohns.edu/files/design-and-construction-furniture-policy-august-2023", waiting until "load"

Uploaded batch 78 to Pinecone.
Processing batch 79...
Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/Basilio%20Monteiro%20CV%202015-1.doc: The file https://www.stjohns.edu/sites/default/files/faculty/cv/Basilio%20Monteiro%20CV%202015-1.doc does not exist.
Failed to fetch https://www.stjohns.edu/?school=26&page=0: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.e

Ignoring wrong pointing object 8 0 (offset 0)
Overwriting cache for 0 2266
Overwriting cache for 0 717


Uploaded batch 80 to Pinecone.
Processing batch 81...
Uploaded batch 81 to Pinecone.
Processing batch 82...
Failed to fetch https://www.stjohns.edu/academics/faculty/stanley-d-stone: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/academics/faculty/stanley-d-stone", waiting until "load"

Failed to fetch https://www.stjohns.edu/academics/faculty/brandon-w-sweitzer: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/academics/faculty/brandon-w-sweitzer", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/life-class-1-certificate-fly960512-sju: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/life-class-1-certificate-fly960512-sju
Call log:
navigating to "https://www.stjohns.edu/files/life-class-1-certificate-fly960512-sju", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/life-class-2-certificate-fly960512-sju: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/fil

Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 34 0 (offset 0)


Uploaded batch 86 to Pinecone.
Processing batch 87...
Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/CORINNECV%20SJU%20e-portfolio-1.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/faculty/cv/CORINNECV%20SJU%20e-portfolio-1.docx'
Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/2020.12.08%20Resume%20%28Adrian%20Alvarez%29.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/faculty/cv/2020.12.08%20Resume%20%28Adrian%20Alvarez%29.docx'


Ignoring wrong pointing object 52 0 (offset 0)
Ignoring wrong pointing object 53 0 (offset 0)
Ignoring wrong pointing object 72 0 (offset 0)
Ignoring wrong pointing object 73 0 (offset 0)
Ignoring wrong pointing object 162 0 (offset 0)
Ignoring wrong pointing object 163 0 (offset 0)
Ignoring wrong pointing object 132 0 (offset 0)
Ignoring wrong pointing object 133 0 (offset 0)
Ignoring wrong pointing object 102 0 (offset 0)
Ignoring wrong pointing object 103 0 (offset 0)
Ignoring wrong pointing object 102 0 (offset 0)
Ignoring wrong pointing object 103 0 (offset 0)
Ignoring wrong pointing object 172 0 (offset 0)
Ignoring wrong pointing object 173 0 (offset 0)
Ignoring wrong pointing object 102 0 (offset 0)
Ignoring wrong pointing object 103 0 (offset 0)


Uploaded batch 87 to Pinecone.
Processing batch 88...
Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/2017vitae-1.doc: The file https://www.stjohns.edu/sites/default/files/faculty/cv/2017vitae-1.doc does not exist.
Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/Pengli_Bu_Curriculum_Vitae_9-13-2022.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/faculty/cv/Pengli_Bu_Curriculum_Vitae_9-13-2022.docx'


Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 23 0 (offset 0)
Ignoring wrong pointing object 25 0 (offset 0)
Ignoring wrong pointing object 31 0 (offset 0)
Ignoring wrong pointing object 34 0 (offset 0)
Ignoring wrong pointing object 49 0 (offset 0)


Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/ownvt2019-1.doc: The file https://www.stjohns.edu/sites/default/files/faculty/cv/ownvt2019-1.doc does not exist.
Uploaded batch 88 to Pinecone.
Processing batch 89...
Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/Chen-ZS-050616-1.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/faculty/cv/Chen-ZS-050616-1.docx'


Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 35 0 (offset 0)
Ignoring wrong pointing object 58 0 (offset 0)
Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 29 0 (offset 0)
Ignoring wrong pointing object 38 0 (offset 0)
Ignoring wrong pointing object 40 0 (offset 0)
Ignoring wrong pointing object 42 0 (offset 0)
Ignoring wrong pointing object 54 0 (offset 0)
Ignoring wrong pointing object 56 0 (offset 0)


Uploaded batch 89 to Pinecone.
Processing batch 90...
Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/ian_m_miller_cv_spring_2017.doc: The file https://www.stjohns.edu/sites/default/files/faculty/cv/ian_m_miller_cv_spring_2017.doc does not exist.
Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/Rustomji%20CV%20Condensed.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/faculty/cv/Rustomji%20CV%20Condensed.docx'
Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/vapnek_cv_spring_2017.doc: The file https://www.stjohns.edu/sites/default/files/faculty/cv/vapnek_cv_spring_2017.doc does not exist.
Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/sacca_cv.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/faculty/cv/sacca_cv.docx'
Uploaded batch 90 to Pinecone.
Processing batch 91...
Error processing https://www.stjohns.edu/sites/default/files/facul

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 43 0 (offset 0)
Ignoring wrong pointing object 61 0 (offset 0)


Failed to fetch https://www.stjohns.edu/files/2022-request-non-academic-graduate-assistant-form: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/2022-request-non-academic-graduate-assistant-form
Call log:
navigating to "https://www.stjohns.edu/files/2022-request-non-academic-graduate-assistant-form", waiting until "load"

Error processing https://www.stjohns.edu/sites/default/files/2022-04/2022%20Request%20for%20Non-Academic%20GA%20Updated%20form%20%285%29_0.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/2022-04/2022%20Request%20for%20Non-Academic%20GA%20Updated%20form%20%285%29_0.docx'
Failed to fetch https://www.stjohns.edu/files/2021-non-academic-ga-epcf: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/2021-non-academic-ga-epcf
Call log:
navigating to "https://www.stjohns.edu/files/2021-non-academic-ga-epcf", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/2023-new-faculty-brochure: Page.goto: net::ERR_ABOR

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 23 0 (offset 0)


Failed to fetch https://www.stjohns.edu/files/undergrad-applied-finance-fellowship-application: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/undergrad-applied-finance-fellowship-application
Call log:
navigating to "https://www.stjohns.edu/files/undergrad-applied-finance-fellowship-application", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/graduate-applied-finance-fellows-application: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/graduate-applied-finance-fellows-application
Call log:
navigating to "https://www.stjohns.edu/files/graduate-applied-finance-fellows-application", waiting until "load"

Uploaded batch 106 to Pinecone.
Processing batch 107...
Failed to fetch https://www.stjohns.edu/academics/faculty/colleen-m-ducey: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/academics/faculty/colleen-m-ducey", waiting until "load"

Failed to fetch https://www.stjohns.edu/academics/faculty/keith-m-pre

Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)


Uploaded batch 109 to Pinecone.
Processing batch 110...
Failed to fetch https://www.stjohns.edu/node/6496: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/node/6496", waiting until "load"

Failed to fetch https://www.stjohns.edu/law/about/places/tenants-rights-advocacy-clinic: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/law/about/places/tenants-rights-advocacy-clinic", waiting until "load"

Failed to fetch https://www.stjohns.edu/law/about/places/consumer-justice-elderly-litigation-clinic: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/law/about/places/consumer-justice-elderly-litigation-clinic", waiting until "load"

Failed to fetch https://www.stjohns.edu/law/about/subscribe-our-newsletter: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/law/about/subscribe-our-newsletter", waiting until "load"



Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 13 0 (offset 0)
Ignoring wrong pointing object 15 0 (offset 0)
Ignoring wrong pointing object 17 0 (offset 0)
Ignoring wrong pointing object 19 0 (offset 0)


Failed to fetch https://www.stjohns.edu/admission/tuition-and-financial-aid/: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/admission/tuition-and-financial-aid/", waiting until "load"



Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 15 0 (offset 0)
Ignoring wrong pointing object 48 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 21 0 (offset 0)
Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 23 0 (offset 0)
Ignoring wrong pointing object 28 0 (offset 0)
Ignoring wrong pointing object 30 0 (offset 0)
Ignoring wrong pointing object 55 0 (offset 0)
Ignoring wrong pointing object 60 0 (offset 0)
Ignoring wrong pointing object 68 0 (offset 0)
Ignoring wrong pointing object 70 0 (offset 0)
Ignoring wrong pointing object 72 0 (offset 0)
Ignoring wrong pointing object 90 0 (offset 0)


Uploaded batch 110 to Pinecone.
Processing batch 111...
Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/CV%20William%20Reisel%20042920-1.doc: The file https://www.stjohns.edu/sites/default/files/faculty/cv/CV%20William%20Reisel%20042920-1.doc does not exist.


Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)


Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/Dr.%20Reznik%20CV_2019.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/faculty/cv/Dr.%20Reznik%20CV_2019.docx'
Uploaded batch 111 to Pinecone.
Processing batch 112...
Error processing https://www.stjohns.edu/sites/default/files/uploads/psyd-5yr-intern-application.doc: The file https://www.stjohns.edu/sites/default/files/uploads/psyd-5yr-intern-application.doc does not exist.


Advanced encoding /SymbolSetEncoding not implemented yet
Advanced encoding /SymbolSetEncoding not implemented yet
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 40 0 (offset 0)
Ignoring wrong pointing object 43 0 (offset 0)
Ignoring wrong pointing object 113 0 (offset 0)
Ignoring wrong pointing object 566 0 (offset 0)
Ignoring wrong pointing object 568 0 (offset 0)
Ignoring wrong pointing object 570 0 (offset 0)
Ignoring wrong pointing object 576 0 (offset 0)
Ignoring wrong pointing object 578 0 (offset 0)
Ignoring wrong pointing object 36 0 (offset 0)
Ignoring wrong pointing object 118 0 (offset 0)
Ignoring wrong pointing object 381 0 (offset 0)
Ignoring wrong pointing object 598 0 (offset 0)
Ignoring wrong pointing object 600 0 (offset 0)
Ignoring wrong pointing object 602 0 (offset 0)
Ignoring wrong pointing object 612 0 (offset 0)
Ignoring wrong pointing object 634 0 (offset 0)
Ignoring wrong pointing obje

Failed to fetch https://www.stjohns.edu/files/bachelor-arts-economics: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/bachelor-arts-economics
Call log:
navigating to "https://www.stjohns.edu/files/bachelor-arts-economics", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/bachelor-arts-english: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/bachelor-arts-english
Call log:
navigating to "https://www.stjohns.edu/files/bachelor-arts-english", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/bachelor-arts-and-bachelor-science-environmental-studies: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/bachelor-arts-and-bachelor-science-environmental-studies
Call log:
navigating to "https://www.stjohns.edu/files/bachelor-arts-and-bachelor-science-environmental-studies", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/bachelor-arts-french: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/b

Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 15 0 (offset 0)
Ignoring wrong pointing object 27 0 (offset 0)


Failed to fetch https://www.stjohns.edu/files/review-business-january-2018: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/review-business-january-2018
Call log:
navigating to "https://www.stjohns.edu/files/review-business-january-2018", waiting until "load"



Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 5 0 (offset 0)
Ignoring wrong pointing object 9 0 (offset 0)
Ignoring wrong pointing object 11 0 (offset 0)


Error processing https://www.stjohns.edu/sites/default/files/2020-06/GDC%20Faculty%20Course%20Proposal%20REV.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/2020-06/GDC%20Faculty%20Course%20Proposal%20REV.docx'
Error processing https://www.stjohns.edu/sites/default/files/2023-04/GDC%20Budget%20Planning%20Worksheet%20-%20SAMPLE%20for%20Faculty.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/2023-04/GDC%20Budget%20Planning%20Worksheet%20-%20SAMPLE%20for%20Faculty.docx'
Failed to fetch https://www.stjohns.edu/files/ems-institute-faqs: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/ems-institute-faqs
Call log:
navigating to "https://www.stjohns.edu/files/ems-institute-faqs", waiting until "load"

Error processing https://www.stjohns.edu/sites/default/files/faculty/cv/Curriculum_Vitae_SEI_HIGUCHI_09.13.2022.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/faculty/cv/Curriculum_Vitae_SEI_HI

Ignoring wrong pointing object 9 0 (offset 0)
Ignoring wrong pointing object 40 0 (offset 0)
Ignoring wrong pointing object 52 0 (offset 0)


Failed to fetch https://www.stjohns.edu/academics/schools-and-colleges/st-johns-college-liberal-arts-and-sciences/environmental-studies: Page.goto: Timeout 30000ms exceeded.
Call log:
navigating to "https://www.stjohns.edu/academics/schools-and-colleges/st-johns-college-liberal-arts-and-sciences/environmental-studies", waiting until "load"

Failed to fetch https://www.stjohns.edu/academics/schools-and-colleges/st-johns-college-liberal-arts-and-sciences/theology-and-religious-studies: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/academics/schools-and-colleges/st-johns-college-liberal-arts-and-sciences/theology-and-religious-studies
Call log:
navigating to "https://www.stjohns.edu/academics/schools-and-colleges/st-johns-college-liberal-arts-and-sciences/theology-and-religious-studies", waiting until "load"

Failed to fetch https://www.stjohns.edu/academics/commencement/class-2020/virtual-recognition-ceremonies/gradslides: Page.goto: Timeout 30000ms exceeded.
Call log:
navigatin

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 31 0 (offset 0)
Ignoring wrong pointing object 39 0 (offset 0)
Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)


Error processing https://www.stjohns.edu/sites/default/files/2024-07/Fall%202024%20Accommodation%20Tip%20Sheet.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/2024-07/Fall%202024%20Accommodation%20Tip%20Sheet.docx'
Error processing https://www.stjohns.edu/sites/default/files/2024-10/Bar%20Exam%20Accommodation%20Tip%20Sheet%202025-2026.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/2024-10/Bar%20Exam%20Accommodation%20Tip%20Sheet%202025-2026.docx'
Uploaded batch 143 to Pinecone.
Processing batch 144...
Uploaded batch 144 to Pinecone.
Processing batch 145...
Uploaded batch 145 to Pinecone.
Processing batch 146...
Error processing https://www.stjohns.edu/sites/default/files/2024-08/JD%20Advising%20Guide%20updated%208.13.24%20-%20A%20%281%29.docx: no such file or directory: 'https://www.stjohns.edu/sites/default/files/2024-08/JD%20Advising%20Guide%20updated%208.13.24%20-%20A%20%281%29.docx'
Failed to fetch https://www.stjohns.edu

Ignoring wrong pointing object 7 0 (offset 0)
Ignoring wrong pointing object 9 0 (offset 0)
Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 13 0 (offset 0)
Ignoring wrong pointing object 15 0 (offset 0)
Ignoring wrong pointing object 17 0 (offset 0)
Ignoring wrong pointing object 19 0 (offset 0)
Ignoring wrong pointing object 21 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 34 0 (offset 0)
Ignoring wrong pointing object 36 0 (offset 0)
Ignoring wrong pointing object 38 0 (offset 0)
Ignoring wrong pointing object 42 0 (offset 0)
Ignoring wrong pointing object 46 0 (offset 0)
Ignoring wrong pointing object 49 0 (offset 0)
Ignoring wrong pointing object 54 0 (offset 0)
Ignoring wrong pointing object 62 0 (offset 0)
Ignoring wrong pointing object 66 0 (offset 0)


Failed to fetch https://www.stjohns.edu/files/soe-2020-commencement-program: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/soe-2020-commencement-program
Call log:
navigating to "https://www.stjohns.edu/files/soe-2020-commencement-program", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/tcb-2020-virtual-commencement-program: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/tcb-2020-virtual-commencement-program
Call log:
navigating to "https://www.stjohns.edu/files/tcb-2020-virtual-commencement-program", waiting until "load"

Failed to fetch https://www.stjohns.edu/files/tcb-2020-commencement-program: Page.goto: net::ERR_ABORTED at https://www.stjohns.edu/files/tcb-2020-commencement-program
Call log:
navigating to "https://www.stjohns.edu/files/tcb-2020-commencement-program", waiting until "load"

Error processing https://www.stjohns.edu/sites/default/files/2024-07/24_Law%20School_Commencement_Awards_Honors.docx: no such file or directory: 