In [None]:
import requests
from bs4 import BeautifulSoup

# Function to scrape website content
def scrape_website(session, url):
    """Download *url* through *session* and return the page's visible text.

    The request uses a 10-second timeout and HTTP error statuses are
    treated as failures. The response body is parsed with the built-in
    ``html.parser`` and its text fragments are stripped and joined with
    single spaces.

    Args:
        session: Object exposing ``get(url, timeout=...)``; the caller
            passes a ``requests.Session``.
        url: Address of the page to fetch.

    Returns:
        The extracted text, or ``None`` if any error occurred. Errors are
        reported on stdout rather than raised.
    """
    try:
        page = session.get(url, timeout=10)
        page.raise_for_status()
        # Parsing stays inside the try block so parser failures are also
        # reported by the generic handler below instead of propagating.
        return BeautifulSoup(page.text, 'html.parser').get_text(
            separator=' ', strip=True
        )
    except requests.exceptions.SSLError as ssl_err:
        # Must precede RequestException (its base class) to get its own message.
        print(f"SSL error occurred: {ssl_err}")
        return None
    except requests.exceptions.RequestException as req_err:
        print(f"Request error occurred: {req_err}")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

# Function to search for the query in the scraped data
def answer_query(query, scraped_data):
    """Return the pages whose text contains *query*, ignoring case.

    Args:
        query: Text to look for. It is matched as a plain substring.
        scraped_data: Mapping of URL to that page's text content.

    Returns:
        A list of ``(url, content)`` tuples, in the mapping's iteration
        order, for every page whose content contains the query. An empty
        or whitespace-only query yields an empty list.
    """
    # The empty string is a substring of every string, so without this
    # guard a blank query would "match" every scraped page.
    if not query or not query.strip():
        return []

    needle = query.lower()
    return [
        (url, content)
        for url, content in scraped_data.items()
        if needle in content.lower()
    ]

# Example usage
urls = [
    "https://www.washington.edu/",
    "https://www.stanford.edu/",
    "https://und.edu/",
]

# Maximum number of characters of each matching page shown to the user.
PREVIEW_CHARS = 1000

# Scrape every URL up front. The session is only needed for the downloads,
# so it is closed before the interactive loop starts.
scraped_data = {}
with requests.Session() as session:
    for url in urls:
        content = scrape_website(session, url)  # None on failure
        if content:  # Skip failed requests and pages with no text
            print(f"Successfully scraped content from {url}")
            scraped_data[url] = content

# Continuously ask for user input and search
while True:
    try:
        user_query = input("Enter your query (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the user interrupted the prompt: leave cleanly
        # instead of ending with a traceback.
        print("\nExiting the program.")
        break

    # Exit condition. The prompt shows the word in quotes, so users may type
    # the quotes too; accept exit, 'exit' and "exit" in any letter case.
    if user_query.strip("'\"").lower() == 'exit':
        print("Exiting the program.")
        break

    # A blank query would be a substring of every page; ask again instead.
    if not user_query:
        print("Please enter a non-empty query.")
        continue

    # Find matching results
    results = answer_query(user_query, scraped_data)

    if results:
        print("\nResults found:")
        for url, content in results:
            # Show a preview; add the ellipsis only when text was cut off.
            suffix = "..." if len(content) > PREVIEW_CHARS else ""
            print(f"\nFrom {url}:\n{content[:PREVIEW_CHARS]}{suffix}")
    else:
        print("No results found for your query.")


Successfully scraped content from https://www.washington.edu/
Successfully scraped content from https://www.stanford.edu/
Successfully scraped content from https://und.edu/


Enter your query (or type 'exit' to quit):  university



Results found:

From https://www.washington.edu/:
UW Homepage &lt;iframe src="https://www.googletagmanager.com/ns.html?id=GTM-KQ6QQBT" height="0" width="0" style="display:none;visibility:hidden" aria-hidden="true"&gt;&lt;/iframe&gt; Skip to main content MyUW Calendar Directories Libraries UW Medicine Maps UW News Helpful Links Computing/IT Workday HCM Husky Card UW Bothell UW Tacoma UW Facebook UW Twitter University of Washington University of Washington Students Parents Faculty & Staff Alumni Quick Links About About the UW Diversity Global Impact Innovation Leadership Maps Population Health Sustainability Visit Academics Academic calendar Academic departments Colleges and schools Course descriptions Registration Student guide Time schedule Apply Admissions Financial Aid Continuing education Majors Student housing Transfer students Tuition and fees Undocumented students UW Online News & Events UW News Featured stories Arts UW Calendar UW Magazine Husky sports Newsletter Research Resea

Enter your query (or type 'exit' to quit):  'exit'


No results found for your query.
