In [33]:
import requests
from bs4 import BeautifulSoup

def scrape_and_save(url, filename, timeout=30):
    """Download a page and save the text of its main content blocks to a file.

    Fetches ``url`` with a browser-like User-Agent, collects every ``div``
    whose class is ``"entry-content reg"``, strips the HTML tags and writes
    the resulting text to ``filename`` as UTF-8. Progress and failures are
    reported with ``print``; nothing is returned and request errors are not
    raised to the caller.

    Args:
        url: Address of the page to scrape.
        filename: Path of the text file to write. Missing parent
            directories are created.
        timeout: Seconds to wait for the server before giving up.
    """
    from pathlib import Path

    # Define headers to mimic a browser request
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Without a timeout a stalled server would block the whole batch forever;
    # connection-level errors are reported the same way as a bad status code
    # so one unreachable page does not abort the remaining downloads.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as error:
        print("Failed to retrieve data from the URL:", url, f"({error})")
        return

    # Only a plain 200 response is treated as success
    if response.status_code != 200:
        print("Failed to retrieve data from the URL:", url)
        return

    # Parse the HTML content of the webpage
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all elements with the specified class
    sections = soup.find_all("div", class_="entry-content reg")
    if not sections:
        # Do not overwrite an existing file with empty content or report a
        # success when the page layout did not match.
        print(f"No matching content found at the URL: {url}; '{filename}' not written")
        return

    # Extract text without HTML tags, one trailing newline per element
    extracted_text = "".join(
        section.get_text(separator="\n") + "\n" for section in sections
    )

    # Make sure the destination directory exists before opening the file
    Path(filename).parent.mkdir(parents=True, exist_ok=True)

    # Save the extracted text to a text file
    with open(filename, "w", encoding="utf-8") as file:
        file.write(extracted_text)

    print(f"Data extracted and saved to '{filename}'")

# Pages to scrape, keyed by URL, with the output text file each one is saved to.
# Insertion order is the order in which the pages are processed.
_PAGE_TO_FILE = {
    "https://international.utk.edu/visa-funding/":
        "training_data/visa-funding.txt",
    "https://international.utk.edu/visa-denials-delays-and-administrative-processing/":
        "training_data/visa-denial-and-delays.txt",
    "https://international.utk.edu/arriving-in-knoxville/":
        "training_data/arriving-in-knoxville.txt",
    "https://international.utk.edu/international-students-and-scholars-services/current-students/advising/":
        "training_data/immigration-advising.txt",
    "https://international.utk.edu/international-students-and-scholars-services/current-students/maintaining-status/full-time-enrollment/":
        "training_data/full-time-enrollment.txt",
    "https://international.utk.edu/international-students-and-scholars-services/current-students/maintaining-status/local-address-requirement/":
        "training_data/local-address-requirement.txt",
    "https://international.utk.edu/international-students-and-scholars-services/current-students/maintaining-status/dependents-spouse-and-children/":
        "training_data/dependents.txt",
    "https://international.utk.edu/international-students-and-scholars-services/current-students/maintaining-status/consequences-of-not-maintaining-status/":
        "training_data/maintaining-visa-status.txt",
    "https://international.utk.edu/international-students-and-scholars-services/current-students/traveling-inside-and-outside-the-u-s/":
        "training_data/traveling-while-on-visa.txt",
    "https://international.utk.edu/international-students-and-scholars-services/current-students/health-care-and-insurance/affordable-health-care-options/":
        "training_data/healthcare-at-utk.txt",
    "https://international.utk.edu/international-students-and-scholars-services/current-students/transferring-out-of-utk/":
        "training_data/transfer-from-utk.txt",
    "https://international.utk.edu/training/on-campus-employment/":
        "training_data/campus-employment.txt",
    "https://international.utk.edu/training/internships-and-practical-training/":
        "training_data/internships.txt",
    "https://international.utk.edu/training/academic-training-for-j-1-students/":
        "training_data/training-on-j1.txt",
    "https://international.utk.edu/training/optional-practical-training-opt-for-f-1-students/":
        "training_data/opt-on-f1.txt",
}

# Flatten to the list of (url, filename) tuples used by the rest of the cell
urls = list(_PAGE_TO_FILE.items())

# Scrape every configured page in order, saving each one to its paired file
for url, filename in urls:
    scrape_and_save(url=url, filename=filename)


Data extracted and saved to 'training_data/visa-funding.txt'
Data extracted and saved to 'training_data/visa-denial-and-delays.txt'
Data extracted and saved to 'training_data/arriving-in-knoxville.txt'
Data extracted and saved to 'training_data/immigration-advising.txt'
Data extracted and saved to 'training_data/full-time-enrollment.txt'
Data extracted and saved to 'training_data/local-address-requirement.txt'
Data extracted and saved to 'training_data/dependents.txt'
Data extracted and saved to 'training_data/maintaining-visa-status.txt'
Data extracted and saved to 'training_data/traveling-while-on-visa.txt'
Data extracted and saved to 'training_data/healthcare-at-utk.txt'
Data extracted and saved to 'training_data/transfer-from-utk.txt'
Data extracted and saved to 'training_data/campus-employment.txt'
Data extracted and saved to 'training_data/internships.txt'
Data extracted and saved to 'training_data/training-on-j1.txt'
Data extracted and saved to 'training_data/opt-on-f1.txt'
