TASK 1

In [4]:
import requests
from bs4 import BeautifulSoup
import csv

# URL of the page to scrape
url = 'https://www.scrapethissite.com/pages/simple/'

# Send a GET request to the URL. An explicit timeout is passed because
# requests waits indefinitely by default, which would hang the cell if the
# server never answers.
response = requests.get(url, timeout=30)

# Fail loudly on a 4xx/5xx answer (raises requests.HTTPError) instead of
# parsing an error page and silently ending up with zero countries.
response.raise_for_status()

# Parse the HTML content
soup = BeautifulSoup(response.text, 'html.parser')

# Find all country entries (one <div class="... country"> per country)
countries = soup.find_all('div', class_='country')

# Prepare data for CSV
country_data = []


def _country_field(country, field):
    """Return the text of one attribute of a country entry.

    Args:
        country: BeautifulSoup tag of a single ``div.country`` entry.
        field: Attribute to read: 'capital', 'population' or 'area'.

    Returns:
        The stripped value text, or 'N/A' when no matching element exists.
    """
    # NOTE(review): the live page appears to keep each value in its own
    # <span class="country-<field>"> element, so the older
    # <p class="<field>"> lookup matched nothing and every column came out
    # as 'N/A'. Verify against the page's HTML source.
    value_tag = country.find('span', class_=f'country-{field}')
    if value_tag is not None:
        return value_tag.get_text(strip=True)

    # Fallback: the previously assumed '<p class="capital">Capital: X</p>'
    # layout, kept so a page in that shape is still parsed as before.
    labelled_tag = country.find('p', class_=field)
    if labelled_tag is not None:
        parts = labelled_tag.text.split(': ')
        if len(parts) > 1:
            return parts[1].strip()

    return 'N/A'  # Default value if not found


for country in countries:
    name = country.find('h3').text.strip()

    capital = _country_field(country, 'capital')
    population = _country_field(country, 'population')
    area = _country_field(country, 'area')

    # Append the data to the list
    country_data.append([name, capital, population, area])

# Define CSV file name
csv_file = 'countries_data.csv'

# Write data to CSV file. newline='' lets the csv module control line
# endings itself; utf-8 is needed for non-ASCII country and capital names.
with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Write the header. The superscript two in "km²" is written as the
    # escape \u00b2 so the source stays ASCII-only and the header cannot be
    # garbled (e.g. into "kmÂ²") when the notebook is saved or exported with
    # a different encoding.
    writer.writerow(['Country', 'Capital', 'Population', 'Area (km\u00b2)'])
    # Write the country data
    writer.writerows(country_data)

print(f"Data for {len(country_data)} countries has been written to {csv_file}.")

Data for 250 countries has been written to countries_data.csv.


TASK 2

In [7]:
import requests
from bs4 import BeautifulSoup
import csv

# URL of the page to scrape
url = "https://www.scrapethissite.com/pages/forms/?page_num=1"

# One entry per CSV column: (header, class of the <td> holding the value,
# positional index used when no cell with that class exists).
# NOTE(review): the live table appears to carry an extra "OT Losses" cell
# between losses and win percentage, so purely positional indexing puts the
# wrong values under 'Win%', 'Goals For (GF)' and 'Goals Against (GA)'.
# Looking cells up by class avoids that; verify the class names against the
# page's HTML source.
team_fields = [
    ('Team Name', 'name', 0),
    ('Year', 'year', 1),
    ('Wins', 'wins', 2),
    ('Losses', 'losses', 3),
    ('Win%', 'pct', 4),
    ('Goals For (GF)', 'gf', 5),
    ('Goals Against (GA)', 'ga', 6),
]

# Send a GET request to the URL. An explicit timeout is passed because
# requests waits indefinitely by default.
response = requests.get(url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the table containing the hockey team data
    table = soup.find('table', class_='table')

    # Check the table before touching the output file, so a page without the
    # table does not leave a header-only CSV behind.
    if table is not None:
        team_rows = []
        for row in table.find_all('tr')[1:]:  # Skip the header row
            columns = row.find_all('td')
            if len(columns) < 7:  # Ensure there are enough columns
                continue

            record = []
            for header, css_class, position in team_fields:
                cell = row.find('td', class_=css_class)
                if cell is None:
                    # No class on the cells: fall back to the column order.
                    cell = columns[position]
                record.append(cell.text.strip())
            team_rows.append(record)

        # Write the header row followed by one line per team
        with open('hockey_teams_data.csv', mode='w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow([header for header, css_class, position in team_fields])
            writer.writerows(team_rows)

        print("Data has been successfully scraped and saved to 'hockey_teams_data.csv'.")
    else:
        print("Table not found on the page.")
else:
    print(f"Failed to retrieve data from the page. Status code: {response.status_code}")


Data has been successfully scraped and saved to 'hockey_teams_data.csv'.
