<a href="https://colab.research.google.com/github/shannanliew/shannanliew.github.io/blob/main/obesity_rates_countries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from google.colab import files

# Scrape the "most obese countries" ranking table and export each country's
# combined obesity rate to a CSV file, which is then downloaded from Colab.

# Set the URL to get our data from
url = "https://worldpopulationreview.com/country-rankings/most-obese-countries"

# Send a GET request to fetch the page content.
# A timeout is passed explicitly because requests waits forever by default,
# which would hang the notebook cell if the server stops responding.
response = requests.get(url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content with BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")

    # Locate the main <tbody> containing the full list of countries
    table_body = soup.find("tbody", {"class": "relative z-10 text-sm"})

    if table_body is None:
        # find() returns None when nothing matches; report it instead of
        # crashing with an AttributeError on the next line.
        print("Failed to locate the country table. The page layout may have changed.")
    else:
        # Lists to store country names and obesity rates
        countries = []
        obesity_rates = []

        # Iterate through each row in the table body
        for row in table_body.find_all("tr"):
            # Get the country name from the <th> element; rows without a
            # <th> carry no country and are skipped.
            header_cell = row.find("th")
            if header_cell is None:
                continue
            country = header_cell.text.strip()

            # Get all <td> elements in the row
            cells = row.find_all("td")

            # The rate is read from the 4th <td>; rows with fewer cells get
            # an empty string so they can be filtered out below.
            if len(cells) >= 4:
                obesity_rate_both = cells[3].text.strip()
            else:
                obesity_rate_both = ""

            # Append data to the lists
            countries.append(country)
            obesity_rates.append(obesity_rate_both)

        # Create a DataFrame with the scraped data
        df = pd.DataFrame({
            "Country": countries,
            "Obesity Rate - Both (%)": obesity_rates,
        })

        # Remove rows with missing or empty values in the
        # "Obesity Rate - Both (%)" column
        df = df[df["Obesity Rate - Both (%)"] != ""]
        df.reset_index(drop=True, inplace=True)

        # Display the DataFrame without rows with missing data
        print(df)

        # Export the DataFrame to a CSV file
        df.to_csv("obesity_rates_countries.csv", index=False)

        # Trigger a browser download of the CSV from the Colab runtime
        files.download("obesity_rates_countries.csv")

else:
    print("Failed to retrieve the page. Status code:", response.status_code)

                              Country Obesity Rate - Both (%)
0                        Cook Islands                   55.9%
1                               Nauru                     61%
2                                Niue                     50%
3                               Samoa                   47.3%
4                               Tonga                   48.2%
5                              Tuvalu                   51.6%
6                            Kiribati                     46%
7                         Saint Lucia                   19.7%
8                          Micronesia                   45.8%
9                               Egypt                     32%
10                             Kuwait                   37.9%
11                   Marshall Islands                   52.9%
12              Saint Kitts and Nevis                   22.9%
13                              Palau                   55.3%
14                              Qatar                   35.1%
15      

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>