In [1]:
import requests
from bs4 import BeautifulSoup

def crawl(url, timeout=10):
    """Download a web page and return its HTML as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, `requests.get` may wait indefinitely for a response.

    Returns:
        The response body as a string when the server answers with status
        code 200; otherwise None, after printing a message describing the
        failure.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Connection problems and timeouts are reported the same way as a bad
        # status code, so callers only ever need to check for None.
        print(f"Failed to fetch the page. Error: {error}")
        return None

    if response.status_code == 200:
        return response.text

    print(f"Failed to fetch the page. Status code: {response.status_code}")
    return None

# Download the body-height page and print its markup, re-indented by
# BeautifulSoup, so the page structure can be read in the cell output.
url = "https://www.worlddata.info/average-bodyheight.php"
html_content = crawl(url)

if not html_content:
    # crawl() returned None (or an empty body): nothing to parse.
    print("Failed to fetch the page.")
else:
    soup = BeautifulSoup(html_content, 'html.parser')
    pretty_html = soup.prettify()
    print(pretty_html)

<!DOCTYPE HTML>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Average height for men and women worldwide
  </title>
  <link href="https://js.worlddata.info" rel="preconnect"/>
  <link href="https://js.worlddata.info" rel="dns-prefetch"/>
  <link href="https://cdn.worlddata.info" rel="preconnect"/>
  <link href="https://cdn.worlddata.info" rel="dns-prefetch"/>
  <link href="https://www.laenderdaten.info/durchschnittliche-koerpergroessen.php" hreflang="de" rel="alternate"/>
  <link href="https://www.worlddata.info/average-bodyheight.php" hreflang="en" rel="alternate"/>
  <link href="https://www.datosmundial.com/estatura-promedio.php" hreflang="es" rel="alternate"/>
  <link href="https://www.dadosmundiais.com/altura-media.php" hreflang="pt" rel="alternate"/>
  <link href="https://www.donneesmondiales.com/taille-moyenne.php" hreflang="fr" rel="alternate"/>
  <link href="https://www.worlddata.info/average-bodyheight.php" id="canonical" rel="canonical"/>
  <meta content="Ho

In [2]:
import pandas as pd
def crawl_stats(url, timeout=10):
    """Download the statistics page and return its HTML as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, `requests.get` may wait indefinitely for a response.

    Returns:
        The response body as a string when the server answers with status
        code 200; otherwise None, after printing a message describing the
        failure.
    """
    # NOTE(review): this helper duplicates `crawl` from the first cell;
    # consider keeping a single fetch function for the whole notebook.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Connection problems and timeouts are reported the same way as a bad
        # status code, so callers only ever need to check for None.
        print(f"Failed to fetch the page. Error: {error}")
        return None

    if response.status_code == 200:
        return response.text

    print(f"Failed to fetch the page. Status code: {response.status_code}")
    return None

# Scrape the country / height / weight / BMI rows from the page into `df`.
url = "https://www.worlddata.info/average-bodyheight.php"
html_content = crawl_stats(url)

if html_content:
    soup = BeautifulSoup(html_content, 'html.parser')
    # Despite its name, `table` is the <div id="averageheights"> wrapper;
    # every <tr> found anywhere inside it is collected below.
    table = soup.find('div', id='averageheights')

    data = []
    if table:
        rows = table.find_all('tr')

        # The first <tr> is skipped (presumably a header row - TODO confirm).
        for row in rows[1:]:
            cells = row.find_all('td')

            # Only rows with at least 4 <td> cells (country, height, weight,
            # BMI) are data rows; anything shorter is ignored.
            if len(cells) >= 4:
                country, height, weight, bmi = (
                    cell.text.strip() for cell in cells[:4]
                )
                data.append([country, height, weight, bmi])

        # Values are kept as the scraped strings (e.g. "1.84 m", "87.9 kg").
        columns = ['Country', 'Average height', 'Weight', 'BMI']
        df = pd.DataFrame(data, columns=columns)
        print(df)
    else:
        # Without this message a missing section left `df` undefined with no
        # output at all, and later cells failed with an unrelated NameError.
        print("Could not find the 'averageheights' section in the page; df was not created.")
else:
    print("Failed to fetch the page.")

                    Country Average height   Weight   BMI
0               Netherlands         1.84 m  87.9 kg  26.1
1                Montenegro         1.83 m  90.4 kg  27.0
2                   Estonia         1.82 m  89.9 kg  27.0
3                   Denmark         1.82 m  86.8 kg  26.3
4    Bosnia and Herzegovina         1.82 m  87.1 kg  26.4
..                      ...            ...      ...   ...
245              Madagascar         1.53 m  49.2 kg  21.1
246              Bangladesh         1.52 m  50.5 kg  21.8
247                   Nepal         1.52 m  51.6 kg  22.3
248             Timor-Leste         1.52 m  49.0 kg  21.2
249               Guatemala         1.51 m  61.9 kg  27.3

[250 rows x 4 columns]


In [3]:
# `df` holds two stacked blocks of rows; the following cells treat the first
# block as the men's figures and the second as the women's. Instead of
# hard-coding the row positions (which breaks as soon as the site adds or
# removes a country), locate the boundary from the data: the first country
# name that has already appeared marks the first row of the second block.
# Assumes no country is listed twice within the first block.
is_repeat = df['Country'].duplicated()
if not is_repeat.any():
    raise ValueError(
        "Cannot split df into men/women: no country name appears twice."
    )

# Positional index of the first repeated country (argmax returns the first True).
women_start = int(is_repeat.to_numpy().argmax())

dfmen = df.iloc[:women_start]
dfwomen = df.iloc[women_start:]

In [4]:
# Suffix each measurement column with "men"; `Country` keeps its name.
dfmen = dfmen.rename(
    columns={
        'Average height': 'Average height men',
        'Weight': 'Weight men',
        'BMI': 'BMI men',
    }
)
dfmen

Unnamed: 0,Country,Average height men,Weight men,BMI men
0,Netherlands,1.84 m,87.9 kg,26.1
1,Montenegro,1.83 m,90.4 kg,27.0
2,Estonia,1.82 m,89.9 kg,27.0
3,Denmark,1.82 m,86.8 kg,26.3
4,Bosnia and Herzegovina,1.82 m,87.1 kg,26.4
...,...,...,...,...
120,Nepal,1.64 m,60.5 kg,22.5
121,Guatemala,1.64 m,69.1 kg,25.7
122,Yemen,1.63 m,62.5 kg,23.6
123,Laos,1.62 m,59.5 kg,22.6


In [5]:
# Suffix each measurement column with "women"; `Country` keeps its name.
dfwomen = dfwomen.rename(
    columns={
        'Average height': 'Average height women',
        'Weight': 'Weight women',
        'BMI': 'BMI women',
    }
)
dfwomen

Unnamed: 0,Country,Average height women,Weight women,BMI women
125,Netherlands,1.70 m,73.2 kg,25.3
126,Montenegro,1.70 m,75.3 kg,26.2
127,Denmark,1.69 m,70.2 kg,24.6
128,Iceland,1.68 m,72.6 kg,25.6
129,Estonia,1.68 m,73.7 kg,26.0
...,...,...,...,...
245,Madagascar,1.53 m,49.2 kg,21.1
246,Bangladesh,1.52 m,50.5 kg,21.8
247,Nepal,1.52 m,51.6 kg,22.3
248,Timor-Leste,1.52 m,49.0 kg,21.2


In [6]:
# Write each frame to its own UTF-8 CSV file (without the index column) and
# report the file name once it has been saved.
for csv_file, frame in (
    ("R_population_characteristics_men.csv", dfmen),
    ("R_population_characteristics_women.csv", dfwomen),
):
    frame.to_csv(csv_file, index=False, encoding='utf-8')
    print(f"DataFrame saved to {csv_file}")

DataFrame saved to R_population_characteristics_men.csv
DataFrame saved to R_population_characteristics_women.csv
