In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [None]:
# Source page: French Wikipedia list of countries by nominal GDP.
url = "https://fr.wikipedia.org/wiki/Liste_des_pays_par_PIB_nominal"

# Headers sent with the request: a browser-like User-Agent and an
# Accept-Language preference.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0",
    "Accept-Language": "en-US,en;q=0.9",
}

try:
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    # Turn 4xx/5xx answers into an exception instead of parsing an error page.
    response.raise_for_status()
except requests.RequestException as e:
    # `response` is unbound when the request itself failed (DNS, connection,
    # timeout), so report the exception rather than `response.status_code`.
    print("Failed to retrieve the webpage:", e)
    # Re-raise so this cell (and a "Run All") stops with a traceback instead of
    # calling exit() from inside the notebook.
    raise


In [3]:
# Parse the page that the fetch cell already downloaded and status-checked.
# Sending a second identical request here would only add network traffic and
# would bypass that status check.
soup = BeautifulSoup(response.content, 'html.parser')

In [4]:
# Collect every <table> carrying the "sticky-header-multi" CSS class; the
# next cell reads the first five matches by position.
table = soup.find_all('table', class_="sticky-header-multi")
if len(table) < 5:
    # Fail here with a clear message rather than with an IndexError later on.
    raise ValueError(
        f"Expected at least 5 'sticky-header-multi' tables, found {len(table)}"
    )
data = []  # not read by any visible cell; kept so the name stays defined

In [None]:
# Pull the text content out of the first five tables, one variable per period.
(
    pib_1980_1989,
    pib_1990_1999,
    pib_2000_2009,
    pib_2010_2019,
    pib_2020_2023,
) = (table[position].text for position in range(5))

In [43]:
def convert_to_dataframe(data_string, n_columns=11):
    """
    Convert the flattened text of a table into a pandas DataFrame.

    The text is split on newlines; each line is stripped and blank lines are
    dropped. The remaining lines are then read in consecutive groups of
    ``n_columns``: the first group supplies the column names and every
    following group is one row (first cell, then its values).

    Parameters
    ----------
    data_string : str
        Table text with one cell per line.
    n_columns : int, default 11
        Number of cells per row. The default matches the ten-year tables
        (Country/Zone + 10 years); pass a different width for tables with
        another number of columns.

    Returns
    -------
    pandas.DataFrame
        One row per group of ``n_columns`` lines; every value is kept as the
        string found in the text (no numeric conversion is done).

    Raises
    ------
    ValueError
        If ``n_columns`` is smaller than 1.

    Notes
    -----
    Because blank lines are discarded, a cell that is empty in the text
    shifts every following value by one position. A trailing group with
    fewer than ``n_columns`` lines is kept as a shorter row.
    """
    if n_columns < 1:
        raise ValueError("n_columns must be at least 1")

    # Split lines and clean
    lines = [line.strip() for line in data_string.split('\n') if line.strip()]

    # The first group of lines holds the column names
    columns = lines[:n_columns]

    # Every following group of n_columns lines is one row
    rows = [
        lines[start:start + n_columns]
        for start in range(n_columns, len(lines), n_columns)
    ]

    # Create pandas DataFrame
    return pd.DataFrame(rows, columns=columns)

In [None]:
# Parse each ten-year table text into its own DataFrame.
df_1980_1989, df_1990_1999, df_2000_2009, df_2010_2019 = (
    convert_to_dataframe(period_text)
    for period_text in (pib_1980_1989, pib_1990_1999, pib_2000_2009, pib_2010_2019)
)

In [46]:
# Preview the first rows of the 2000-2009 frame as a sanity check of the parsing.
df_2000_2009.head()

Unnamed: 0,Pays/Zone,PIB 2000,PIB 2001,PIB 2002,PIB 2003,PIB 2004,PIB 2005,PIB 2006,PIB 2007,PIB 2008,PIB 2009
0,Afghanistan,-,-,4367,4553,5146,6167,6925,8556,10297,12066
1,Albanie,3483,3928,4348,5611,7185,8052,8896,10677,12881,12044
2,Algérie,54749,54745,56761,67864,85332,103198,117027,134977,171001,137211
3,Andorre,1429,1547,1758,2362,2896,3158,3456,3952,4082,3675
4,Angola,11166,1093,15286,17813,23552,36971,52381,65266,88539,70307


In [53]:
# Split the 2020-2023 table text into stripped, non-empty lines.
lignes = [ligne.strip() for ligne in pib_2020_2023.split('\n') if ligne.strip()]

# This table is narrower than the ten-year ones: 5 cells per row
# (Pays/Zone + 4 years), so every group of 5 lines is one row.
NB_COLONNES = 5

# The first group holds the column headers.
colonnes = lignes[:NB_COLONNES]

# Each following group is one record: the country/zone name, then its values.
donnees = [
    lignes[debut:debut + NB_COLONNES]
    for debut in range(NB_COLONNES, len(lignes), NB_COLONNES)
]

# Build the pandas DataFrame and preview it.
df_2020_2023 = pd.DataFrame(donnees, columns=colonnes)
df_2020_2023.head()

Unnamed: 0,Pays/Zone,PIB 2020,PIB 2021,PIB 2022,PIB 2023
0,Afghanistan,20136,14941,-,-
1,Albanie,15192,17984,19083,23032
2,Algérie,145656,163138,19506,224107
3,Andorre,2885,3325,3352,3692
4,Angola,57139,74861,122781,93796


In [59]:
# Fold the per-period frames together, oldest first, with an outer join on the
# "Pays/Zone" column so that a name missing from one period is still kept.
full_pib_df = df_1980_1989
for period_df in (df_1990_1999, df_2000_2009, df_2010_2019, df_2020_2023):
    full_pib_df = pd.merge(full_pib_df, period_df, on="Pays/Zone", how="outer")
full_pib_df.head()

Unnamed: 0,Pays/Zone,PIB 1980,PIB 1981,PIB 1982,PIB 1983,PIB 1984,PIB 1985,PIB 1986,PIB 1987,PIB 1988,...,PIB 2014,PIB 2015,PIB 2016,PIB 2017,PIB 2018,PIB 2019,PIB 2020,PIB 2021,PIB 2022,PIB 2023
0,Afghanistan,-,-,-,-,-,-,-,-,-,...,20616,20057,1802,18883,18401,18876,20136,14941,-,-
1,Afrique,465984,475726,481617,462898,404801,397355,43664,48989,520383,...,"2 650,979","2 422,14","2 303,976","2 317,348","2 482,1","2 542,636","2 457,79","2 745,183","2 945,709","2 858,342"
2,Afrique du Sud,89412,9315,85897,96205,84838,64481,73399,96533,10401,...,381195,346663,323493,381317,405093,389245,338193,420009,405106,380906
3,Albanie,1946,2229,2296,2319,229,2339,2587,2566,253,...,13246,11389,11862,13053,15157,15399,15192,17984,19083,23032
4,Algérie,42346,44372,4478,47529,51513,61132,61535,633,51664,...,21381,165979,160034,167498,174868,171673,145656,163138,19506,224107


In [60]:
# Write the merged table to disk. `index` has to be the boolean False: the
# string 'False' is non-empty, hence truthy, so the row index would still be
# written as an extra leading column.
full_pib_df.to_csv("pib_data.csv", index=False, encoding="utf-8")