This is a step-by-step guide to web scraping with BeautifulSoup.

In [None]:
import requests
from bs4 import BeautifulSoup
import re
import csv

In [None]:
# Category path segments substituted into the first placeholder of base_url.
categories_path = [
    "Immobilier",
    "ImmoNeuf/Immobilier%20Neuf",
]
# Listing URL template: first placeholder is the category, second the page number.
base_url = "https://www.tayara.tn/ads/c/{}/?page={}"

for category in categories_path:
    # range(1, 5) covers pages 1 to 4; kept small for the example.
    for page in range(1, 5):
        url = base_url.format(category, page)
        # A timeout keeps the cell from hanging forever on a stalled connection.
        response = requests.get(url, timeout=10)
        # NOTE: `soup` is rebound on every iteration, so only the last fetched
        # page is still available once the loops finish.
        soup = BeautifulSoup(response.text, 'html.parser')
        # annonces = soup.find_all('div', class_='relative -z-40')

In [None]:
# Returns all the links to the individual property pages.
# You can inspect https://www.tayara.tn/item/66eae8a467b755ba922a2a58/Immobilier%20Neuf/Ariana/Ghazela/Appartement_en_S2_de_12880_m_A41_au_4me_tage/
def get_all_links(soup):
    """Collect the absolute URLs of the item pages linked from a listing page.

    Args:
        soup: Parsed listing page; must provide ``find_all`` the way a
            BeautifulSoup object does.

    Returns:
        A list of URLs, one per ``<article class="mx-0">`` whose first
        ``<a>`` has an ``href`` containing ``/item/``. Articles without an
        anchor or without an ``href`` are skipped.
    """
    property_links = []
    for article in soup.find_all('article', class_="mx-0"):
        anchor = article.find('a')
        # find() returns None when the article has no <a>; .get() avoids a
        # KeyError when the anchor has no href attribute.
        link = anchor.get('href') if anchor is not None else None
        if link and '/item/' in link:
            property_links.append("https://www.tayara.tn" + link)
    return property_links

In [None]:
# Criterion names seen so far across all crawled pages; filled in by
# crawl_property_page as a side effect.
all_fieldnames = set()


def crawl_property_page(url):
    """Download one property page and extract its details.

    Args:
        url: Address of the property page to fetch.

    Returns:
        A dict with the keys 'Annonce', 'Prix', 'Localisation' and
        'Configuration des pièces', plus one key per criterion/value pair
        found on the page. Returns None (after printing a message) when an
        expected element is missing and an AttributeError is raised.

    Raises:
        Exceptions raised by ``requests.get`` (including timeouts) are not
        caught here and propagate to the caller.

    Side effects:
        Adds every criterion name found on the page to the module-level
        ``all_fieldnames`` set.
    """
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.content, 'html.parser')
    item_info = {}

    try:
        # Title
        title = soup.find('h1', class_='text-gray-700 font-bold text-2xl font-arabic').text.strip()

        # Room configuration: first "S+<digits>" token in the title, if any
        pieces = re.search(r'S\+\d+', title)
        pieces_value = pieces.group() if pieces else None

        # Price: concatenate the spans whose stripped text is purely digits
        price_parts = soup.find_all('span', class_='mr-1')
        price = ''.join(
            part.text.strip() for part in price_parts if part.text.strip().isdigit()
        )

        # Location: text before the first comma
        location = soup.find('div', class_='flex items-center space-x-2 mb-1').text.strip().split(',')[0].strip()

        # Base information
        item_info['Annonce'] = title
        item_info['Prix'] = price
        item_info['Localisation'] = location
        item_info['Configuration des pièces'] = pieces_value

        # Criterion labels and their values are matched by position below
        criterias = soup.find_all('span', class_='text-gray-600/80 text-2xs md:text-xs lg:text-xs font-medium')
        values = soup.find_all('span', class_='text-gray-700/80 text-xs md:text-sm lg:text-sm font-semibold')

        # Pair each criterion with its value; zip stops at the shorter list
        for criteria, value in zip(criterias, values):
            criterion_text = criteria.text.strip()
            value_text = value.text.strip()

            # An empty label is stored under a generic key
            if criterion_text == "":
                criterion_text = "Autre"

            item_info[criterion_text] = value_text

            # Keep the global set of criterion names up to date
            all_fieldnames.add(criterion_text)

        return item_info

    except AttributeError:
        print("Une information n'a pas pu être extraite correctement.")
        return None

In [None]:
property_links = get_all_links(soup)
# Example with the link https://www.tayara.tn/item/66eae8a467b755ba922a2a58/Immobilier%20Neuf/Ariana/Ghazela/Appartement_en_S2_de_12880_m_A41_au_4me_tage/
# Guard against an empty list so the cell does not raise IndexError when no
# item link was found on the page.
data = crawl_property_page(property_links[0]) if property_links else None
print(data)

In [None]:
def save_to_csv(data, filename="immobiliers.csv"):
    """Write scraped rows to a CSV file.

    Args:
        data: A list of dicts (one per row), or a single dict, which is
            treated as one row. Entries that are None or empty are ignored.
        filename: Path of the CSV file to create or overwrite.

    Returns:
        None. When there is nothing to write, a message is printed and no
        file is created.
    """
    # Accept a single row as well as a list of rows.
    if isinstance(data, dict):
        data = [data]

    # Drop None/empty entries, which DictWriter cannot write.
    rows = [row for row in (data or []) if row]
    if not rows:
        print("Aucune donnée à sauvegarder.")
        return

    # Rows may carry different keys, so the header is the union of all keys
    # in first-seen order (dict.fromkeys preserves order and removes duplicates).
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))

    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        # restval fills the columns a given row does not have.
        writer = csv.DictWriter(file, fieldnames=fieldnames, restval='')
        writer.writeheader()
        writer.writerows(rows)

In [None]:
# test sur un seul élément
# save_to_csv(data, "immobiliers.csv")

In [None]:
# Crawl every collected link and save the results.
data = []
for link in property_links:
    item = crawl_property_page(link)
    # crawl_property_page returns None when a page could not be parsed;
    # such entries are skipped so only real rows reach the CSV writer.
    if item is not None:
        data.append(item)
save_to_csv(data, "immobiliers.csv")