In [1]:
# importing libraries
import csv
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup



In [2]:

def scrape_mubawab_listings(
    base_url="https://www.mubawab.tn/fr/sc/appartements-a-vendre",
    timeout=15,
):
    """Collect the property-page URLs linked from one Mubawab results page.

    Args:
        base_url: Results page to download. Defaults to the
            apartments-for-sale listing page.
        timeout: Seconds to wait for the server before the request is
            abandoned.

    Returns:
        list[str]: Absolute URLs, in page order and without duplicates, taken
        from the first link inside each listing container. An empty list is
        returned when the request fails or when no container selector matches.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }

    # Candidate containers, most specific first; the first selector that
    # matches anything is the one used.
    listing_selectors = [
        'div.listingBox',
        'div[class*="listing"]',
        'div[class*="property"]',
        'div[class*="ad"]'
    ]

    try:
        # The context manager releases the session's pooled connections.
        with requests.Session() as session:
            session.headers.update(headers)
            # requests has no default timeout: without one a stalled server
            # would block the notebook cell indefinitely.
            response = session.get(base_url, timeout=timeout)
            response.raise_for_status()
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    listings = []
    for selector in listing_selectors:
        found = soup.select(selector)
        if found:
            print(f"Found {len(found)} listings with selector: {selector}")
            listings = found
            break

    if not listings:
        print("No listings found with common selectors. Let's examine the page structure...")
        # Print some of the HTML to understand structure
        print("First 2000 characters of the page:")
        print(soup.prettify()[:2000])
        return []

    # A dict keeps insertion order while dropping repeats; the broad fallback
    # selectors can match nested containers that share the same first link.
    unique_links = {}
    for listing in listings:
        link_element = listing.find('a', href=True)
        if link_element is None:
            continue
        href = link_element['href'].strip()
        if not href:
            continue
        # urljoin resolves root-relative, page-relative and protocol-relative
        # hrefs against the page URL and leaves absolute URLs untouched.
        unique_links[urljoin(base_url, href)] = None

    return list(unique_links)



In [3]:
# Fetch the listing links, then report the total and preview the first few.
links = scrape_mubawab_listings()
print(f"\nTotal property links found: {len(links)}")
if len(links) > 0:
    print("First 5 links:")
    numbered_preview = (
        f"{position}. {url}"
        for position, url in enumerate(links[:5], start=1)
    )
    # One numbered entry per line.
    print(*numbered_preview, sep="\n")

Found 35 listings with selector: div.listingBox

Total property links found: 34
First 5 links:
1. https://www.mubawab.tn/fr/a/8212454/appartement-s2-meubl%C3%A9-en-location
2. https://www.mubawab.tn/fr/a/8211784/appartement-s3-en-vente-lac-2
3. https://www.mubawab.tn/fr/a/8208671/duplex-direct-promoteur-en-vente-au-jardins-de-carthage
4. https://www.mubawab.tn/fr/a/8211081/appartement-s1-en-vente
5. https://www.mubawab.tn/fr/a/8217232/duplex-en-s7-a-vendre-jardins-de-carthage


In [None]:
def scrape_mubawab_appartments(url, timeout=15):
    """Scrape one Mubawab property page into a flat, fixed-column record.

    Args:
        url: Address of the property page to download.
        timeout: Seconds to wait for the server before the request is
            abandoned.

    Returns:
        dict: Keys 'URL', 'Title', 'Price', 'Location', 'Features' and
        'Details'. Multi-valued fields are joined with ' | '; fields whose
        elements are absent from the page stay as empty strings. An empty
        dict is returned when the HTTP request fails.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/140.0.0.0 Safari/537.36 Edg/140.0.0.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }

    try:
        # The context manager releases the session's pooled connections.
        with requests.Session() as session:
            session.headers.update(headers)
            # requests has no default timeout: without one a stalled server
            # would block the notebook cell indefinitely.
            response = session.get(url, timeout=timeout)
            response.raise_for_status()
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return {}

    soup = BeautifulSoup(response.content, 'html.parser')

    # Fixed set of columns so every scraped page yields the same CSV schema.
    apartment_data = {
        'URL': url,
        'Title': '',
        'Price': '',
        'Location': '',
        'Features': '',
        'Details': ''
    }

    # Locations.
    # NOTE(review): every <a class="darkblue"> is collected; output captured
    # in this notebook suggests that may include non-location links such as
    # 'Se connecter' - confirm against the live page before relying on it.
    location_texts = [
        _clean_text(loc.get_text(strip=True))
        for loc in soup.find_all('a', class_="darkblue")
    ]
    apartment_data['Location'] = ' | '.join(location_texts)

    # Title and price.
    title_elem = soup.find('h3', class_="greyTit")
    price_elem = soup.find('h3', class_="orangeTit")

    if title_elem:
        apartment_data['Title'] = _clean_text(title_elem.get_text(strip=True))
    if price_elem:
        # Non-breaking spaces are removed outright (not turned into spaces)
        # before the remaining whitespace is normalised.
        apartment_data['Price'] = _clean_text(
            price_elem.get_text(strip=True).replace('\xa0', '')
        )

    # Features: first <span> of each feature box. Keying a dict on the text
    # drops repeated entries while keeping page order.
    features = {}
    for div in soup.find_all('div', class_="adDetailFeature"):
        span = div.find('span')
        if span:
            features[_clean_text(span.get_text(strip=True))] = None
    apartment_data['Features'] = ' | '.join(features)

    # Main features: label/value paragraphs paired by position.
    details_list = []
    labels = soup.find_all('p', class_='adMainFeatureContentLabel')
    values = soup.find_all('p', class_='adMainFeatureContentValue')
    for label, value in zip(labels, values):
        label_text = _clean_text(label.get_text(strip=True))
        value_text = _clean_text(value.get_text(strip=True))
        details_list.append(f"{label_text}: {value_text}")

    # Secondary features carry no value on the page, so each is recorded as a
    # "<name>: Yes" flag.
    for sec_feature in soup.find_all('span', class_='fSize11 centered'):
        sec_feature_text = _clean_text(sec_feature.get_text(strip=True))
        details_list.append(f"{sec_feature_text}: Yes")

    apartment_data['Details'] = ' | '.join(details_list)

    return apartment_data


def _clean_text(text):
    """Collapse each run of whitespace (tabs, newlines, NBSP, spaces) to one space."""
    return ' '.join(text.split())





In [5]:
# Scrape a single property page and use it to start the CSV.
data = scrape_mubawab_appartments("https://www.mubawab.tn/fr/pa/8136239/vend-appartement-%C3%A0-sousse-jaouhara-2-belles-chambres")

# The scraper returns an empty dict when the request fails; build an empty
# frame in that case so `df` is always defined.
df = pd.DataFrame([data] if data else [])
if data:
    df.to_csv("apartments.csv", index=False, encoding="utf-8")
else:
    # Do not overwrite an existing CSV with an empty frame.
    print("Scrape returned no data; apartments.csv was not written.")

['Se connecter', 'Immobilier Tunisie', 'Immobilier Sousse Jaouhara', 'Appartements Sousse Jaouhara', 'Sousse Jaouhara', 'Sousse Jaouhara à Sousse Jaouhara', '333000 TND', '116 \t\t\t\t\t\tm²', '3 Pièces', '2 Chambres', '1 Salle de bain', '116 \t\t\t\t\t\tm²', '3 Pièces', '2 Chambres', '1 Salle de bain', 'Type de bien: Appartement', 'Etat: Nouveau', 'Étage du bien: 5ème', 'Standing: Haut standing', 'État: Finalisé', 'Garage: Yes', 'Ascenseur: Yes', 'Concierge: Yes', 'Chambre rangement: Yes', 'Meublé: Yes', 'Salon européen: Yes', 'Antenne parabolique: Yes', 'Climatisation: Yes', 'Chauffage central: Yes', 'Sécurité: Yes', 'Double vitrage: Yes', 'Porte blindée: Yes', 'Cuisine équipée: Yes', 'Four: Yes', 'Micro-ondes: Yes']


In [6]:
# Load the CSV back from disk and display its first rows.
df = pd.read_csv(filepath_or_buffer="apartments.csv")
df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25,26,27,28,29,30,31,32,33,34
0,Se connecter,Immobilier Tunisie,Immobilier Sousse Jaouhara,Appartements Sousse Jaouhara,Sousse Jaouhara,Sousse Jaouhara à Sousse Jaouhara,333000 TND,116 \t\t\t\t\t\tm²,3 Pièces,2 Chambres,...,Salon européen: Yes,Antenne parabolique: Yes,Climatisation: Yes,Chauffage central: Yes,Sécurité: Yes,Double vitrage: Yes,Porte blindée: Yes,Cuisine équipée: Yes,Four: Yes,Micro-ondes: Yes


In [7]:

# Scrape a second property page and merge it into the CSV.
data = scrape_mubawab_appartments("https://www.mubawab.tn/fr/a/8211784/appartement-s3-en-vente-lac-2")
if data:
    new_row = pd.DataFrame([data])
    try:
        existing_df = pd.read_csv("apartments.csv")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No usable CSV yet (missing or empty file): start from an empty
        # frame with the same columns.
        existing_df = pd.DataFrame(columns=new_row.columns)

    # Drop any earlier row for this URL so re-running the cell replaces the
    # record instead of appending a duplicate.
    if "URL" in existing_df.columns:
        existing_df = existing_df[existing_df["URL"] != data["URL"]]

    # Skip empty frames so the new row's dtypes are not affected by them.
    frames = [frame for frame in (existing_df, new_row) if not frame.empty]
    updated_df = pd.concat(frames, ignore_index=True)
    updated_df.to_csv("apartments.csv", index=False, encoding="utf-8")



['Se connecter', 'Immobilier Tunisie', 'Immobilier La Marsa', 'Appartements La Marsa', 'Les Berges Du Lac 2', 'Les Berges Du Lac 2 à La Marsa', '1450000 TND', '275 \t\t\t\t\t\tm²', '4 Chambres', '2 Salles de bains', '275 \t\t\t\t\t\tm²', '4 Chambres', '2 Salles de bains', 'Type de bien: Appartement', 'Etat: Bon état', 'Années: 5-10 ans', 'Étage du bien: 9ème', 'Orientation: Sud', 'Type du sol: Marbre', 'Terrasse: Yes', 'Garage: Yes', 'Ascenseur: Yes', 'Vue sur mer: Yes', 'Vue sur les montagnes: Yes', 'Concierge: Yes', 'Chambre rangement: Yes', 'Façade extérieure: Yes', 'Antenne parabolique: Yes', 'Cheminée: Yes', 'Climatisation: Yes', 'Chauffage central: Yes', 'Sécurité: Yes', 'Porte blindée: Yes', 'Cuisine équipée: Yes', 'Four: Yes']


In [8]:
# Re-read the CSV after the append step and display its first rows.
df = pd.read_csv(filepath_or_buffer="apartments.csv")
df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25.1,26.1,27.1,28.1,29.1,30.1,31.1,32.1,33.1,34.1
0,Se connecter,Immobilier Tunisie,Immobilier Sousse Jaouhara,Appartements Sousse Jaouhara,Sousse Jaouhara,Sousse Jaouhara à Sousse Jaouhara,333000 TND,116 \t\t\t\t\t\tm²,3 Pièces,2 Chambres,...,,,,,,,,,,
1,,,,,,,,,,,...,Chambre rangement: Yes,Façade extérieure: Yes,Antenne parabolique: Yes,Cheminée: Yes,Climatisation: Yes,Chauffage central: Yes,Sécurité: Yes,Porte blindée: Yes,Cuisine équipée: Yes,Four: Yes
