# Web Scraper

In [None]:
import requests
import csv
from bs4 import BeautifulSoup
import time

# Base URL format; "{}" is filled with one location slug from LOCATIONS
BASE_URL = "https://www.immoscout24.ch/de/immobilien/mieten/ort-{}"

# Location slugs to scrape. Each slug must appear only once: a repeated entry
# is scraped again and produces duplicate rows in the output.
LOCATIONS = [
    "bern",
    "zuerich",
    "interlaken",
    "genf",
    "aargau",
    "basel",
    "langenthal",
    "chur",
    "luzern",
    "st-gallen",
    "zug",
    "zermatt",
    "davos",
    "lausanne",
    "montreux",
    "neuchatel",
    "sion",
    "winterthur",
    "herzogenbuchsee",
    "burgdorf",
]

# CSS selectors for the listing cards and the elements read from each card.
# NOTE(review): the hashed suffixes look build-generated and may change when
# the site is redeployed -- confirm they still match before a long run.
CSS_SELECTOR = ".HgListingCard_altCard_kVJoS"
CSS_SELECTOR_DETAILS_DIV = ".HgListingRoomsLivingSpacePrice_roomsLivingSpacePrice_M6Ktp"
CSS_SELECTOR_ID = ".HgCardElevated_content_uir_2"
# aria-label of the pagination link that leads to the next results page
NEXT_PAGE_LABEL = "Zur nächsten Seite"

# Output file
OUTPUT_FILE = "immoscout_listings_test.csv"

# Function to scrape listings
def scrape_listings():
    """Scrape rental listings for every location in LOCATIONS.

    For each location the result pages are fetched one after another by
    following the "next page" link until no such link is found. A page that
    cannot be fetched ends the scrape of that location; the remaining
    locations are still processed.

    Returns:
        list: One ``[id, city, zip_code, rooms, space, rent]`` row (all
        strings) per listing for which every field could be extracted and
        the price is not "Preis auf Anfrage".
    """
    data = []

    for location in LOCATIONS:
        url = BASE_URL.format(location)

        while url:
            print(f"Scraping {url}...")

            try:
                # A timeout keeps one stalled connection from hanging the run.
                response = requests.get(
                    url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30
                )
            except requests.RequestException as exc:
                print(f"Failed to fetch {url}: {exc}")
                break
            if response.status_code != 200:
                print(f"Failed to fetch {url}")
                # Stop paginating this location. `continue` here would
                # request the very same URL again, forever and without delay.
                break

            soup = BeautifulSoup(response.text, "html.parser")

            for listing in soup.select(CSS_SELECTOR):
                id_links = listing.select(CSS_SELECTOR_ID)
                detail_nodes = listing.select(CSS_SELECTOR_DETAILS_DIV)
                address_node = listing.find('address')
                # Skip cards that lack an expected element instead of
                # aborting the whole run (and losing everything collected).
                if not id_links or not detail_nodes or address_node is None:
                    continue

                href = id_links[0].get('href')
                if not href:
                    continue
                # The listing id is the last path segment of the card link.
                listing_id = href.split('/')[-1]

                # The details text is split on commas into rooms, space, rent.
                details = detail_nodes[0].get_text().split(',')
                if len(details) < 3:
                    continue
                rooms = details[0].split(' ')[0]
                space = details[1].strip('m²').strip()
                rent = details[2].strip(' CHF').replace('.–', '').replace('’', '')

                # The last comma-separated part of the address is read as
                # "<zip> <city>".
                zip_and_city = address_node.get_text().split(',')[-1].strip().split(' ')
                zip_code = zip_and_city[0]
                city = " ".join(zip_and_city[1:])

                if not rent or not space or not rooms or not city or not zip_code or rent == "Preis auf Anfrage":
                    continue

                data.append([listing_id, city, zip_code, rooms, space, rent])

            url = get_next_page_url(soup)
            # Pause between page requests.
            time.sleep(10)

    return data

def get_next_page_url(soup):
    """Return the URL of the next results page, or None if there is none.

    The page is searched for an ``<a>`` element whose ``aria-label`` equals
    NEXT_PAGE_LABEL; its ``href`` is appended to the site origin.
    """
    link = soup.find("a", {"aria-label": NEXT_PAGE_LABEL})
    if not link:
        return None
    return f"https://www.immoscout24.ch{link['href']}"

# Write to CSV
def save_to_csv(data):
    """Write a header row followed by the rows in ``data`` to OUTPUT_FILE.

    The file is opened in write mode as UTF-8, so any existing content is
    replaced.
    """
    header = ["id", "location", "plz", "rooms", "space", "price"]
    with open(OUTPUT_FILE, "w", newline="", encoding="utf-8") as out_handle:
        csv_out = csv.writer(out_handle)
        csv_out.writerow(header)
        csv_out.writerows(data)
    print(f"Saved data to {OUTPUT_FILE}")

if __name__ == "__main__":
    # Scrape all locations first, then write every collected row in one go.
    save_to_csv(scrape_listings())

Scraping https://www.immoscout24.ch/de/immobilien/mieten/ort-bern...
Scraping https://www.immoscout24.ch/de/immobilien/mieten/ort-bern?pn=2...
Scraping https://www.immoscout24.ch/de/immobilien/mieten/ort-bern?pn=3...
Scraping https://www.immoscout24.ch/de/immobilien/mieten/ort-bern?pn=4...
Scraping https://www.immoscout24.ch/de/immobilien/mieten/ort-bern?pn=5...
Scraping https://www.immoscout24.ch/de/immobilien/mieten/ort-bern?pn=6...
Scraping https://www.immoscout24.ch/de/immobilien/mieten/ort-bern?pn=7...
Scraping https://www.immoscout24.ch/de/immobilien/mieten/ort-bern?pn=8...
Scraping https://www.immoscout24.ch/de/immobilien/mieten/ort-bern?pn=9...
Scraping https://www.immoscout24.ch/de/immobilien/mieten/ort-bern?pn=10...
Scraping https://www.immoscout24.ch/de/immobilien/mieten/ort-bern?pn=11...


KeyboardInterrupt: 

# Data Dispatcher

In [1]:
import requests
import csv
import json

# Load every row of the CSV file and send all of them to the API in a single
# POST request.

# The scraper cell writes its CSV as UTF-8, so decode this file the same way
# rather than relying on the platform default encoding.
# NOTE(review): presumably this file is a copy of the scraper output -- confirm.
with open('immoscout_listings.csv', newline='', encoding='utf-8') as csvfile:
    apartments_list = list(csv.DictReader(csvfile))

# The file is fully read and closed before the network call is made.
url = 'http://65.108.84.170:3000/add_apartments'
response = requests.post(url, json=apartments_list, timeout=60)

if response.status_code != 200:
    raise Exception(f"Failed to send data to API: {response.text}")