GitHub Project Link:
https://github.com/lironlle/projectApartmentPrice<br>
Submitted by:
Shira Lewitinn 322592551
Liron Levi 208005637

In [177]:
# Notebook shell escapes: install the third-party packages with pip.
# NOTE(review): the cells visible here import bs4 and requests but never import
# googlemaps (distances are fetched with plain HTTP requests) - confirm it is still needed.
!pip install beautifulsoup4
!pip install googlemaps




In [132]:
import requests
from bs4 import BeautifulSoup

from urllib.parse import urljoin

# Listing page whose ad cards are scanned for links to the individual ads.
base_url = "https://www.ad.co.il/nadlanrent?sp275=17413&sp277=17656,17981"

# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() fails loudly instead of parsing an HTTP error page into an
# empty list of links.
response = requests.get(base_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Every <div class="card-block"> is treated as one ad card.
ads = soup.find_all("div", class_="card-block")

# Keep the first link of each card as an absolute URL. urljoin resolves
# site-relative hrefs against the listing URL and leaves already-absolute hrefs
# intact, which plain string concatenation would corrupt.
all_links = []
for ad in ads:
    a_tag = ad.find("a", href=True)
    if a_tag:
        full_link = urljoin(base_url, a_tag["href"])
        all_links.append(full_link)



In [174]:

import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time
import json
from datetime import datetime

import os

# NOTE(review): a Google API key is committed in this file. Treat it as leaked:
# rotate it and supply the replacement through the GOOGLE_MAPS_API_KEY
# environment variable. The literal is kept only as a fallback so existing runs
# keep working until the key is rotated.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY") or "AIzaSyC1-TFkLyAboHCXtA1Ty_oHjouhFhMYVro"

# Destination address used when measuring each apartment's distance.
CENTER = "כיכר דיזנגוף, תל אביב"

# Property types kept as-is in the output; any other value is replaced by "כללי"
# when the rows are built.
allowed_property_types = [
    "דירה", "דירת גן", "בית פרטי/ קוטג'", "גג/ פנטהאוז",
    "מגרשים", "דופלקס", "תיירות ונופש", "כללי"
]

def extract_number(text):
    """Return the first number found in *text* as a float.

    Commas are removed before matching, so "1,200" is read as 1200.0. A
    decimal part separated by a dot is kept ("3.5" -> 3.5).

    Args:
        text: String to search. Any non-string value (including None) is
            treated as "no number".

    Returns:
        The first number as a float, or None when there is none.
    """
    # Explicit guard instead of a bare ``except:`` so that only the expected
    # "not a string" case is mapped to None and real errors are not hidden.
    if not isinstance(text, str):
        return None
    match = re.search(r"\d+(?:\.\d+)?", text.replace(",", ""))
    return float(match.group()) if match else None

def parse_days_to_enter(text):
    """Return the number of days from today until the entry date in *text*.

    Args:
        text: Entry-date text. Either contains the word "מיידית" or is a date
            formatted as DD.MM.YYYY.

    Returns:
        0 when the text contains "מיידית" or the date is today or in the past,
        the positive number of days for a future date, and None when the text
        is empty or is not a DD.MM.YYYY date.
    """
    if not text:
        return None
    text = text.strip()
    if "מיידית" in text:
        return 0
    # Only the parse itself can fail on malformed input; catching ValueError
    # alone keeps unrelated errors visible instead of turning them into None.
    try:
        entry_date = datetime.strptime(text, "%d.%m.%Y").date()
    except ValueError:
        return None
    today = datetime.today().date()
    # Dates that already passed are clamped to 0.
    return max((entry_date - today).days, 0)

def get_distance_from_center_with_routes_api(origin_address):
    """Return the driving distance in meters from *origin_address* to CENTER.

    Sends one computeRoutes request to the Google Routes API, authenticated
    with API_KEY, and reads ``distanceMeters`` of the first returned route.

    Args:
        origin_address: Free-text address of the route origin.

    Returns:
        The distance as a float, or None when the request fails, the response
        status is not 200, or the response contains no usable route.
    """
    url = "https://routes.googleapis.com/directions/v2:computeRoutes"
    headers = {
        'Content-Type': 'application/json',
        'X-Goog-Api-Key': API_KEY,
        # Ask the API to return only the field that is read below.
        'X-Goog-FieldMask': 'routes.distanceMeters'
    }
    body = {
        "origin": {"address": origin_address},
        "destination": {"address": CENTER},
        "travelMode": "DRIVE",
        "routingPreference": "TRAFFIC_AWARE"
    }
    try:
        # The timeout keeps one stalled request from blocking the whole scrape.
        response = requests.post(url, headers=headers, json=body, timeout=30)
        if response.status_code != 200:
            return None
        data = response.json()
        return float(data['routes'][0]['distanceMeters'])
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError):
        # Network failure, non-JSON body, or a response without a route/distance:
        # the distance is simply unknown for this address.
        return None

# Lookup from the Hebrew label found next to an amenity icon on an ad page to
# the name of the output column that flags it.
# NOTE(review): "has_stotsge" and "has_balcon" look like typos, but the same
# names are used as column names further down, so they are kept unchanged here.
features_map = {
    # Building and access
    "חניה": "has_parking",
    "מחסן": "has_stotsge",
    "מעלית": "elevator",
    "מיזוג": "ac",
    "נגישות": "handicap",
    # Safety
    "סורגים": "has_bars",
    'ממ"ד': "has_safe_room",
    # Apartment condition and extras
    "מרפסת": "has_balcon",
    "מרוהטת": "is_furnished",
    "משופצת": "is_renovated",
}

def _to_int_or_none(value):
    """Convert a parsed number to int, passing None through unchanged."""
    return int(value) if value is not None else None


# One dict per successfully scraped ad, in the order the links were collected.
results = []

for link in all_links:
    try:
        # The timeout keeps one stalled ad page from blocking the run, and
        # raise_for_status() prevents an HTTP error page from being parsed into
        # a row of empty values.
        response = requests.get(link, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Free-text description; an absent or empty paragraph is stored as None.
        desc_tag = soup.find("p", class_="text-word-break")
        description = (desc_tag.get_text(strip=True) if desc_tag else "") or None

        price_tag = soup.find("div", class_="price")
        price = extract_number(price_tag.get_text(strip=True)) if price_tag else None

        # Details table: first cell of each row is the field name, second its value.
        info_dict = {}
        table = soup.find("table", class_="table table-sm mb-4")
        if table:
            for table_row in table.find_all("tr"):
                tds = table_row.find_all("td")
                if len(tds) >= 2:
                    key = tds[0].get_text(strip=True)
                    val = tds[1].get_text(strip=True)
                    info_dict[key] = val

        # Amenity flags: every known feature starts at 0 and is set to 1 when
        # its icon is present without the "disabled" CSS class.
        features_result = {v: 0 for v in features_map.values()}
        icons = soup.select("div.card-icon")
        for icon in icons:
            label = icon.find("span")
            if label:
                feature_name = label.get_text(strip=True)
                if feature_name in features_map:
                    field_name = features_map[feature_name]
                    if "disabled" not in icon.get("class", []):
                        features_result[field_name] = 1

        # Prefer the street address for the distance lookup and fall back to
        # the neighborhood; with neither, no lookup is made.
        address = info_dict.get("כתובת", "")
        neighborhood = info_dict.get("שכונה", "")
        if address:
            full_address = f"{address}, תל אביב"
        elif neighborhood:
            full_address = f"{neighborhood}, תל אביב"
        else:
            full_address = None
        distance = get_distance_from_center_with_routes_api(full_address) if full_address else None

        # The first integer in the floor text is the floor, the second (when
        # present) the total number of floors.
        floor_text = info_dict.get("קומה", "")
        floor_parts = re.findall(r"\d+", floor_text)
        floor = int(floor_parts[0]) if len(floor_parts) >= 1 else None
        total_floors = int(floor_parts[1]) if len(floor_parts) >= 2 else None

        # An ad with no matching images is recorded as None rather than 0.
        images = soup.select('img[data-index]')
        num_of_images = len(images) if images else None

        # Unknown or missing property types are folded into "כללי".
        original_type = info_dict.get("פרטי הנכס", "") or ""
        property_type = original_type if original_type in allowed_property_types else "כללי"

        row = {
            "property_type": property_type,
            "neighborhood": str(neighborhood or ""),
            "address": str(address or ""),
            "room_num": extract_number(info_dict.get("חדרים", "")),
            "floor": floor,
            "area": _to_int_or_none(extract_number(info_dict.get("שטח בנוי", ""))),
            # A missing garden area is recorded as 0 rather than None.
            "garden_area": int(extract_number(info_dict.get("שטח גינה", "")) or 0),
            "days_to_enter": parse_days_to_enter(info_dict.get("תאריך כניסה", "")),
            "num_of_payments": _to_int_or_none(extract_number(info_dict.get("תשלומים בשנה", ""))),
            "monthly_arnona": _to_int_or_none(extract_number(info_dict.get("ארנונה בחודש", ""))),
            "building_tax": _to_int_or_none(extract_number(info_dict.get("ועד בית בחודש", ""))),
            "total_floors": total_floors,
            "description": description,
            "has_parking": features_result["has_parking"],
            "has_stotsge": features_result["has_stotsge"],
            "elevator": features_result["elevator"],
            "ac": features_result["ac"],
            "handicap": features_result["handicap"],
            "has_bars": features_result["has_bars"],
            "has_safe_room": features_result["has_safe_room"],
            "has_balcon": features_result["has_balcon"],
            "is_furnished": features_result["is_furnished"],
            "is_renovated": features_result["is_renovated"],
            "price": float(price) if price is not None else None,
            "num_of_images": num_of_images,
            "distance_from_center": float(distance) if distance is not None else None
        }

        results.append(row)

    except Exception as exc:
        # Best effort: one broken ad must not abort the run, but report it so
        # dropped ads are visible. Unlike a bare ``except:``, this does not
        # swallow KeyboardInterrupt, so the scrape can still be stopped.
        print(f"Skipping {link}: {exc!r}")

    # Pause between ads, after failures as well, to avoid hammering the site.
    time.sleep(1)

# Fixed column order of the exported dataset.
ordered_columns = [
    "property_type", "neighborhood", "address", "room_num", "floor", "area", "garden_area",
    "days_to_enter", "num_of_payments", "monthly_arnona", "building_tax", "total_floors",
    "description", "has_parking", "has_stotsge", "elevator", "ac", "handicap",
    "has_bars", "has_safe_room", "has_balcon", "is_furnished", "is_renovated",
    "price", "num_of_images", "distance_from_center"
]

# reindex orders the columns like df[ordered_columns] would, but also works when
# nothing was scraped: an empty frame with the expected columns is produced
# instead of a KeyError.
df = pd.DataFrame(results).reindex(columns=ordered_columns)

pd.set_option('display.max_columns', None)
print(df.head())

# Written once (the original wrote the same file twice). "utf-8-sig" prefixes
# the file with a BOM - presumably so spreadsheet tools detect the encoding of
# the Hebrew text.
df.to_csv("apartments.csv", index=False, encoding="utf-8-sig")


  property_type                 neighborhood          address  room_num  \
0          כללי  הצפון החדש סביבת ככר המדינה          ליסין 9       1.0   
1          דירה                   שיכון בבלי  הכנסת הגדולה 11       3.5   
2          דירה  הצפון החדש סביבת ככר המדינה         פייבל 17       5.0   
3          דירה  הצפון החדש סביבת ככר המדינה         פייבל 16       4.0   
4          כללי  הצפון החדש סביבת ככר המדינה   ז'בוטינסקי 112       1.0   

   floor  area  garden_area  days_to_enter  num_of_payments  monthly_arnona  \
0    3.0    20            0            0.0             12.0           300.0   
1    4.0    90            0            NaN             12.0          1100.0   
2    4.0   127            0            0.0             12.0           503.0   
3    1.0   127            0            0.0             12.0          1400.0   
4    NaN    18            0            0.0             12.0             NaN   

   building_tax  total_floors  \
0         150.0           NaN   
1       