In [89]:
!pip install beautifulsoup4
!pip install googlemaps


Collecting googlemaps
  Downloading googlemaps-4.10.0.tar.gz (33 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: googlemaps
  Building wheel for googlemaps (setup.py): started
  Building wheel for googlemaps (setup.py): finished with status 'done'
  Created wheel for googlemaps: filename=googlemaps-4.10.0-py3-none-any.whl size=40750 sha256=605a40dae4179f37665c093971eafc43994a6fd32c171b30aae64822f471e114
  Stored in directory: c:\users\liron\appdata\local\pip\cache\wheels\ca\e3\07\3a193cac660b6210a88180b659cb251f941cbff99a0f556136
Successfully built googlemaps
Installing collected packages: googlemaps
Successfully installed googlemaps-4.10.0


In [132]:
import requests
from bs4 import BeautifulSoup

from urllib.parse import urljoin

# Listing search page on ad.co.il; the filters are encoded in the query string.
base_url = "https://www.ad.co.il/nadlanrent?sp275=17413&sp277=17656,17981"

# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status surfaces HTTP errors instead of parsing an error page
# into an empty list of links.
response = requests.get(base_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

ads = soup.find_all("div", class_="card-block")

# Collect the first link of every card. urljoin resolves relative hrefs against
# the page URL and leaves already-absolute hrefs untouched.
all_links = []
for ad in ads:
    a_tag = ad.find("a", href=True)
    if a_tag:
        all_links.append(urljoin(base_url, a_tag["href"]))



In [128]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time
import json

import os

# Google API key: read from the environment so the secret is not stored in the
# notebook. Set GOOGLE_MAPS_API_KEY before running this cell.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "")

# City centre ("Dizengoff Square, Tel Aviv"), used as the route destination
CENTER = "כיכר דיזנגוף, תל אביב"

# Extract a number from text
def extract_number(text):
    """Return the first number found in ``text`` as a float, or None.

    Commas are removed before matching, so "1,100" yields 1100.0. Only
    unsigned integers and decimals are recognised. Non-string input (such as
    None) and text without digits return None.
    """
    # Explicit type check instead of a bare ``except`` that hid every error
    if not isinstance(text, str):
        return None
    match = re.search(r"\d+(?:\.\d+)?", text.replace(",", ""))
    return float(match.group()) if match else None

# Routes API
def get_distance_from_center_with_routes_api(origin_address):
    """Return the driving distance in metres from ``origin_address`` to ``CENTER``.

    Posts to the Google Routes API ``computeRoutes`` endpoint and asks only for
    ``routes.distanceMeters`` through the field mask.

    Args:
        origin_address: Free-text address of the route origin.

    Returns:
        The distance of the first returned route as a float, or None when the
        address is empty, the request fails, the status code is not 200, or
        the response holds no usable route.
    """
    if not origin_address:
        return None

    url = "https://routes.googleapis.com/directions/v2:computeRoutes"
    headers = {
        'Content-Type': 'application/json',
        'X-Goog-Api-Key': API_KEY,
        'X-Goog-FieldMask': 'routes.distanceMeters'
    }
    body = {
        "origin": {"address": origin_address},
        "destination": {"address": CENTER},
        "travelMode": "DRIVE",
        # NOTE(review): traffic-aware routing may return different routes at
        # different times, so the distance may vary between runs — confirm intended.
        "routingPreference": "TRAFFIC_AWARE"
    }

    try:
        # json= serialises the body; the timeout stops one stalled call from
        # blocking the whole scrape.
        response = requests.post(url, headers=headers, json=body, timeout=30)
        if response.status_code != 200:
            return None
        return float(response.json()['routes'][0]['distanceMeters'])
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError):
        # Network failure, malformed JSON, or a response without a route
        return None

# Features: Hebrew label matched against the ad's feature icons -> output column name
features_map = dict([
    ("חניה", "has_parking"),
    ("מחסן", "has_stotsge"),
    ("מעלית", "elevator"),
    ("מיזוג", "ac"),
    ("נגישות", "handicap"),
    ("סורגים", "has_bars"),
    ("ממ\"ד", "has_safe_room"),
    ("מרפסת", "has_balcon"),
    ("מרוהטת", "is_furnished"),
    ("משופצת", "is_renovated"),
])

def _number_or_zero(info, label):
    """Return the first number in ``info[label]``, or 0 when missing or non-numeric."""
    return extract_number(info.get(label, "")) or 0


results = []

# Loop over every ad link and build one record per listing
for link in all_links:
    try:
        # The timeout stops one stalled page from hanging the run; HTTP errors
        # are raised so an error page is skipped instead of stored as an empty row.
        response = requests.get(link, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Description
        desc_tag = soup.find("div", class_="description-text")
        description = desc_tag.get_text(strip=True) if desc_tag else ""

        # Price
        price_tag = soup.find("div", class_="price")
        price = extract_number(price_tag.get_text(strip=True)) if price_tag else None

        # Details table: the first cell of a row is the label, the second its value
        info_dict = {}
        table = soup.find("table", class_="table table-sm mb-4")
        if table:
            for table_row in table.find_all("tr"):
                tds = table_row.find_all("td")
                if len(tds) >= 2:
                    info_dict[tds[0].get_text(strip=True)] = tds[1].get_text(strip=True)

        # Feature icons: 1 when a known feature icon is not marked "disabled"
        features_result = {v: 0 for v in features_map.values()}
        for icon in soup.select("div.card-icon"):
            label = icon.find("span")
            if label is None:
                continue
            field_name = features_map.get(label.get_text(strip=True))
            if field_name and "disabled" not in icon.get("class", []):
                features_result[field_name] = 1

        # Address, falling back to the neighbourhood
        address = info_dict.get("כתובת", "")
        neighborhood = info_dict.get("שכונה", "")
        place = address or neighborhood
        full_address = f"{place}, תל אביב" if place else None

        distance = get_distance_from_center_with_routes_api(full_address) if full_address else None

        # One data row per ad; missing numeric fields are stored as 0
        record = {
            "property_type": str(info_dict.get("פרטי הנכס", "") or ""),
            "neighborhood": str(neighborhood or ""),
            "address": str(address or ""),
            "room_num": float(_number_or_zero(info_dict, "חדרים")),
            "floor": int(_number_or_zero(info_dict, "קומה")),
            "area": int(_number_or_zero(info_dict, "שטח בנוי")),
            "garden_area": int(_number_or_zero(info_dict, "שטח גינה")),
            # NOTE(review): this keeps only the first number of the entry-date
            # text, which is not a day count — confirm this is intended.
            "days_to_enter": int(_number_or_zero(info_dict, "תאריך כניסה")),
            "num_of_payments": int(_number_or_zero(info_dict, "תשלומים בשנה")),
            "monthly_arnona": int(_number_or_zero(info_dict, "ארנונה בחודש")),
            "building_tax": int(_number_or_zero(info_dict, "ועד בית בחודש")),
            "total_floors": int(_number_or_zero(info_dict, "קומות בבניין")),
            "description": str(description or ""),
            # Expands to the ten feature flags, in features_map order
            **features_result,
            "price": float(price or 0),
            "num_of_images": int(len(soup.select("div.gallery-thumbs img"))),
            "distance_from_center": float(distance or 0)
        }

        results.append(record)
        time.sleep(1)

    except Exception as exc:
        # Best effort: skip an ad that fails to download or parse, but report
        # it. Unlike a bare ``except``, this lets KeyboardInterrupt stop the loop.
        print(f"Skipping {link}: {exc!r}")
        continue

# Column order
ordered_columns = [
    "property_type", "neighborhood", "address", "room_num", "floor", "area", "garden_area",
    "days_to_enter", "num_of_payments", "monthly_arnona", "building_tax", "total_floors",
    "description", "has_parking", "has_stotsge", "elevator", "ac", "handicap",
    "has_bars", "has_safe_room", "has_balcon", "is_furnished", "is_renovated",
    "price", "num_of_images", "distance_from_center"
]

# Create the DataFrame. Passing columns= fixes the column order and still
# produces the expected (empty) columns when no ad was scraped, a case in
# which df[ordered_columns] raises KeyError.
df = pd.DataFrame(results, columns=ordered_columns)

# Save to file (the utf-8-sig codec prepends a byte-order mark)
df.to_csv("apartments.csv", index=False, encoding="utf-8-sig")

# Print the DataFrame
pd.set_option('display.max_columns', None)
print(df.head())

  property_type                 neighborhood          address  room_num  \
0   סטודיו/לופט  הצפון החדש סביבת ככר המדינה          ליסין 9       1.0   
1          דירה                   שיכון בבלי  הכנסת הגדולה 11       3.5   
2          דירה  הצפון החדש סביבת ככר המדינה         פייבל 17       5.0   
3          דירה  הצפון החדש סביבת ככר המדינה         פייבל 16       4.0   
4          חניה  הצפון החדש סביבת ככר המדינה   ז'בוטינסקי 112       1.0   

   floor  area  garden_area  days_to_enter  num_of_payments  monthly_arnona  \
0      3    20            0              0               12             300   
1      4    90            0              0               12            1100   
2      4   127            0              0               12             503   
3      1   127            0              0               12            1400   
4      0    18            0              0               12               0   

   building_tax  total_floors description  has_parking  has_stotsge  eleva

In [145]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time
import json

import os

# Google API key: read from the environment so the secret is not stored in the
# notebook. Set GOOGLE_MAPS_API_KEY before running this cell.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "")

# Tel Aviv centre ("Dizengoff Square, Tel Aviv"), used as the route destination
CENTER = "כיכר דיזנגוף, תל אביב"

# Closed list of accepted values for the "פרטי הנכס" field; any other value is
# recorded as "כללי" by the scraping loop.
allowed_property_types = [
    "דירה", "דירת גן", "בית פרטי/ קוטג'", "גג/ פנטהאוז",
    "מגרשים", "דופלקס", "תיירות ונופש", "כללי"
]

# Extract numbers
def extract_number(text):
    """Return the first number found in ``text`` as a float, or None.

    Commas are removed before matching, so "1,100" yields 1100.0. Only
    unsigned integers and decimals are recognised. Non-string input (such as
    None) and text without digits return None.
    """
    # Explicit type check instead of a bare ``except`` that hid every error
    if not isinstance(text, str):
        return None
    match = re.search(r"\d+(?:\.\d+)?", text.replace(",", ""))
    return float(match.group()) if match else None

# Use the Routes API to compute the distance
def get_distance_from_center_with_routes_api(origin_address):
    """Return the driving distance in metres from ``origin_address`` to ``CENTER``.

    Posts to the Google Routes API ``computeRoutes`` endpoint and asks only for
    ``routes.distanceMeters`` through the field mask.

    Args:
        origin_address: Free-text address of the route origin.

    Returns:
        The distance of the first returned route as a float, or None when the
        address is empty, the request fails, the status code is not 200, or
        the response holds no usable route.
    """
    if not origin_address:
        return None

    url = "https://routes.googleapis.com/directions/v2:computeRoutes"
    headers = {
        'Content-Type': 'application/json',
        'X-Goog-Api-Key': API_KEY,
        'X-Goog-FieldMask': 'routes.distanceMeters'
    }
    body = {
        "origin": {"address": origin_address},
        "destination": {"address": CENTER},
        "travelMode": "DRIVE",
        # NOTE(review): traffic-aware routing may return different routes at
        # different times, so the distance may vary between runs — confirm intended.
        "routingPreference": "TRAFFIC_AWARE"
    }

    try:
        # json= serialises the body; the timeout stops one stalled call from
        # blocking the whole scrape.
        response = requests.post(url, headers=headers, json=body, timeout=30)
        if response.status_code != 200:
            return None
        return float(response.json()['routes'][0]['distanceMeters'])
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError):
        # Network failure, malformed JSON, or a response without a route
        return None

# Yes/no features: Hebrew label matched against the ad's feature icons -> output column name
features_map = dict([
    ("חניה", "has_parking"),
    ("מחסן", "has_stotsge"),
    ("מעלית", "elevator"),
    ("מיזוג", "ac"),
    ("נגישות", "handicap"),
    ("סורגים", "has_bars"),
    ("ממ\"ד", "has_safe_room"),
    ("מרפסת", "has_balcon"),
    ("מרוהטת", "is_furnished"),
    ("משופצת", "is_renovated"),
])

def _int_or_none(info, label):
    """Return the first number in ``info[label]`` truncated to int, or None when absent."""
    value = extract_number(info.get(label, ""))
    return int(value) if value is not None else None


results = []

# Go over the ads and build one record per listing
for link in all_links:
    try:
        # The timeout stops one stalled page from hanging the run; HTTP errors
        # are raised so an error page is skipped instead of stored as an empty row.
        response = requests.get(link, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Description
        desc_tag = soup.find("p", class_="text-word-break")
        description = desc_tag.get_text(strip=True) if desc_tag else ""

        # Price
        price_tag = soup.find("div", class_="price")
        price = extract_number(price_tag.get_text(strip=True)) if price_tag else None

        # Info table: the first cell of a row is the label, the second its value
        info_dict = {}
        table = soup.find("table", class_="table table-sm mb-4")
        if table:
            for table_row in table.find_all("tr"):
                tds = table_row.find_all("td")
                if len(tds) >= 2:
                    info_dict[tds[0].get_text(strip=True)] = tds[1].get_text(strip=True)

        # Yes/no features: 1 when a known feature icon is not marked "disabled"
        features_result = {v: 0 for v in features_map.values()}
        for icon in soup.select("div.card-icon"):
            label = icon.find("span")
            if label is None:
                continue
            field_name = features_map.get(label.get_text(strip=True))
            if field_name and "disabled" not in icon.get("class", []):
                features_result[field_name] = 1

        # Address, falling back to the neighbourhood
        address = info_dict.get("כתובת", "")
        neighborhood = info_dict.get("שכונה", "")
        place = address or neighborhood
        full_address = f"{place}, תל אביב" if place else None

        distance = get_distance_from_center_with_routes_api(full_address) if full_address else None

        # Validate property_type against the closed list
        original_type = info_dict.get("פרטי הנכס", "") or ""
        property_type = original_type if original_type in allowed_property_types else "כללי"

        # One data row per ad; missing numeric fields stay None (except garden_area)
        record = {
            "property_type": property_type,
            "neighborhood": str(neighborhood or ""),
            "address": str(address or ""),
            "room_num": extract_number(info_dict.get("חדרים", "")),
            "floor": _int_or_none(info_dict, "קומה"),
            "area": _int_or_none(info_dict, "שטח בנוי"),
            "garden_area": int(extract_number(info_dict.get("שטח גינה", "")) or 0),
            # NOTE(review): this keeps only the first number of the entry-date
            # text, which is not a day count — confirm this is intended.
            "days_to_enter": _int_or_none(info_dict, "תאריך כניסה"),
            "num_of_payments": _int_or_none(info_dict, "תשלומים בשנה"),
            "monthly_arnona": _int_or_none(info_dict, "ארנונה בחודש"),
            "building_tax": _int_or_none(info_dict, "ועד בית בחודש"),
            "total_floors": _int_or_none(info_dict, "קומות בבניין"),
            "description": str(description or ""),
            # Expands to the ten feature flags, in features_map order
            **features_result,
            "price": float(price or 0),
            "num_of_images": int(len(soup.select("div.gallery-thumbs img"))),
            "distance_from_center": float(distance or 0)
        }

        results.append(record)
        time.sleep(1)

    except Exception as exc:
        # Best effort: skip an ad that fails to download or parse, but report
        # it. Unlike a bare ``except``, this lets KeyboardInterrupt stop the loop.
        print(f"Skipping {link}: {exc!r}")
        continue

# Column order according to the requirements
ordered_columns = [
    "property_type", "neighborhood", "address", "room_num", "floor", "area", "garden_area",
    "days_to_enter", "num_of_payments", "monthly_arnona", "building_tax", "total_floors",
    "description", "has_parking", "has_stotsge", "elevator", "ac", "handicap",
    "has_bars", "has_safe_room", "has_balcon", "is_furnished", "is_renovated",
    "price", "num_of_images", "distance_from_center"
]

# Create the DataFrame. Passing columns= fixes the column order and still
# produces the expected (empty) columns when no ad was scraped, a case in
# which df[ordered_columns] raises KeyError.
df = pd.DataFrame(results, columns=ordered_columns)

# Save to a CSV file (the utf-8-sig codec prepends a byte-order mark)
df.to_csv("apartments.csv", index=False, encoding="utf-8-sig")
pd.set_option('display.max_columns', None)
print(df.head())

  property_type                 neighborhood          address  room_num  \
0          כללי  הצפון החדש סביבת ככר המדינה          ליסין 9       1.0   
1          דירה                   שיכון בבלי  הכנסת הגדולה 11       3.5   
2          דירה  הצפון החדש סביבת ככר המדינה         פייבל 17       5.0   
3          דירה  הצפון החדש סביבת ככר המדינה         פייבל 16       4.0   
4          כללי  הצפון החדש סביבת ככר המדינה   ז'בוטינסקי 112       1.0   

   floor  area  garden_area  days_to_enter  num_of_payments  monthly_arnona  \
0    3.0    20            0            NaN             12.0           300.0   
1    4.0    90            0            NaN             12.0          1100.0   
2    4.0   127            0            NaN             12.0           503.0   
3    1.0   127            0            NaN             12.0          1400.0   
4    NaN    18            0            NaN             12.0             NaN   

   building_tax total_floors  \
0         150.0         None   
1         

In [151]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time
import json
from datetime import datetime

import os

# Google API key: read from the environment so the secret is not stored in the
# notebook. Set GOOGLE_MAPS_API_KEY before running this cell.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "")
# "Dizengoff Square, Tel Aviv", used as the route destination
CENTER = "כיכר דיזנגוף, תל אביב"

# Closed list of accepted property_type values; any other value is recorded
# as "כללי" by the scraping loop.
allowed_property_types = [
    "דירה", "דירת גן", "בית פרטי/ קוטג'", "גג/ פנטהאוז",
    "מגרשים", "דופלקס", "תיירות ונופש", "כללי"
]

def extract_number(text):
    """Return the first number found in ``text`` as a float, or None.

    Commas are removed before matching, so "1,100" yields 1100.0. Only
    unsigned integers and decimals are recognised. Non-string input (such as
    None) and text without digits return None.
    """
    # Explicit type check instead of a bare ``except`` that hid every error
    if not isinstance(text, str):
        return None
    match = re.search(r"\d+(?:\.\d+)?", text.replace(",", ""))
    return float(match.group()) if match else None

def parse_days_to_enter(text, today=None):
    """Convert the entry-date text into a number of days counted from ``today``.

    Args:
        text: Entry-date text; either contains "מיידית" (immediate) or is a
            date written as ``DD.MM.YYYY``.
        today: Reference ``date``. Defaults to the current local date; pass a
            fixed value to make the result reproducible.

    Returns:
        0 for immediate entry or a date that has already passed, the number of
        days until a future date, and None when the text is empty or is not in
        the expected date format.
    """
    if not text:
        return None
    text = text.strip()
    if "מיידית" in text:
        return 0
    try:
        entry_date = datetime.strptime(text, "%d.%m.%Y").date()
    except ValueError:
        # Any other wording is treated as unknown
        return None
    if today is None:
        today = datetime.today().date()
    return max((entry_date - today).days, 0)

def get_distance_from_center_with_routes_api(origin_address):
    """Return the driving distance in metres from ``origin_address`` to ``CENTER``.

    Posts to the Google Routes API ``computeRoutes`` endpoint and asks only for
    ``routes.distanceMeters`` through the field mask.

    Args:
        origin_address: Free-text address of the route origin.

    Returns:
        The distance of the first returned route as a float, or None when the
        address is empty, the request fails, the status code is not 200, or
        the response holds no usable route.
    """
    if not origin_address:
        return None

    url = "https://routes.googleapis.com/directions/v2:computeRoutes"
    headers = {
        'Content-Type': 'application/json',
        'X-Goog-Api-Key': API_KEY,
        'X-Goog-FieldMask': 'routes.distanceMeters'
    }
    body = {
        "origin": {"address": origin_address},
        "destination": {"address": CENTER},
        "travelMode": "DRIVE",
        # NOTE(review): traffic-aware routing may return different routes at
        # different times, so the distance may vary between runs — confirm intended.
        "routingPreference": "TRAFFIC_AWARE"
    }

    try:
        # json= serialises the body; the timeout stops one stalled call from
        # blocking the whole scrape.
        response = requests.post(url, headers=headers, json=body, timeout=30)
        if response.status_code != 200:
            return None
        return float(response.json()['routes'][0]['distanceMeters'])
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError):
        # Network failure, malformed JSON, or a response without a route
        return None

# Yes/no features: Hebrew label matched against the ad's feature icons -> output column name
features_map = dict([
    ("חניה", "has_parking"),
    ("מחסן", "has_stotsge"),
    ("מעלית", "elevator"),
    ("מיזוג", "ac"),
    ("נגישות", "handicap"),
    ("סורגים", "has_bars"),
    ("ממ\"ד", "has_safe_room"),
    ("מרפסת", "has_balcon"),
    ("מרוהטת", "is_furnished"),
    ("משופצת", "is_renovated"),
])

def _int_or_none(info, label):
    """Return the first number in ``info[label]`` truncated to int, or None when absent."""
    value = extract_number(info.get(label, ""))
    return int(value) if value is not None else None


results = []

# Note: the variable all_links is expected to hold all the ad links, e.g.
# all_links = ["https://www.ad.co.il/ad/12345678", ...]

for link in all_links:
    try:
        # The timeout stops one stalled page from hanging the run; HTTP errors
        # are raised so an error page is skipped instead of stored as an empty row.
        response = requests.get(link, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Description
        desc_tag = soup.find("p", class_="text-word-break")
        description = desc_tag.get_text(strip=True) if desc_tag else ""

        # Price
        price_tag = soup.find("div", class_="price")
        price = extract_number(price_tag.get_text(strip=True)) if price_tag else None

        # Info table: the first cell of a row is the label, the second its value
        info_dict = {}
        table = soup.find("table", class_="table table-sm mb-4")
        if table:
            for table_row in table.find_all("tr"):
                tds = table_row.find_all("td")
                if len(tds) >= 2:
                    info_dict[tds[0].get_text(strip=True)] = tds[1].get_text(strip=True)

        # Yes/no extras: 1 when a known feature icon is not marked "disabled"
        features_result = {v: 0 for v in features_map.values()}
        for icon in soup.select("div.card-icon"):
            label = icon.find("span")
            if label is None:
                continue
            field_name = features_map.get(label.get_text(strip=True))
            if field_name and "disabled" not in icon.get("class", []):
                features_result[field_name] = 1

        # Address (falling back to the neighbourhood) and distance
        address = info_dict.get("כתובת", "")
        neighborhood = info_dict.get("שכונה", "")
        place = address or neighborhood
        full_address = f"{place}, תל אביב" if place else None
        distance = get_distance_from_center_with_routes_api(full_address) if full_address else None

        # Floors: the first number in the floor text is the floor, the second
        # (when present) the total number of floors
        floor_text = info_dict.get("קומה", "")
        floor_parts = re.findall(r"\d+", floor_text)
        floor = int(floor_parts[0]) if len(floor_parts) >= 1 else None
        total_floors = int(floor_parts[1]) if len(floor_parts) >= 2 else None

        # Property type, validated against the closed list
        original_type = info_dict.get("פרטי הנכס", "") or ""
        property_type = original_type if original_type in allowed_property_types else "כללי"

        # The data row
        record = {
            "property_type": property_type,
            "neighborhood": str(neighborhood or ""),
            "address": str(address or ""),
            "room_num": extract_number(info_dict.get("חדרים", "")),
            "floor": floor,
            "area": _int_or_none(info_dict, "שטח בנוי"),
            "garden_area": int(extract_number(info_dict.get("שטח גינה", "")) or 0),
            "days_to_enter": parse_days_to_enter(info_dict.get("תאריך כניסה", "")),
            "num_of_payments": _int_or_none(info_dict, "תשלומים בשנה"),
            "monthly_arnona": _int_or_none(info_dict, "ארנונה בחודש"),
            "building_tax": _int_or_none(info_dict, "ועד בית בחודש"),
            "total_floors": total_floors,
            "description": description,
            # Expands to the ten feature flags, in features_map order
            **features_result,
            "price": float(price or 0),
            # NOTE(review): when no gallery thumbnail is found, the fallback
            # counts every <img> in the page — confirm this is intended.
            "num_of_images": len(soup.select("div.gallery-thumbs img")) or len(soup.find_all("img")),
            "distance_from_center": float(distance or 0)
        }

        results.append(record)
        time.sleep(1)

    except Exception as exc:
        # Best effort: skip an ad that fails to download or parse, but report
        # it. Unlike a bare ``except``, this lets KeyboardInterrupt stop the loop.
        print(f"Skipping {link}: {exc!r}")
        continue

# Column order
ordered_columns = [
    "property_type", "neighborhood", "address", "room_num", "floor", "area", "garden_area",
    "days_to_enter", "num_of_payments", "monthly_arnona", "building_tax", "total_floors",
    "description", "has_parking", "has_stotsge", "elevator", "ac", "handicap",
    "has_bars", "has_safe_room", "has_balcon", "is_furnished", "is_renovated",
    "price", "num_of_images", "distance_from_center"
]

# Create the DataFrame. Passing columns= fixes the column order and still
# produces the expected (empty) columns when no ad was scraped, a case in
# which df[ordered_columns] raises KeyError.
df = pd.DataFrame(results, columns=ordered_columns)

# Save to file (the utf-8-sig codec prepends a byte-order mark)
df.to_csv("apartments.csv", index=False, encoding="utf-8-sig")
pd.set_option('display.max_columns', None)
print(df.head())

  property_type                 neighborhood          address  room_num  \
0          כללי  הצפון החדש סביבת ככר המדינה          ליסין 9       1.0   
1          דירה                   שיכון בבלי  הכנסת הגדולה 11       3.5   
2          דירה  הצפון החדש סביבת ככר המדינה         פייבל 17       5.0   
3          דירה  הצפון החדש סביבת ככר המדינה         פייבל 16       4.0   
4          כללי  הצפון החדש סביבת ככר המדינה   ז'בוטינסקי 112       1.0   

   floor  area  garden_area  days_to_enter  num_of_payments  monthly_arnona  \
0    3.0    20            0            0.0             12.0           300.0   
1    4.0    90            0            NaN             12.0          1100.0   
2    4.0   127            0            0.0             12.0           503.0   
3    1.0   127            0            0.0             12.0          1400.0   
4    NaN    18            0            0.0             12.0             NaN   

   building_tax  total_floors  \
0         150.0           NaN   
1       

In [157]:
# NOTE(review): this cell is a fragment of the scraping loop body, pasted at
# mixed indentation and without the enclosing `for`/`try`. It fails with the
# IndentationError shown in its output. The cell that follows contains the
# complete loop.

# Floors
floor_text = info_dict.get("קומה", "")
floor_parts = re.findall(r"\d+", floor_text)
floor = int(floor_parts[0]) if len(floor_parts) >= 1 else None
total_floors = int(floor_parts[1]) if len(floor_parts) >= 2 else None

# Description
desc_tag = soup.find("p", class_="text-word-break")
description = desc_tag.get_text(strip=True) if desc_tag and desc_tag.get_text(strip=True) else None

# Images
images = soup.select("div.gallery-thumbs img")
num_of_images = len(images) if images else None

# Property type
original_type = info_dict.get("פרטי הנכס", "") or ""
property_type = original_type if original_type in allowed_property_types else "כללי"

# The full data row
row = {
    "property_type": property_type,
    "neighborhood": str(neighborhood or ""),
    "address": str(address or ""),
    "room_num": extract_number(info_dict.get("חדרים", "")),
    "floor": floor,
    "area": int(extract_number(info_dict.get("שטח בנוי", ""))) if extract_number(info_dict.get("שטח בנוי", "")) is not None else None,
    "garden_area": int(extract_number(info_dict.get("שטח גינה", "")) or 0),
    "days_to_enter": parse_days_to_enter(info_dict.get("תאריך כניסה", "")),
    "num_of_payments": int(extract_number(info_dict.get("תשלומים בשנה", ""))) if extract_number(info_dict.get("תשלומים בשנה", "")) is not None else None,
    "monthly_arnona": int(extract_number(info_dict.get("ארנונה בחודש", ""))) if extract_number(info_dict.get("ארנונה בחודש", "")) is not None else None,
    "building_tax": int(extract_number(info_dict.get("ועד בית בחודש", ""))) if extract_number(info_dict.get("ועד בית בחודש", "")) is not None else None,
    "total_floors": total_floors if total_floors is not None else None,
    "description": description,
    "has_parking": features_result["has_parking"],
    "has_stotsge": features_result["has_stotsge"],
    "elevator": features_result["elevator"],
    "ac": features_result["ac"],
    "handicap": features_result["handicap"],
    "has_bars": features_result["has_bars"],
    "has_safe_room": features_result["has_safe_room"],
    "has_balcon": features_result["has_balcon"],
    "is_furnished": features_result["is_furnished"],
    "is_renovated": features_result["is_renovated"],
    "price": float(price or 0),
    "num_of_images": num_of_images,
    "distance_from_center": float(distance or 0)
}


        results.append(row)
        time.sleep(1)

    except:
        continue

# Create the DataFrame
df = pd.DataFrame(results)

# Column order
ordered_columns = [
    "property_type", "neighborhood", "address", "room_num", "floor", "area", "garden_area",
    "days_to_enter", "num_of_payments", "monthly_arnona", "building_tax", "total_floors",
    "description", "has_parking", "has_stotsge", "elevator", "ac", "handicap",
    "has_bars", "has_safe_room", "has_balcon", "is_furnished", "is_renovated",
    "price", "num_of_images", "distance_from_center"
]
df = df[ordered_columns]

# Save to file
df.to_csv("apartments.csv", index=False, encoding="utf-8-sig")
pd.set_option('display.max_columns', None)
print(df.head())

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 53)

In [164]:

import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time
import json
from datetime import datetime

import os

# Google API key: read from the environment so the secret is not stored in the
# notebook. Set GOOGLE_MAPS_API_KEY before running this cell.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "")
# "Dizengoff Square, Tel Aviv", used as the route destination
CENTER = "כיכר דיזנגוף, תל אביב"

# Closed list of accepted property_type values; any other value is recorded
# as "כללי" by the scraping loop.
allowed_property_types = [
    "דירה", "דירת גן", "בית פרטי/ קוטג'", "גג/ פנטהאוז",
    "מגרשים", "דופלקס", "תיירות ונופש", "כללי"
]

def extract_number(text):
    """Return the first number found in ``text`` as a float, or None.

    Commas are removed before matching, so "1,100" yields 1100.0. Only
    unsigned integers and decimals are recognised. Non-string input (such as
    None) and text without digits return None.
    """
    # Explicit type check instead of a bare ``except`` that hid every error
    if not isinstance(text, str):
        return None
    match = re.search(r"\d+(?:\.\d+)?", text.replace(",", ""))
    return float(match.group()) if match else None

def parse_days_to_enter(text, today=None):
    """Convert the entry-date text into a number of days counted from ``today``.

    Args:
        text: Entry-date text; either contains "מיידית" (immediate) or is a
            date written as ``DD.MM.YYYY``.
        today: Reference ``date``. Defaults to the current local date; pass a
            fixed value to make the result reproducible.

    Returns:
        0 for immediate entry or a date that has already passed, the number of
        days until a future date, and None when the text is empty or is not in
        the expected date format.
    """
    if not text:
        return None
    text = text.strip()
    if "מיידית" in text:
        return 0
    try:
        entry_date = datetime.strptime(text, "%d.%m.%Y").date()
    except ValueError:
        # Any other wording is treated as unknown
        return None
    if today is None:
        today = datetime.today().date()
    return max((entry_date - today).days, 0)

def get_distance_from_center_with_routes_api(origin_address):
    """Return the driving distance in metres from ``origin_address`` to ``CENTER``.

    Posts to the Google Routes API ``computeRoutes`` endpoint and asks only for
    ``routes.distanceMeters`` through the field mask.

    Args:
        origin_address: Free-text address of the route origin.

    Returns:
        The distance of the first returned route as a float, or None when the
        address is empty, the request fails, the status code is not 200, or
        the response holds no usable route.
    """
    if not origin_address:
        return None

    url = "https://routes.googleapis.com/directions/v2:computeRoutes"
    headers = {
        'Content-Type': 'application/json',
        'X-Goog-Api-Key': API_KEY,
        'X-Goog-FieldMask': 'routes.distanceMeters'
    }
    body = {
        "origin": {"address": origin_address},
        "destination": {"address": CENTER},
        "travelMode": "DRIVE",
        # NOTE(review): traffic-aware routing may return different routes at
        # different times, so the distance may vary between runs — confirm intended.
        "routingPreference": "TRAFFIC_AWARE"
    }

    try:
        # json= serialises the body; the timeout stops one stalled call from
        # blocking the whole scrape.
        response = requests.post(url, headers=headers, json=body, timeout=30)
        if response.status_code != 200:
            return None
        return float(response.json()['routes'][0]['distanceMeters'])
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError):
        # Network failure, malformed JSON, or a response without a route
        return None

# Hebrew label matched against the ad's feature icons -> output column name
features_map = dict([
    ("חניה", "has_parking"),
    ("מחסן", "has_stotsge"),
    ("מעלית", "elevator"),
    ("מיזוג", "ac"),
    ("נגישות", "handicap"),
    ("סורגים", "has_bars"),
    ("ממ\"ד", "has_safe_room"),
    ("מרפסת", "has_balcon"),
    ("מרוהטת", "is_furnished"),
    ("משופצת", "is_renovated"),
])

def _int_or_none(info, label):
    """Return the first number in ``info[label]`` truncated to int, or None when absent."""
    value = extract_number(info.get(label, ""))
    return int(value) if value is not None else None


# Make sure all_links is ready before this loop
results = []

for link in all_links:
    try:
        # The timeout stops one stalled page from hanging the run; HTTP errors
        # are raised so an error page is skipped instead of stored as an empty row.
        response = requests.get(link, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Description: None when the tag is missing or its text is empty
        desc_tag = soup.find("p", class_="text-word-break")
        description = (desc_tag.get_text(strip=True) or None) if desc_tag else None

        price_tag = soup.find("div", class_="price")
        price = extract_number(price_tag.get_text(strip=True)) if price_tag else None

        # Info table: the first cell of a row is the label, the second its value
        info_dict = {}
        table = soup.find("table", class_="table table-sm mb-4")
        if table:
            for table_row in table.find_all("tr"):
                tds = table_row.find_all("td")
                if len(tds) >= 2:
                    info_dict[tds[0].get_text(strip=True)] = tds[1].get_text(strip=True)

        # Feature flags: 1 when a known feature icon is not marked "disabled"
        features_result = {v: 0 for v in features_map.values()}
        for icon in soup.select("div.card-icon"):
            label = icon.find("span")
            if label is None:
                continue
            field_name = features_map.get(label.get_text(strip=True))
            if field_name and "disabled" not in icon.get("class", []):
                features_result[field_name] = 1

        # Address (falling back to the neighbourhood) and distance
        address = info_dict.get("כתובת", "")
        neighborhood = info_dict.get("שכונה", "")
        place = address or neighborhood
        full_address = f"{place}, תל אביב" if place else None
        distance = get_distance_from_center_with_routes_api(full_address) if full_address else None

        # The first number in the floor text is the floor, the second (when
        # present) the total number of floors
        floor_text = info_dict.get("קומה", "")
        floor_parts = re.findall(r"\d+", floor_text)
        floor = int(floor_parts[0]) if len(floor_parts) >= 1 else None
        total_floors = int(floor_parts[1]) if len(floor_parts) >= 2 else None

        # Number of gallery thumbnails; None when there are none
        images = soup.select("div.gallery-thumbs img")
        num_of_images = len(images) if images else None

        # Property type, validated against the closed list
        original_type = info_dict.get("פרטי הנכס", "") or ""
        property_type = original_type if original_type in allowed_property_types else "כללי"

        record = {
            "property_type": property_type,
            "neighborhood": str(neighborhood or ""),
            "address": str(address or ""),
            "room_num": extract_number(info_dict.get("חדרים", "")),
            "floor": floor,
            "area": _int_or_none(info_dict, "שטח בנוי"),
            "garden_area": int(extract_number(info_dict.get("שטח גינה", "")) or 0),
            "days_to_enter": parse_days_to_enter(info_dict.get("תאריך כניסה", "")),
            "num_of_payments": _int_or_none(info_dict, "תשלומים בשנה"),
            "monthly_arnona": _int_or_none(info_dict, "ארנונה בחודש"),
            "building_tax": _int_or_none(info_dict, "ועד בית בחודש"),
            "total_floors": total_floors,
            "description": description,
            # Expands to the ten feature flags, in features_map order
            **features_result,
            "price": float(price or 0),
            "num_of_images": num_of_images,
            "distance_from_center": float(distance or 0)
        }

        results.append(record)
        time.sleep(1)

    except Exception as exc:
        # Best effort: skip an ad that fails to download or parse, but report
        # it. Unlike a bare ``except``, this lets KeyboardInterrupt stop the loop.
        print(f"Skipping {link}: {exc!r}")
        continue

# Column order of the output table
ordered_columns = [
    "property_type", "neighborhood", "address", "room_num", "floor", "area", "garden_area",
    "days_to_enter", "num_of_payments", "monthly_arnona", "building_tax", "total_floors",
    "description", "has_parking", "has_stotsge", "elevator", "ac", "handicap",
    "has_bars", "has_safe_room", "has_balcon", "is_furnished", "is_renovated",
    "price", "num_of_images", "distance_from_center"
]

# Passing columns= fixes the column order and still produces the expected
# (empty) columns when no ad was scraped, a case in which df[ordered_columns]
# raises KeyError.
df = pd.DataFrame(results, columns=ordered_columns)

# Written once; the second, identical to_csv call was redundant.
# The utf-8-sig codec prepends a byte-order mark.
df.to_csv("apartments.csv", index=False, encoding="utf-8-sig")
pd.set_option('display.max_columns', None)
print(df.head())


  property_type                 neighborhood          address  room_num  \
0          כללי  הצפון החדש סביבת ככר המדינה          ליסין 9       1.0   
1          דירה                   שיכון בבלי  הכנסת הגדולה 11       3.5   
2          דירה  הצפון החדש סביבת ככר המדינה         פייבל 17       5.0   
3          דירה  הצפון החדש סביבת ככר המדינה         פייבל 16       4.0   
4          כללי  הצפון החדש סביבת ככר המדינה   ז'בוטינסקי 112       1.0   

   floor  area  garden_area  days_to_enter  num_of_payments  monthly_arnona  \
0    3.0    20            0            0.0             12.0           300.0   
1    4.0    90            0            NaN             12.0          1100.0   
2    4.0   127            0            0.0             12.0           503.0   
3    1.0   127            0            0.0             12.0          1400.0   
4    NaN    18            0            0.0             12.0             NaN   

   building_tax  total_floors  \
0         150.0           NaN   
1       

In [174]:

import json
import os
import re
import time
from datetime import datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup

# The Google Maps key is read from the environment so that it is never stored
# in the notebook. The key that used to be hardcoded here has been exposed and
# should be revoked and replaced.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "")
if not API_KEY:
    print("Warning: GOOGLE_MAPS_API_KEY is not set; distance lookups are expected to fail.")

# Destination address used as the city-centre reference point for distances.
CENTER = "כיכר דיזנגוף, תל אביב"

# Property types kept verbatim in the dataset; any other value scraped from an
# ad is replaced by the last entry ("כללי").
allowed_property_types = [
    "דירה", "דירת גן", "בית פרטי/ קוטג'", "גג/ פנטהאוז",
    "מגרשים", "דופלקס", "תיירות ונופש", "כללי"
]

def extract_number(text):
    """Return the first number found in ``text`` as a float, or None.

    Commas are removed before matching, so "4,500" is read as 4500.0. Only an
    unsigned integer or decimal (digits, optionally followed by ".digits") is
    recognised.

    Args:
        text: String to search. Values without string behaviour (for example
            None) yield None instead of raising.

    Returns:
        The first number as a float, or None when there is none.
    """
    try:
        match = re.search(r"\d+(?:\.\d+)?", text.replace(",", ""))
    except (AttributeError, TypeError):
        # Only "not a string" is treated as missing data; anything else
        # (including KeyboardInterrupt) is allowed to propagate.
        return None
    return float(match.group()) if match else None

def parse_days_to_enter(text, today=None):
    """Convert an entry-date string into the number of days until entry.

    Args:
        text: Entry-date text. A value containing "מיידית" means the property
            is available now; otherwise the stripped text must be a date in
            ``DD.MM.YYYY`` form.
        today: Optional ``datetime.date`` to measure from. Defaults to the
            current local date; pass a fixed date for reproducible results.

    Returns:
        0 for immediate entry or a date that is not in the future, the number
        of days until the entry date otherwise, or None when ``text`` is empty
        or is not a recognised date.
    """
    if not text:
        return None
    text = text.strip()
    if "מיידית" in text:
        return 0
    try:
        entry_date = datetime.strptime(text, "%d.%m.%Y").date()
    except ValueError:
        # Text that is not a DD.MM.YYYY date is treated as unknown.
        return None
    if today is None:
        today = datetime.today().date()
    # Dates already in the past are clamped to "available now".
    return max((entry_date - today).days, 0)

def get_distance_from_center_with_routes_api(origin_address):
    """Return the driving distance from ``origin_address`` to ``CENTER``.

    Sends one computeRoutes request to the Google Routes API, asking only for
    ``routes.distanceMeters``.

    Args:
        origin_address: Free-text address used as the route origin.

    Returns:
        The ``distanceMeters`` value of the first returned route as a float, or
        None when the request fails, times out, returns a non-200 status, or
        the response contains no usable route.
    """
    url = "https://routes.googleapis.com/directions/v2:computeRoutes"
    headers = {
        'Content-Type': 'application/json',
        'X-Goog-Api-Key': API_KEY,
        'X-Goog-FieldMask': 'routes.distanceMeters'
    }
    body = {
        "origin": {"address": origin_address},
        "destination": {"address": CENTER},
        "travelMode": "DRIVE",
        "routingPreference": "TRAFFIC_AWARE"
    }
    try:
        # Without a timeout a stalled connection would block the scrape forever.
        response = requests.post(url, headers=headers, data=json.dumps(body), timeout=10)
        if response.status_code != 200:
            return None
        return float(response.json()['routes'][0]['distanceMeters'])
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError):
        # Network failure, malformed JSON, or a response without a route:
        # the distance is simply unknown for this address.
        return None

# Maps the Hebrew label of an amenity icon on an ad page to the name of the
# 0/1 column that records whether the ad has that amenity.
features_map = dict((
    ("חניה", "has_parking"),
    ("מחסן", "has_stotsge"),
    ("מעלית", "elevator"),
    ("מיזוג", "ac"),
    ("נגישות", "handicap"),
    ("סורגים", "has_bars"),
    ('ממ"ד', "has_safe_room"),
    ("מרפסת", "has_balcon"),
    ("מרוהטת", "is_furnished"),
    ("משופצת", "is_renovated"),
))

# Seconds to wait for an ad page before giving up, so that one stalled request
# cannot hang the whole scrape.
REQUEST_TIMEOUT_SECONDS = 15


def _to_int(value):
    """Convert a parsed number to int, passing None (missing field) through."""
    return int(value) if value is not None else None


# Re-created on every run so that re-executing the cell does not append
# duplicate rows.
results = []

# Scrape every ad page in `all_links` into one row dict per ad.
for link in all_links:
    try:
        response = requests.get(link, timeout=REQUEST_TIMEOUT_SECONDS)
        # An error page would otherwise be parsed into a row of empty values.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Free-text description; an empty paragraph counts as missing.
        desc_tag = soup.find("p", class_="text-word-break")
        description = (desc_tag.get_text(strip=True) or None) if desc_tag else None

        price_tag = soup.find("div", class_="price")
        price = extract_number(price_tag.get_text(strip=True)) if price_tag else None

        # Details table: first cell of each row is the label, second the value.
        info_dict = {}
        table = soup.find("table", class_="table table-sm mb-4")
        if table:
            for tr in table.find_all("tr"):
                tds = tr.find_all("td")
                if len(tds) >= 2:
                    info_dict[tds[0].get_text(strip=True)] = tds[1].get_text(strip=True)

        # Amenity flags default to 0 and become 1 when the matching icon is
        # present without the "disabled" CSS class.
        features_result = {v: 0 for v in features_map.values()}
        for icon in soup.select("div.card-icon"):
            label = icon.find("span")
            if not label:
                continue
            field_name = features_map.get(label.get_text(strip=True))
            if field_name is not None and "disabled" not in icon.get("class", []):
                features_result[field_name] = 1

        # Prefer the street address for the distance lookup and fall back to
        # the neighbourhood name.
        address = info_dict.get("כתובת", "")
        neighborhood = info_dict.get("שכונה", "")
        location = address or neighborhood
        full_address = f"{location}, תל אביב" if location else None
        distance = get_distance_from_center_with_routes_api(full_address) if full_address else None

        # The floor field may hold two numbers: the flat's floor, then the
        # building's total number of floors.
        floor_parts = re.findall(r"\d+", info_dict.get("קומה", ""))
        floor = int(floor_parts[0]) if len(floor_parts) >= 1 else None
        total_floors = int(floor_parts[1]) if len(floor_parts) >= 2 else None

        # An ad without images is recorded as None rather than 0.
        num_of_images = len(soup.select('img[data-index]')) or None

        # Property types outside the allowed list are bucketed as "כללי".
        original_type = info_dict.get("פרטי הנכס", "") or ""
        property_type = original_type if original_type in allowed_property_types else "כללי"

        record = {
            "property_type": property_type,
            "neighborhood": str(neighborhood or ""),
            "address": str(address or ""),
            "room_num": extract_number(info_dict.get("חדרים", "")),
            "floor": floor,
            "area": _to_int(extract_number(info_dict.get("שטח בנוי", ""))),
            "garden_area": int(extract_number(info_dict.get("שטח גינה", "")) or 0),
            "days_to_enter": parse_days_to_enter(info_dict.get("תאריך כניסה", "")),
            "num_of_payments": _to_int(extract_number(info_dict.get("תשלומים בשנה", ""))),
            "monthly_arnona": _to_int(extract_number(info_dict.get("ארנונה בחודש", ""))),
            "building_tax": _to_int(extract_number(info_dict.get("ועד בית בחודש", ""))),
            "total_floors": total_floors,
            "description": description,
            "has_parking": features_result["has_parking"],
            "has_stotsge": features_result["has_stotsge"],
            "elevator": features_result["elevator"],
            "ac": features_result["ac"],
            "handicap": features_result["handicap"],
            "has_bars": features_result["has_bars"],
            "has_safe_room": features_result["has_safe_room"],
            "has_balcon": features_result["has_balcon"],
            "is_furnished": features_result["is_furnished"],
            "is_renovated": features_result["is_renovated"],
            "price": float(price) if price is not None else None,
            "num_of_images": num_of_images,
            "distance_from_center": float(distance) if distance is not None else None
        }

        results.append(record)
        # Pause between ads to avoid hammering the site.
        time.sleep(1)

    except Exception as exc:
        # Best-effort scrape: report the failing ad and move on. Catching
        # Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
        print(f"Skipping {link}: {exc!r}")
        continue

# Column order of the exported dataset; the names match the keys of the row
# dicts built by the scraping loop.
# NOTE(review): "has_stotsge" and "has_balcon" look like misspellings of
# "has_storage" / "has_balcony"; they are kept as-is because renaming them
# would change the CSV header.
ordered_columns = [
    "property_type", "neighborhood", "address", "room_num", "floor", "area", "garden_area",
    "days_to_enter", "num_of_payments", "monthly_arnona", "building_tax", "total_floors",
    "description", "has_parking", "has_stotsge", "elevator", "ac", "handicap",
    "has_bars", "has_safe_room", "has_balcon", "is_furnished", "is_renovated",
    "price", "num_of_images", "distance_from_center"
]

# Passing `columns=` orders the columns and still yields an empty, correctly
# labelled frame when nothing was scraped; indexing an empty frame with
# df[ordered_columns] would raise KeyError instead.
df = pd.DataFrame(results, columns=ordered_columns)

# Written once (the file used to be written twice with identical content).
# utf-8-sig prepends a BOM, presumably so spreadsheet tools read the Hebrew
# text as UTF-8.
df.to_csv("apartments.csv", index=False, encoding="utf-8-sig")

pd.set_option('display.max_columns', None)
print(df.head())


  property_type                 neighborhood          address  room_num  \
0          כללי  הצפון החדש סביבת ככר המדינה          ליסין 9       1.0   
1          דירה                   שיכון בבלי  הכנסת הגדולה 11       3.5   
2          דירה  הצפון החדש סביבת ככר המדינה         פייבל 17       5.0   
3          דירה  הצפון החדש סביבת ככר המדינה         פייבל 16       4.0   
4          כללי  הצפון החדש סביבת ככר המדינה   ז'בוטינסקי 112       1.0   

   floor  area  garden_area  days_to_enter  num_of_payments  monthly_arnona  \
0    3.0    20            0            0.0             12.0           300.0   
1    4.0    90            0            NaN             12.0          1100.0   
2    4.0   127            0            0.0             12.0           503.0   
3    1.0   127            0            0.0             12.0          1400.0   
4    NaN    18            0            0.0             12.0             NaN   

   building_tax  total_floors  \
0         150.0           NaN   
1       