In [1]:
from dateutil import parser
import datetime
import re
from bs4 import BeautifulSoup as bs
import requests
import json
import pandas as pd
import time

### Getting the regions list for sales and rent 

In [2]:
# Fetch the landing page; its city-link radio inputs carry the per-region paths.
webpage_path = 'https://www.onmap.co.il/en'

# A timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(webpage_path, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
r.raise_for_status()

# Name the parser explicitly so the parse does not depend on which optional
# parser libraries happen to be installed (this also silences bs4's
# "no parser was explicitly specified" warning).
soup = bs(r.content, 'html.parser')


def _city_link_urls(page, base_url, option):
    """Return the URLs of the home-page city links for one listing option.

    Parameters
    ----------
    page : BeautifulSoup
        Parsed landing page.
    base_url : str
        Prefix that is concatenated in front of every link value.
    option : str
        Path segment to look for in the link value ('buy' or 'rent').

    Returns
    -------
    list of str
        ``base_url + value`` for every matching radio input, in page order.
    """
    # Raw f-string: `\w` must reach the regex engine untouched (in a normal
    # string literal it is an invalid escape sequence).
    value_pattern = re.compile(rf".*/{option}/\w+.*")
    radio_inputs = page.find_all(
        "input",
        {"type": "radio", "name": "home-page-cities-links", "value": value_pattern},
    )
    return [base_url + radio_input['value'] for radio_input in radio_inputs]


# URLs of the regions offered for sale ...
buy_urls = _city_link_urls(soup, webpage_path, "buy")

# ... followed by the regions offered for rent.
# NOTE: both kinds of link end up in `buy_urls`; the cells that consume this
# list only read the last path segment (the region name) of each entry.
buy_urls.extend(_city_link_urls(soup, webpage_path, "rent"))

### Building the sales (buy) search-API URL for each region

In [3]:
# Build one "buy" search-API URL per region.
# The region name is the last path segment of each home-page link.  The same
# region can appear more than once in `buy_urls` (the list holds both buy and
# rent links), so dict.fromkeys() drops repeats while keeping first-seen
# order; otherwise the identical query would be issued several times.
sales_region_names = list(dict.fromkeys(url.split('/')[-1] for url in buy_urls))

sales_per_region_database_list = [
    f'https://phoenix.onmap.co.il/v1/properties/mixed_search?option=buy&section=residence&city={region_name}&$sort=-is_top_promoted+-search_date'
    for region_name in sales_region_names
]

### Building the rent search-API URL for each region

In [4]:
# Build one "rent" search-API URL per region.
# The region name is the last path segment of each home-page link.  The same
# region can appear more than once in `buy_urls` (the list holds both buy and
# rent links), so dict.fromkeys() drops repeats while keeping first-seen
# order; otherwise the identical query would be issued several times and the
# scraped listings would be duplicated.
rent_region_names = list(dict.fromkeys(url.split('/')[-1] for url in buy_urls))

rent_per_region_database_list = [
    f'https://phoenix.onmap.co.il/v1/properties/mixed_search?option=rent&section=residence&city={region_name}&$sort=-is_top_promoted+-search_date'
    for region_name in rent_region_names
]

### Column names for the rent DataFrame (`rent_df`)

In [5]:
# Column labels, in output order, grouped by theme for readability.
columns_names = [
    # when the listing was created
    'Date',
    # address
    'City_name', 'Street_name', 'House_number',
    # layout
    'Bathrooms', 'Rooms', 'Floor', 'Area[m^2]',
    # parking
    'Parking_spots_aboveground', 'Parking_spots_underground',
    # price and kind of property
    'Price[NIS]', 'Property_type',
]

### Scraper

In [21]:
# Scrape settings.
MAX_LISTINGS_PER_REGION = 3  # only the first few listings of each region are kept
REQUEST_DELAY_SECONDS = 2    # pause after every request
REQUEST_TIMEOUT_SECONDS = 30  # give up on a stalled connection instead of hanging


def _listing_to_row(apartment_dict):
    """Flatten one listing dict from the search API into a row dict.

    Parameters
    ----------
    apartment_dict : dict
        One element of the response's ``'data'`` list.

    Returns
    -------
    dict
        Scalar values keyed by the rent DataFrame's column names.

    Raises
    ------
    KeyError
        If one of the non-optional fields read below is absent.
    """
    address = apartment_dict['address']['en']
    info = apartment_dict['additional_info']
    # Parking is optional: treat a missing key and an explicit None alike.
    parking = info.get('parking') or {}
    return {
        'Date': parser.parse(apartment_dict['created_at']).date(),
        'City_name': address['city_name'],
        'Street_name': address['street_name'],
        'House_number': address['house_number'],
        'Bathrooms': info['bathrooms'],
        'Rooms': info['rooms'],
        'Floor': info['floor']['on_the'],
        'Area[m^2]': info['area']['base'],
        'Parking_spots_aboveground': parking.get('aboveground'),
        'Parking_spots_underground': parking.get('underground'),
        'Price[NIS]': apartment_dict['price'],
        'Property_type': apartment_dict['property_type'],
    }


# Collect plain row dicts and build the frame once at the end; concatenating
# a one-row frame per listing copies the whole frame on every iteration.
rent_rows = []
for region in rent_per_region_database_list:
    r = requests.get(region, timeout=REQUEST_TIMEOUT_SECONDS)
    time.sleep(REQUEST_DELAY_SECONDS)
    # Stop on an HTTP error rather than trying to decode an error body.
    r.raise_for_status()
    # Decode the JSON body directly instead of routing it through an HTML
    # parser, which only worked when the parser wrapped the text in a <p> tag.
    apartment_dict_list = r.json()['data']
    rent_rows.extend(
        _listing_to_row(apartment_dict)
        for apartment_dict in apartment_dict_list[:MAX_LISTINGS_PER_REGION]
    )

# dtype=object stores the API values unconverted (e.g. ints are not turned
# into floats when a column also contains None); `columns` fixes the column
# order and still yields the right columns when nothing was scraped.
rent_df = pd.DataFrame(rent_rows, columns=columns_names, dtype=object)

### Reset the Index

In [23]:
# Swap the current index for a fresh 0..n-1 range and discard the old labels
# (drop=True keeps them from being inserted as a column).
rent_df = rent_df.reset_index(drop=True)

In [24]:
# Bare last expression: renders rent_df as this cell's output.
rent_df

Unnamed: 0,Date,City_name,Street_name,House_number,Bathrooms,Rooms,Floor,Area[m^2],Parking_spots_aboveground,Parking_spots_underground,Price[NIS],Property_type
0,2021-02-03,Tel Aviv-Yafo,Geula St.,,2,4.0,2,100,none,1,13500,apartment
1,2020-11-25,Tel Aviv-Yafo,Nissim Aloni St.,10.0,1,3.0,10,75,none,2,7999,apartment
2,2021-02-14,Tel Aviv-Yafo,HaRav Yizkhak Yedidya Frenkel,,1,2.0,4,45,,,4000,apartment
3,2021-01-21,Netanya,Pierre Koenig St.,11.0,2,4.0,7,117,none,1,6000,apartment
4,2021-02-14,Netanya,Geva St.,,2,5.0,3,300,1,none,5200,apartment
5,2020-02-26,Netanya,Nitsa Boulevard,20.0,2,4.0,17,147,none,2,8000,apartment
6,2021-02-14,Rishon LeTsiyon,Rabbi Kook St.,3.0,2,3.0,1,72,1,none,4100,apartment
7,2020-08-11,Rishon LeTsiyon,HaSadeh St.,,2,3.5,3,80,1,none,4500,apartment
8,2020-09-27,Rishon LeTsiyon,Rabi Yehuda ha-Nasi St.,23.0,1,5.0,2,129,2,none,7000,apartment
9,2019-06-13,Rehovot,Rachel Hirshenzon St.,,1,2.0,4,40,,,2800,apartment


### Writing the rental data to a CSV file

In [25]:
# Write the scraped listings to Renting_data.csv (header row, no index column).
#
# * `lineterminator` is the current spelling of the keyword; the older
#   `line_terminator` was deprecated in pandas 1.5 and removed in 2.0.
# * Passing the path (instead of a text-mode handle opened without
#   newline='') lets pandas open the file itself, so '\n' is written as-is
#   on every platform rather than being translated to '\r\n' on Windows.
# * The encoding is pinned to UTF-8 instead of the platform's locale default.
# * The header defaults to the column names, so it needs no explicit argument.
rent_df.to_csv("Renting_data.csv", index=False, lineterminator='\n', encoding='utf-8')