In [100]:
import requests
import pandas as pd
import numpy as np
import time
import json

## 1. Flats: very good condition

In [101]:
BASE_URL = (
    "https://www.sreality.cz/api/v1/estates/search?"
    "category_main_cb=1"
    "&locality_country_id=112"
    "&locality_region_id=10"
    "&building_condition=1"
    "&ownership=1"
    "&limit={limit}"
    "&offset={offset}"
    "&sort=-date"
    "&lang=cs"
)

limit = 22 # max limit per request (max 22 for this API)
offset = 0 # starting offset (0 for the first request)
total_results = float('inf')
all_results = []

# Download data in a loop until all results are fetched
try:
    while offset < total_results:
        url = BASE_URL.format(limit=limit, offset=offset)
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()

        results = data.get("results", [])
        if not results:
            break

        all_results.extend(results)

        if total_results == float('inf'):
            total_results = data["pagination"]["total"]

        offset += limit

    df_very_good = pd.DataFrame(all_results)
    #add column condition
    df_very_good['condition'] = 'very good'
    print(f"✅ Downloaded {len(df_very_good)} flats in very good condition.")

except requests.exceptions.RequestException as e:
    print(f"❌ Error while downolading: {e}")

✅ Downloaded 3023 flats in very good condition.


In [102]:
data

{'meta_description': '3 023 bytů v aktuální nabídce byty Praha ✓ Parametry hledání: Byty, v osobním vlastnictví, velmi dobrý stav, Praha ✓ Největší nabídka nemovitostí v Česku (100 007 inzerátů) s hledáním na mapě a filtrováním s desítkami parametrů ✓',
 'meta_title': 'Byty, v osobním vlastnictví, velmi dobrý stav Praha • Sreality.cz',
 'pagination': {'limit': 22, 'offset': 3014, 'total': 3023},
 'results': [{'advert_images': ['//d18-a.sdn.cz/d_18/c_img_of_A/kPRtHkVQLlCIYXk39E1c8Yx/3914.jpeg',
    '//d18-a.sdn.cz/d_18/c_img_of_A/kPRtHkVQLlBaYpX2jE1c8ZT/0fee.jpeg',
    '//d18-a.sdn.cz/d_18/c_img_of_A/kPRtHkVQLlD9vAd1QE1c8Z3/f1e5.jpeg'],
   'advert_images_all': [{'advert_image_sdn_url': '//d18-a.sdn.cz/d_18/c_img_of_A/kPRtHkVQLlCIYXk39E1c8Yx/3914.jpeg',
     'restb_room_type': 15},
    {'advert_image_sdn_url': '//d18-a.sdn.cz/d_18/c_img_of_A/kPRtHkVQLlBaYpX2jE1c8ZT/0fee.jpeg',
     'restb_room_type': 5},
    {'advert_image_sdn_url': '//d18-a.sdn.cz/d_18/c_img_of_A/kPRtHkVQLlD9vAd1QE1c8Z3

## 2. Flats: good condition

In [103]:
BASE_URL = (
    "https://www.sreality.cz/api/v1/estates/search?"
    "category_main_cb=1"
    "&locality_country_id=112"
    "&locality_region_id=10"
    "&building_condition=2"
    "&ownership=1"
    "&limit={limit}"
    "&offset={offset}"
    "&sort=-date"
    "&lang=cs"
)

limit = 22 # max limit per request (max 22 for this API)
offset = 0 # starting offset (0 for the first request)
total_results = float('inf')
all_results = []

# Download data in a loop until all results are fetched
try:
    while offset < total_results:
        url = BASE_URL.format(limit=limit, offset=offset)
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()

        results = data.get("results", [])
        if not results:
            break

        all_results.extend(results)

        if total_results == float('inf'):
            total_results = data["pagination"]["total"]

        offset += limit
    #add column condition
    df_very_good['condition'] = 'good'
    df_good = pd.DataFrame(all_results)
    print(f"✅ Downloaded {len(df_good)} flats in good condition.")

except requests.exceptions.RequestException as e:
    print(f"❌ Error while downolading: {e}")

✅ Downloaded 599 flats in good condition.


## 3. Flats: in development

In [104]:
BASE_URL = (
    "https://www.sreality.cz/api/v1/estates/search?"
    "category_main_cb=1"
    "&locality_country_id=112"
    "&locality_region_id=10"
    "&building_condition=4"
    "&ownership=1"
    "&limit={limit}"
    "&offset={offset}"
    "&sort=-date"
    "&lang=cs"
)

limit = 22 # max limit per request (max 22 for this API)
offset = 0 # starting offset (0 for the first request)
total_results = float('inf')
all_results = []

# Download data in a loop until all results are fetched
try:
    while offset < total_results:
        url = BASE_URL.format(limit=limit, offset=offset)
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()

        results = data.get("results", [])
        if not results:
            break

        all_results.extend(results)

        if total_results == float('inf'):
            total_results = data["pagination"]["total"]

        offset += limit
    #add column condition
    df_very_good['condition'] = 'development'
    df_development = pd.DataFrame(all_results)
    print(f"✅ Downloaded {len(df_development)} flats in development.")

except requests.exceptions.RequestException as e:
    print(f"❌ Error while downolading: {e}")

✅ Downloaded 499 flats in development.


## 4. Flats: new flats

In [105]:
BASE_URL = (
    "https://www.sreality.cz/api/v1/estates/search?"
    "category_main_cb=1"
    "&locality_country_id=112"
    "&locality_region_id=10"
    "&building_condition=6"
    "&ownership=1"
    "&limit={limit}"
    "&offset={offset}"
    "&sort=-date"
    "&lang=cs"
)

limit = 22 # max limit per request (max 22 for this API)
offset = 0 # starting offset (0 for the first request)
total_results = float('inf')
all_results = []

# Download data in a loop until all results are fetched
try:
    while offset < total_results:
        url = BASE_URL.format(limit=limit, offset=offset)
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()

        results = data.get("results", [])
        if not results:
            break

        all_results.extend(results)

        if total_results == float('inf'):
            total_results = data["pagination"]["total"]

        offset += limit
    #add column condition
    df_very_good['condition'] = 'new'
    df_new = pd.DataFrame(all_results)
    print(f"✅ Downloaded {len(df_new)} new flats.")

except requests.exceptions.RequestException as e:
    print(f"❌ Error while downolading: {e}")

✅ Downloaded 1665 new flats.


## Merge all data from sreality

In [106]:
df = pd.concat([df_very_good, df_good, df_development, df_new], ignore_index=True)
print(f"✅ Merged dataset contains {len(df)} flats.")
df.head()

✅ Merged dataset contains 5786 flats.


Unnamed: 0,advert_images,advert_images_all,advert_name,category_main_cb,category_sub_cb,category_type_cb,discount_show,has_matterport_url,has_video,hash_id,...,price,price_currency_cb,price_czk,price_czk_m2,price_summary,price_summary_czk,price_summary_unit_cb,price_unit_cb,user_id,condition
0,[//d18-a.sdn.cz/d_18/c_img_oZ_D/nsLxLojIreywJr...,[{'advert_image_sdn_url': '//d18-a.sdn.cz/d_18...,Pronájem bytu 3+kk 116 m²,"{'name': 'Byty', 'value': 1}","{'name': '3+kk', 'value': 6}","{'name': 'Pronájem', 'value': 2}",False,False,False,1747272268,...,70000.0,"{'name': 'Kč', 'value': 1}",70000.0,603,70000.0,70000.0,"{'name': 'za měsíc', 'value': 2}","{'name': 'za měsíc', 'value': 2}",36332.0,new
1,[//d18-a.sdn.cz/d_18/c_img_oe_B/kOzmiQlmfoDiQh...,[{'advert_image_sdn_url': '//d18-a.sdn.cz/d_18...,Pronájem bytu 3+kk 90 m²,"{'name': 'Byty', 'value': 1}","{'name': '3+kk', 'value': 6}","{'name': 'Pronájem', 'value': 2}",False,False,False,708821836,...,36000.0,"{'name': 'Kč', 'value': 1}",36000.0,400,36000.0,36000.0,"{'name': 'za měsíc', 'value': 2}","{'name': 'za měsíc', 'value': 2}",136385.0,new
2,[//d18-a.sdn.cz/d_18/c_img_og_B/nDJ4VEZEqCs9Cy...,[{'advert_image_sdn_url': '//d18-a.sdn.cz/d_18...,Prodej bytu 4+1 80 m²,"{'name': 'Byty', 'value': 1}","{'name': '4+1', 'value': 9}","{'name': 'Prodej', 'value': 1}",False,False,True,3775144780,...,11190000.0,"{'name': 'Kč', 'value': 1}",11190000.0,139875,11190000.0,11190000.0,"{'name': 'za nemovitost', 'value': 1}","{'name': 'za nemovitost', 'value': 1}",43697.0,new
3,[//d18-a.sdn.cz/d_18/c_img_of_D/kOzkBkwYBTQzPh...,[{'advert_image_sdn_url': '//d18-a.sdn.cz/d_18...,Pronájem bytu 1+kk 35 m²,"{'name': 'Byty', 'value': 1}","{'name': '1+kk', 'value': 2}","{'name': 'Pronájem', 'value': 2}",False,False,False,2770346828,...,23500.0,"{'name': 'Kč', 'value': 1}",23500.0,671,23500.0,23500.0,"{'name': 'za měsíc', 'value': 2}","{'name': 'za měsíc', 'value': 2}",284446.0,new
4,[//d18-a.sdn.cz/d_18/c_img_og_A/nDJ4VEZEqCemGm...,[{'advert_image_sdn_url': '//d18-a.sdn.cz/d_18...,Pronájem bytu 2+kk 87 m²,"{'name': 'Byty', 'value': 1}","{'name': '2+kk', 'value': 4}","{'name': 'Pronájem', 'value': 2}",False,False,False,1076564812,...,32000.0,"{'name': 'Kč', 'value': 1}",32000.0,368,32000.0,32000.0,"{'name': 'za měsíc', 'value': 2}","{'name': 'za měsíc', 'value': 2}",36332.0,new


In [107]:
# Function to extract name from dictionary
def get_name(obj):
    if isinstance(obj, dict) and 'name' in obj:
        return obj['name']
    return None

# Function to extract image URL
def get_first_image_url(image_list):
    if isinstance(image_list, list) and len(image_list) > 0 and isinstance(image_list[0], dict) and 'advert_image_sdn_url' in image_list[0]:
        return image_list[0]['advert_image_sdn_url']
    return None

# Function to extract plain image URL
def get_first_plain_image_url(image_list):
    if isinstance(image_list, list) and len(image_list) > 0:
        return image_list[0]
    return None

# Function to extract city
def get_city(locality_obj):
    if isinstance(locality_obj, dict) and 'city' in locality_obj:
        return locality_obj['city']
    return None

# Function to extract gps_lon
def get_gps_lon(locality_obj):
    if isinstance(locality_obj, dict) and 'gps_lon' in locality_obj:
        return locality_obj['gps_lon']
    return None

# Function to extract gps_lat
def get_gps_lat(locality_obj):
    if isinstance(locality_obj, dict) and 'gps_lat' in locality_obj:
        return locality_obj['gps_lat']
    return None

# Function to extract region
def get_region(locality_obj):
    if isinstance(locality_obj, dict) and 'region' in locality_obj:
        return locality_obj['region']
    return None

# Function to extract district
def get_district(locality_obj):
    if isinstance(locality_obj, dict) and 'district' in locality_obj:
        return locality_obj['district']
    return None

# Function to extract citypart
def get_citypart(locality_obj):
    if isinstance(locality_obj, dict) and 'citypart' in locality_obj:
        return locality_obj['citypart']
    return None

# Apply the functions to relevant columns
df['category_main_name'] = df['category_main_cb'].apply(get_name)
df['category_sub_name'] = df['category_sub_cb'].apply(get_name)
df['category_type_name'] = df['category_type_cb'].apply(get_name)
df['price_currency_name'] = df['price_currency_cb'].apply(get_name)
df['price_summary_unit_name'] = df['price_summary_unit_cb'].apply(get_name)
df['price_unit_name'] = df['price_unit_cb'].apply(get_name)

df['first_advert_image'] = df['advert_images'].apply(get_first_plain_image_url)
df['first_advert_image_all'] = df['advert_images_all'].apply(get_first_image_url)

df['city'] = df['locality'].apply(get_city)
df['region'] = df['locality'].apply(get_region)
df['gps_lat'] = df['locality'].apply(get_gps_lat)
df['gps_lon'] = df['locality'].apply(get_gps_lon)
df['district'] = df['locality'].apply(get_district)
df['citypart'] = df['locality'].apply(get_citypart)

# Drop the original columns with JSON structures
df_cleaned = df.drop(columns=[
    'advert_images',
    'advert_images_all',
    #'category_main_cb',
    #'category_sub_cb',
    #'category_type_cb',
    #'price_currency_cb',
    #'price_summary_unit_cb',
    #'price_unit_cb',
    #'locality'
])

df_cleaned.transpose()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5776,5777,5778,5779,5780,5781,5782,5783,5784,5785
advert_name,Pronájem bytu 3+kk 116 m²,Pronájem bytu 3+kk 90 m²,Prodej bytu 4+1 80 m²,Pronájem bytu 1+kk 35 m²,Pronájem bytu 2+kk 87 m²,Pronájem bytu 3+kk 96 m²,Prodej bytu 2+kk 52 m²,Pronájem bytu 2+kk 78 m²,Pronájem bytu 4+1 79 m²,Pronájem bytu 3+kk 60 m²,...,Prodej bytu 3+kk 119 m²,Prodej bytu 2+kk 54 m²,Prodej bytu 2+kk 68 m²,Prodej bytu 2+kk 51 m²,Prodej bytu 3+kk 119 m²,Prodej bytu 3+kk 109 m²,Prodej bytu 3+kk 92 m²,Prodej bytu 5+kk 231 m²,Prodej bytu 4+kk 190 m²,Prodej bytu 4+kk 440 m²
category_main_cb,"{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}",...,"{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}","{'name': 'Byty', 'value': 1}"
category_sub_cb,"{'name': '3+kk', 'value': 6}","{'name': '3+kk', 'value': 6}","{'name': '4+1', 'value': 9}","{'name': '1+kk', 'value': 2}","{'name': '2+kk', 'value': 4}","{'name': '3+kk', 'value': 6}","{'name': '2+kk', 'value': 4}","{'name': '2+kk', 'value': 4}","{'name': '4+1', 'value': 9}","{'name': '3+kk', 'value': 6}",...,"{'name': '3+kk', 'value': 6}","{'name': '2+kk', 'value': 4}","{'name': '2+kk', 'value': 4}","{'name': '2+kk', 'value': 4}","{'name': '3+kk', 'value': 6}","{'name': '3+kk', 'value': 6}","{'name': '3+kk', 'value': 6}","{'name': '5+kk', 'value': 10}","{'name': '4+kk', 'value': 8}","{'name': '4+kk', 'value': 8}"
category_type_cb,"{'name': 'Pronájem', 'value': 2}","{'name': 'Pronájem', 'value': 2}","{'name': 'Prodej', 'value': 1}","{'name': 'Pronájem', 'value': 2}","{'name': 'Pronájem', 'value': 2}","{'name': 'Pronájem', 'value': 2}","{'name': 'Prodej', 'value': 1}","{'name': 'Pronájem', 'value': 2}","{'name': 'Pronájem', 'value': 2}","{'name': 'Pronájem', 'value': 2}",...,"{'name': 'Prodej', 'value': 1}","{'name': 'Prodej', 'value': 1}","{'name': 'Prodej', 'value': 1}","{'name': 'Prodej', 'value': 1}","{'name': 'Prodej', 'value': 1}","{'name': 'Prodej', 'value': 1}","{'name': 'Prodej', 'value': 1}","{'name': 'Prodej', 'value': 1}","{'name': 'Prodej', 'value': 1}","{'name': 'Prodej', 'value': 1}"
discount_show,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
has_matterport_url,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
has_video,False,False,True,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
hash_id,1747272268,708821836,3775144780,2770346828,1076564812,1800082252,3115258700,3636253516,1176703820,3368751948,...,1003520844,4188119628,3781657420,1048020300,55833420,4257957196,3570091340,3978982732,3709474124,2481338972
locality,"{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...",...,"{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c...","{'city': 'Praha', 'city_seo_name': 'praha', 'c..."
poi_atm_distance,109,410,520,386,159,602,686,449,788,454,...,483,402,483,233,402,914,930,388,393,454


In [108]:
df_cleaned["locality"].iloc[0]

{'city': 'Praha',
 'city_seo_name': 'praha',
 'citypart': 'Vinohrady',
 'citypart_seo_name': 'vinohrady',
 'country': 'Česká republika',
 'country_id': 112,
 'country_seo_name': 'ceska-republika',
 'district': 'Praha 2',
 'district_id': 5002,
 'district_seo_name': 'praha-2',
 'entity_type': 'address',
 'geohash': 'u2fkbt66g3',
 'gps_lat': 50.08303,
 'gps_lon': 14.43927,
 'housenumber': '2563',
 'inaccuracy_type': 'gps',
 'municipality': None,
 'municipality_id': 3468,
 'municipality_seo_name': None,
 'quarter': 'Praha 2',
 'quarter_id': 88,
 'quarter_seo_name': 'praha-2',
 'region': 'Hlavní město Praha',
 'region_id': 10,
 'region_seo_name': 'hlavni-mesto-praha',
 'street': 'Italská',
 'street_id': 119846,
 'street_seo_name': 'italska',
 'streetnumber': '53',
 'ward': None,
 'ward_id': 14967,
 'ward_seo_name': None}