The first step in obtaining the data is to request the API key and secret from Idealista.
It can be done here: https://developers.idealista.com/access-request

Once we have our credentials, we can start creating a function to generate our personalised token, which will give us authorisation to extract data.

In [None]:
# Import the necessary libraries

import base64
import requests as rq
import json

In [None]:
# Define a function in order to obtain our personalised token

def get_oauth_token(api_key=None, secret=None):
    '''
    Request an OAuth access token from the Idealista API and return it.

    The API key and the secret are combined as "key:secret", base64-encoded and
    sent as an HTTP Basic Authorization header to the token endpoint, using the
    "client_credentials" grant with "read" scope.

    Parameters:
        api_key: API key provided by Idealista. When omitted, it is read from the
                 IDEALISTA_API_KEY environment variable.
        secret:  Secret provided by Idealista. When omitted, it is read from the
                 IDEALISTA_SECRET environment variable.

    Returns:
        The value stored under 'access_token' in the JSON response.

    Raises:
        ValueError: if a credential is neither passed in nor set in the environment.
        requests.HTTPError: if the token endpoint answers with an error status.
    '''
    import os   # Local import: only needed for the environment lookup

    # Credentials are deliberately not hard-coded: anything written in the notebook
    # ends up shared with everyone who can read it.
    api_key = api_key or os.environ.get('IDEALISTA_API_KEY')
    secret = secret or os.environ.get('IDEALISTA_SECRET')
    if not api_key or not secret:
        raise ValueError(
            'Idealista credentials are missing: pass api_key/secret or set the '
            'IDEALISTA_API_KEY and IDEALISTA_SECRET environment variables'
        )

    message = api_key + ":" + secret   # Combine the API key and the secret to get our personalised message

    auth = "Basic " + base64.b64encode(message.encode("ascii")).decode("ascii")   # Encode the message

    headers_dic = {"Authorization" : auth,
                   "Content-Type" : "application/x-www-form-urlencoded;charset=UTF-8"}   # Define our headers

    params_dic = {"grant_type" : "client_credentials",   # Define the request params
                  "scope" : "read"}

    r = rq.post("https://api.idealista.com/oauth/token",   # Perform the request with the api url, headers and params
                headers = headers_dic,
                params = params_dic,
                timeout = 30)   # Do not hang forever if the service is unreachable

    r.raise_for_status()   # Fail here with the HTTP error instead of a KeyError on the payload below

    token = r.json()['access_token']   # Obtain the personalised token from the json response

    return token

The next step will be to define the search url. We will do this by defining the filter params of the search, and combining them into the resulting url.

In [None]:
# These are the params we will use to filter our search.
# All of them are kept as strings.

# --- Endpoint and locale ---
base_url = "https://api.idealista.com/3.5/"     # Base search url
country = "es"     # Search country (es, it, pt)
language = "es"     # Search language (es, it, pt, en, ca)

# --- What we are looking for ---
operation = "sale"     # Kind of operation (sale, rent)
property_type = "homes"     # Type of property (homes, offices, premises, garages, bedrooms)
bankOffer = "false"     # If the owner is a bank
minprice = "100000"     # Min price of the listings
maxprice = "850000"     # Max price of the listings

# --- Where we are looking ---
center = "40.4167,-3.70325"     # Coordinates of the search center
distance = "15000"     # Max distance from the center

# --- Paging and ordering ---
max_items = "50"     # Max items per call, the maximum set by Idealista is 50
order = "priceDown"     # Order of the listings, consult documentation for all the available orders
sort = "desc"     # How to sort the found items

In [None]:
# Define a function to obtain our search url

def define_search_url():
    '''
    Assemble the search url from the module-level search params.

    The page number is left as a literal '%s' placeholder, so the caller is
    expected to fill it in afterwards with the % operator.
    Note that minprice and bankOffer are not part of the generated url.
    '''
    # One "name=value" entry per query parameter, in the order they appear in the url
    query_parts = [
        'operation=' + operation,
        'maxItems=' + max_items,
        'order=' + order,
        'center=' + center,
        'distance=' + distance,
        'propertyType=' + property_type,
        'sort=' + sort,
        'numPage=%s',
        'maxPrice=' + maxprice,
        'language=' + language,
    ]

    endpoint = base_url + country + '/search?'

    return endpoint + '&'.join(query_parts)

In [None]:
# Build the search url template; the page number is still a '%s' placeholder at this point
url = define_search_url()

In [None]:
print(url)

https://api.idealista.com/3.5/es/search?operation=sale&maxItems=50&order=priceDown&center=40.4167,-3.70325&distance=15000&propertyType=homes&sort=desc&numPage=%s&maxPrice=850000&language=es


Now we need to create a function in order to do our search, with the data we already have.

In [None]:
def search_api(url, token=None):
    '''
    Perform a search request against the given url and return the decoded response.

    Parameters:
        url:   Complete search url, with the page number already filled in.
        token: Optional access token to reuse. When omitted, a new token is
               requested with get_oauth_token(), as before.

    Returns:
        The JSON body of the response, decoded into Python objects.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    '''
    if token is None:
        token = get_oauth_token()   #  Get the personalised token

    # NOTE(review): the Content-Type value repeats the header name inside the value.
    # It is kept exactly as it was, since the request carries no body and changing it
    # could alter how the server treats the call - confirm against the API docs.
    headers = {'Content-Type': 'Content-Type: multipart/form-data;',   # Define the search headers
               'Authorization' : 'Bearer ' + token}

    content = rq.post(url, headers = headers, timeout = 30)   # Do not hang forever on a stalled connection

    content.raise_for_status()   # Surface HTTP errors here rather than as a missing key later on

    result = content.json()   # Decode the json body of the response

    return result

In [None]:
# The search url needs a page number. `pagination` is the page requested by this first search;
# the page loop further down continues from pagination + 1.
# (Presumably 1 is the first page of results - use it to start a search from the beginning.)
pagination = 100
first_search_url = url % pagination

In [None]:
print(first_search_url)

https://api.idealista.com/3.5/es/search?operation=sale&maxItems=50&order=priceDown&center=40.4167,-3.70325&distance=15000&propertyType=homes&sort=desc&numPage=100&maxPrice=850000&language=es


In [None]:
# Proceed to do the search with the paginated url.
# `results` holds the json response decoded by search_api.
results = search_api(first_search_url)

We now have the results of our first search (a single page), so we can start working with them.

In [None]:
# Each call returns a single page (at most 50 results, see max_items), but the search spans more pages,
# so we read how many pages there are from the 'totalPages' entry of the response.

total_pages = results['totalPages']

In [None]:
print(total_pages)

145


We can also save our first dataset. We will save it as an individual dataframe, and then add it to a bigger dataset, which will contain all our individual dataframes.

We will create two functions for that.

In [None]:
# Import the necessary libraries

import pandas as pd

In [None]:
def results_to_df(results):
    '''
    Build a dataframe from the search results.

    The records are taken from the 'elementList' entry of `results`,
    and the resulting dataframe is returned.
    '''
    listings = results['elementList']

    return pd.DataFrame.from_dict(listings)

In [None]:
def concat_df(df, df_tot):
    '''
    Append an individual dataframe (df) to the main dataframe (df_tot).

    The rows of df_tot come first, followed by the rows of df, and the index of
    the result is renumbered from 0. The combined dataframe is returned; neither
    input is modified.
    '''
    frames = [df_tot, df]   # Main dataframe first, so the new rows go at the end

    return pd.concat(frames, ignore_index=True)

In [None]:
# Proceed to save the obtained results (the single page requested so far) as a dataframe
df = results_to_df(results)

In [None]:
# There is no main dataframe yet, so we start from an empty one and append our first dataframe to it
df_tot = concat_df(df, pd.DataFrame())

In [None]:
# Rebuild the search url template (the search params are not changed in the cells shown in between,
# so this yields the same template as before)
url = define_search_url()

In [None]:
print(url)

https://api.idealista.com/3.5/es/search?operation=sale&maxItems=50&order=priceDown&center=40.4167,-3.70325&distance=15000&propertyType=homes&sort=desc&numPage=%s&maxPrice=850000&language=es


In [None]:
print(df_tot.head(1))

  propertyCode                                          thumbnail  numPhotos  \
0    107236927  https://img4.idealista.com/blur/WEB_LISTING/0/...         18   

  floor     price                                          priceInfo  \
0     2  219000.0  {'price': {'amount': 219000.0, 'currencySuffix...   

  propertyType operation  size exterior  ...          detailedType  \
0         flat      sale  84.0     True  ...  {'typology': 'flat'}   

                                      suggestedTexts hasPlan has3DTour has360  \
0  {'subtitle': 'Los Ángeles, Madrid', 'title': '...   False     False  False   

  hasStaging topNewDevelopment topPlus  parkingSpace  externalReference  
0      False             False   False           NaN                NaN  

[1 rows x 39 columns]


At this point, we have all the necessary code to extract the data, but we need to extract the data from all the pages of the results. For this, we will create a loop, which will perform a search on each of the remaining pages.

In [None]:
# Loop over the remaining pages of the search results. It starts from pagination + 1, because the
# page numbered `pagination` was already fetched by the first search, and it runs up to and
# including total_pages so that the last page is not left out.
url_template = define_search_url()   # Same for every page, so build it only once
for i in range(pagination + 1, total_pages + 1):
    url = url_template % i   # Add the pagination to the url
    results = search_api(url)   # Get the search results
    df = results_to_df(results)   # Save the results as a dataframe
    df_tot = concat_df(df, df_tot)   # Concat the results to the main dataframe

In [None]:
# Once we have all our data, we just need to save it as a csv file, we have created the following function for that:

file_path = 'idealista_sale_02_2025_2.csv'

def df_to_csv(df, path=None):
    '''
    Save the given dataframe as a csv file.

    Parameters:
        df:   Dataframe to save. It is not modified.
        path: Destination of the csv file. When omitted, the module-level
              file_path is used.

    The dataframe index is not written to the file. The previous version called
    reset_index() first, which turned the old index into an extra 'index' column
    of the csv even though the index was meant to be left out.
    '''
    if path is None:
        path = file_path

    df.to_csv(path, index=False)   # Save it into a csv, without the index

In [None]:
# Run the function and you'll obtain a csv file (written to file_path) with all the extracted data
df_to_csv(df_tot)

Idealista API has a limitation of 100 search queries/month, so this should be taken into consideration when performing the search queries

In the following cells you can find the resulting dataframe from the search.

In [None]:
# NOTE: this reads a different csv than the one written by df_to_csv (a rental dataset,
# judging by the file name) and rebinds `df` to it.
df = pd.read_csv('../input/idealista-madrid-rental-real-state/idealista_api_rent_madrid.csv')


In [None]:
df.head()