In [None]:
    # Read the first results-grid page for the city passed as a parameter. OK
    # Get the total number of pages that have to be visited. OK
    # Store the URL of every property that has to be visited. OK
    # Open each stored URL and scrape window.__INITIAL_PROPS__. OK
    # Clean the JSON of each property. OK
    # Move to the next page and repeat the process for every property of every page. OK

In [37]:
%%time

from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Chrome configuration shared by every webdriver the scraper class creates.
# The flags are applied in the listed order.
chrome_options = webdriver.ChromeOptions()
for _chrome_flag in (
    '--headless',
    '--no-sandbox',
    '--disable-dev-shm-usage',
    '--enable-javascript',
    'user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
):
    chrome_options.add_argument(_chrome_flag)

from bs4 import BeautifulSoup
import concurrent.futures
import os
import shutil
import numpy as np
import requests
import re
import time
import pandas as pd
from lxml import etree, html
import json


class fotocasa_scraping:
    '''
    Concurrent Selenium scraper for the Fotocasa "comprar/viviendas" listings
    of one province.

    Instantiating the class runs the whole job: it reads the number of result
    pages, splits the requested page range into up to five chunks, scrapes each
    chunk in its own thread with its own Chrome driver, stores the rows in
    ``self.data`` (a pandas DataFrame) and writes them to a CSV file.
    '''

    BASE_URL = 'https://www.fotocasa.es'
    # Placeholder stored when a feature cannot be read from the listing JSON.
    MISSING = '^'
    # Upper bound on PAGE_DOWN presses while waiting for the pagination widget
    # (0.1 s pause between presses), so a page without pagination cannot hang
    # the notebook forever.
    MAX_PAGINATION_SCROLLS = 300

    # Ordered description of every output column (the order defines the
    # DataFrame column order). Kinds:
    #   'path'  -> follow the keys/indexes in ``arg`` through the JSON.
    #   'link'  -> like 'path', then prefix the value with BASE_URL.
    #   'label' -> join the ``value`` of every featuresList entry whose
    #              ``label`` equals the column name; when ``arg`` is True the
    #              key must also exist in realEstate.features, otherwise the
    #              column is reported as missing.
    _FEATURE_SPEC = (
        ('title', 'path', ('propertyTitle',)),
        ('link', 'link', ('realEstate', 'detail', 'es-ES')),
        # NOTE(review): index 1 (second multimedia entry) is kept from the
        # original code; the reason for skipping entry 0 is not visible here.
        ('image_url', 'path', ('realEstate', 'multimedia', 1, 'src')),
        ('country', 'path', ('realEstate', 'address', 'country')),
        ('district', 'path', ('realEstate', 'address', 'district')),
        ('neighborhood', 'path', ('realEstate', 'address', 'neighborhood')),
        ('street', 'path', ('realEstate', 'location')),
        ('zipCode', 'path', ('realEstate', 'address', 'zipCode')),
        ('province', 'path', ('realEstate', 'address', 'province')),
        ('buildingType', 'path', ('realEstate', 'buildingType')),
        ('clientAlias', 'path', ('realEstate', 'clientAlias')),
        ('latitude', 'path', ('realEstate', 'coordinates', 'latitude')),
        ('longitude', 'path', ('realEstate', 'coordinates', 'longitude')),
        ('isNewConstruction', 'path', ('realEstate', 'isNewConstruction')),
        ('rooms', 'path', ('realEstate', 'features', 'rooms')),
        ('bathrooms', 'path', ('realEstate', 'features', 'bathrooms')),
        ('parking', 'label', False),
        ('elevator', 'label', False),
        ('furnished', 'label', False),
        ('surface', 'path', ('realEstate', 'features', 'surface')),
        ('energyCertificate', 'path', ('realEstate', 'energyCertificate')),
        ('hotWater', 'label', True),
        ('heating', 'label', True),
        ('conservationState', 'label', True),
        ('antiquity', 'label', True),
        ('floor', 'path', ('realEstate', 'features', 'floor')),
        ('surfaceLand', 'path', ('realEstate', 'features', 'surfaceLand')),
        ('otherFeatures', 'path', ('realEstate', 'otherFeatures')),
        ('price', 'path', ('realEstate', 'price')),
    )

    @staticmethod
    def _dig(data, path):
        '''Follow ``path`` (keys / indexes) through nested containers and return the leaf.'''
        node = data
        for key in path:
            node = node[key]
        return node

    @staticmethod
    def _labelled_value(data, label, require_feature):
        '''Join the ``value`` of every featuresList entry labelled ``label``.'''
        if require_feature:
            # Presence check only: a missing key raises and marks the column as missing.
            _ = data['realEstate']['features'][label]
        entries = data['realEstate']['featuresList']
        return ''.join(entry['value'] for entry in entries if entry['label'] == label)

    def check_features(self, data):
        '''
        Flatten the parsed listing JSON into a dictionary with one key per column.

        Parameters:
            data: object obtained from ``json.loads`` of the listing's
                ``window.__INITIAL_PROPS__`` payload.

        Returns:
            dict with the 29 columns of ``_FEATURE_SPEC``, in that order. A
            feature that cannot be read is stored as ``'^'``, except ``price``,
            which falls back to ``0``.
        '''
        realestate = {}
        for column, kind, arg in self._FEATURE_SPEC:
            try:
                if kind == 'label':
                    value = self._labelled_value(data, column, arg)
                else:
                    value = self._dig(data, arg)
                    if kind == 'link':
                        value = self.BASE_URL + value
            except (KeyError, IndexError, TypeError):
                # Missing key, short list or unexpected type: use the placeholder.
                value = 0 if column == 'price' else self.MISSING
            realestate[column] = value

        return realestate

    @staticmethod
    def _extract_initial_props(scripts):
        '''
        Pull the ``window.__INITIAL_PROPS__`` JSON text out of the page scripts.

        Parameters:
            scripts: iterable with the text of every <script> element.

        Returns:
            The cleaned JSON text (escaped quotes restored, the
            ``JSON.parse("...");`` wrapper removed and everything from the
            ``"seo"`` key onwards dropped), or ``''`` when no script carries
            the payload.
        '''
        payloads = []
        for script in scripts:
            found = re.search(r'window\.__INITIAL_PROPS__ = JSON\.parse(.*)\n', script)
            if found:
                payloads.append(found.group(1))

        cleaned = re.sub(r'\\"', '"', ''.join(payloads))
        cleaned = re.sub(r'\\\\"', '', cleaned)    # drop quotes that were escaped twice
        cleaned = re.sub(r'\("|"\);', '', cleaned)  # strip the JSON.parse("...") wrapper
        return re.sub(r',"seo":.*', '}', cleaned)

    def parse_properties(self, driver, url_list, page, download = False):
        '''
        Visit every property URL and collect its features.

        Parameters:
            driver: Selenium webdriver used to load the pages.
            url_list: iterable of property URLs.
            page: listing page number the URLs come from (used for the
                download folder name).
            download: when truthy, the cleaned JSON of every property is also
                written under ``fotocasa/fotocasa_<page>/``.

        Returns:
            pandas DataFrame with one row per property that could be parsed.
            A property that fails to load or parse is reported with
            ``Error <url>`` and skipped.
        '''
        if download:
            os.makedirs(f'fotocasa/fotocasa_{page}', exist_ok=True)

        records = []
        saved = 0  # running number of the JSON files written for this page

        for url in url_list:
            try:
                driver.get(url)
                # Wait until the page has been rendered before reading its source.
                element_present = EC.presence_of_element_located((By.XPATH, '//div[@id="modal-react-portal"]'))
                WebDriverWait(driver, 5).until(element_present)

                soup = BeautifulSoup(driver.page_source, 'html.parser')
                props_json = self._extract_initial_props(str(tag) for tag in soup.find_all('script'))
                realestate = self.check_features(json.loads(props_json))

                if download:
                    self.download_realestates(props_json, page, saved)
                    saved += 1

                records.append(realestate)
            except Exception:
                # Best effort: one broken listing must not abort the whole page.
                print('Error ' + str(url))

        # Build the frame once instead of concatenating row by row.
        return pd.DataFrame(records)

    def download_realestates(self,realestate,page,num):
        '''
        Write the cleaned JSON text of one property to
        ``fotocasa/fotocasa_<page>/realestate_<page>_<num>``.

        The target folder must already exist. Returns None.
        '''
        path = 'fotocasa/fotocasa_%s/realestate_%s_%s' % (page,page,num)
        # The context manager closes the file even if the write fails; UTF-8
        # keeps accented characters independent of the platform default.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(realestate)

        return None

    @staticmethod
    def _listing_url(city, page = None):
        '''Build the listing-grid URL of ``city`` (optionally for a given page number).'''
        url = 'https://www.fotocasa.es/es/comprar/viviendas/' + city.lower() + '-provincia/todas-las-zonas/l'
        if page is not None:
            url = url + '/' + str(page)
        return url

    def property_list(self, driver, city, page):
        '''
        Collect the property URLs shown on one listing page.

        Parameters:
            driver: Selenium webdriver used to load the page.
            city: province name used in the listing URL.
            page: listing page number.

        Returns:
            list with the ``href`` of every ``info-container`` anchor found
            (anchors without an href are ignored).
        '''
        driver.get(self._listing_url(city, page))

        # Scroll down repeatedly so the whole grid gets rendered.
        # NOTE(review): Selenium documents key_down for modifier keys; kept
        # as in the original, consider send_keys(Keys.PAGE_DOWN).
        for _ in range(40):
            ActionChains(driver).key_down(Keys.PAGE_DOWN).perform()
            time.sleep(0.1)

        anchors = driver.find_elements(By.XPATH,'//a[contains(@class, "info-container")]')
        hrefs = (anchor.get_attribute('href') for anchor in anchors)

        return [href for href in hrefs if href]

    def pages_to_scrape(self, driver, city):
        '''
        Read the total number of listing pages from the pagination widget.

        Parameters:
            driver: Selenium webdriver used to load the first listing page.
            city: province name used in the listing URL.

        Returns:
            int, the number shown in the second-to-last pagination item.

        Raises:
            RuntimeError: if the pagination widget does not appear after
                ``MAX_PAGINATION_SCROLLS`` scroll attempts or its page number
                cannot be read.
        '''
        driver.get(self._listing_url(city))

        page_selector = []
        for _ in range(self.MAX_PAGINATION_SCROLLS):
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            page_selector = soup.find_all('li', attrs={'class':'sui-MoleculePagination-item'})
            if page_selector:
                break
            ActionChains(driver).key_down(Keys.PAGE_DOWN).perform()
            time.sleep(0.1)

        if len(page_selector) < 2:
            raise RuntimeError('Pagination not found for ' + str(city))

        found = re.search('<span class="sui-AtomButton-inner">(.*)</span>', str(page_selector[-2]))
        if found is None:
            raise RuntimeError('Could not read the number of pages for ' + str(city))

        return int(found.group(1))

    def divide_pages(self, page_range):
        '''
        Split an inclusive page range into half-open chunks for the workers.

        Parameters:
            page_range: ``[first_page, last_page]``, both inclusive.

        Returns:
            list of ``[start, stop]`` pairs meant for ``range(start, stop)``;
            together they cover every page of ``page_range`` exactly once.
            Ranges whose last page is below 10 (or that hold a single page)
            come back as one chunk, the rest as five chunks cut at the
            0/20/40/60/80 percentiles of the page numbers.
        '''
        first_page, last_page = page_range[0], page_range[1]

        if last_page < 10 or last_page <= first_page:
            # scraping_loop iterates range(start, stop), so stop is last_page + 1.
            return [[first_page, last_page + 1]]

        page_numbers = list(range(first_page, last_page))
        cuts = [int(np.percentile(page_numbers, q)) for q in range(0, 100, 20)]

        pages = []
        for loc, cut in enumerate(cuts):
            # The first chunk always starts on the first requested page.
            start = first_page if loc == 0 else cut + 1
            end = last_page if loc == len(cuts) - 1 else cuts[loc + 1]
            pages.append([start, end + 1])

        return pages

    def scraping_loop(self, page_range, driver, city, download):
        '''
        Scrape every listing page of one chunk with a dedicated driver.

        Parameters:
            page_range: ``[start, stop]`` pair, iterated as ``range(start, stop)``.
            driver: Selenium webdriver owned by this worker.
            city: province name used in the listing URL.
            download: forwarded to ``parse_properties``.

        Returns:
            pandas DataFrame with the rows of all pages of the chunk. A page
            that fails is reported and skipped so the remaining pages are
            still scraped.
        '''
        frames = []

        for page in range(page_range[0], page_range[1]):
            try:
                properties_per_page = self.property_list(driver, city, page)
                frames.append(self.parse_properties(driver, properties_per_page, page, download))
            except Exception as error:
                print(f'Error page {page}: {error}')
                continue

            print(f'Página {str(page)} terminada, ')

        if not frames:
            return pd.DataFrame()
        return pd.concat(frames, ignore_index=True)

    @staticmethod
    def _new_driver(driver_path):
        '''Start a Chrome webdriver with the notebook-level ``chrome_options``.'''
        return webdriver.Chrome(service=Service(driver_path), options=chrome_options)

    def __init__(self, city = 'Madrid', page_range = [0,0], download = False):
        '''
        Run the Fotocasa scraping for the given province.

        Parameters:
            city: province name used in the listing URL (default: Madrid).
            page_range: ``[first_page, last_page]``, inclusive. ``[0, 0]``
                means "every page". The list is only read, never modified.
            download: when truthy, the folder ``fotocasa`` is recreated and
                the cleaned JSON of every property is stored inside it.

        Side effects:
            Fills ``self.data`` and writes
            ``concurrent_scraping_<last page + 1>.csv`` in the working directory.
        '''
        self.data = pd.DataFrame()
        # Work on copies so neither the caller's list nor the default is mutated.
        first_page, last_page = page_range[0], page_range[1]

        # Resolve the chromedriver binary once and reuse it for every driver.
        driver_path = ChromeDriverManager().install()
        drivers = [self._new_driver(driver_path)]

        try:
            # Highest page number available in the listing grid of the city.
            total_pages = self.pages_to_scrape(drivers[0], city)

            # Normalise the requested range: default, order, then bounds.
            if first_page == 0 and last_page == 0:
                first_page, last_page = 1, total_pages
            if first_page > last_page:
                first_page, last_page = last_page, first_page
            first_page = max(1, min(first_page, total_pages))
            last_page = max(1, min(last_page, total_pages))

            print('Pages to scrape: ' + str(last_page))

            # Split the range into chunks, one per worker.
            chunks = self.divide_pages([first_page, last_page])

            if download:
                directory = 'fotocasa'

                if os.path.exists(directory):
                    shutil.rmtree(directory)  # start from an empty folder
                os.mkdir(directory)

            # One driver per chunk so the workers never share a browser.
            while len(drivers) < len(chunks):
                drivers.append(self._new_driver(driver_path))

            frames = []
            with concurrent.futures.ThreadPoolExecutor(max_workers=len(chunks)) as executor:
                futures = [
                    executor.submit(self.scraping_loop, page_range = chunk, driver = driver, city = city, download = download)
                    for chunk, driver in zip(chunks, drivers)
                ]

                for future in concurrent.futures.as_completed(futures):
                    frames.append(future.result())

            if frames:
                self.data = pd.concat(frames, ignore_index = True)
        finally:
            # Always release the browsers, even when the scraping fails.
            for driver in drivers:
                try:
                    driver.quit()
                except Exception:
                    pass

        print('Done!')
        # The suffix is the exclusive end of the last chunk (last page + 1),
        # the same number the five-chunk case has always used.
        self.data.to_csv('concurrent_scraping_' + str(chunks[-1][1]) + '.csv',index=False)

CPU times: user 86 µs, sys: 0 ns, total: 86 µs
Wall time: 89.9 µs


In [32]:
%%time
# Run the scraper for Madrid, listing pages 1 to 11, without storing the raw JSON files on disk.
ft_scraping = fotocasa_scraping(city = 'Madrid', page_range = [1,11] , download = False)






Pages to scrape: 11
Recopiladas urls de 1.
Recopiladas urls de 7.
Recopiladas urls de 9.
Recopiladas urls de 5.
Recopiladas urls de 3.
Página 7 terminada, 
Página 9 terminada, 
Página 5 terminada, 
Página 3 terminada, 
Recopiladas urls de 8.
Recopiladas urls de 10.
Recopiladas urls de 6.
Recopiladas urls de 4.
Página 1 terminada, 
Recopiladas urls de 2.
Página 10 terminada, 
Página 8 terminada, 
Página 4 terminada, 
Página 6 terminada, 
Recopiladas urls de 11.
Página 2 terminada, 
Página 11 terminada, 
CPU times: user 19 s, sys: 1.91 s, total: 20.9 s
Wall time: 2min 50s


In [33]:
# (rows, columns) of the scraped DataFrame.
ft_scraping.data.shape

(330, 29)

In [34]:
# First ten scraped properties.
ft_scraping.data.head(10)

Unnamed: 0,title,link,image_url,country,district,neighborhood,street,zipCode,province,buildingType,...,surface,energyCertificate,hotWater,heating,conservationState,antiquity,floor,surfaceLand,otherFeatures,price
0,Piso en venta en Calle del Ferrocarril,https://www.fotocasa.es/es/comprar/vivienda/ma...,https://static.inmofactory.com/images/inmofact...,España,Arganzuela,Palos de Moguer,Calle del Ferrocarril,28045,Madrid,Flat,...,74,G,Gas Natural,Gas Natural,A reformar,50 a 70 años,7,0,{},293000
1,Piso en venta en Calle de Maestre Felisa Lozano,https://www.fotocasa.es/es/comprar/vivienda/ma...,https://static.inmofactory.com/images/inmofact...,España,Hortaleza,Palomas,Calle de Maestre Felisa Lozano,28042,Madrid,Flat,...,147,G,,,,20 a 30 años,6,0,"{'1': 'Aire acondicionado', '2': 'Armarios', '...",760000
2,Piso en venta en Calle de Canillas,https://www.fotocasa.es/es/comprar/vivienda/ma...,https://static.inmofactory.com/images/inmofact...,España,Chamartín,Prosperidad,Calle de Canillas,28002,Madrid,Flat,...,68,G,,,,50 a 70 años,6,0,"{'2': 'Armarios', '3': 'Calefacción', '21': 'E...",322000
3,Piso en venta en Barrio de la Fortuna,https://www.fotocasa.es/es/comprar/vivienda/ma...,https://static.inmofactory.com/images/inmofact...,España,Latina,Cuatro vientos,Barrio de la Fortuna,28044,Madrid,Flat,...,110,D,,,,,6,0,"{'1': 'Aire acondicionado', '3': 'Calefacción'...",385000
4,Piso en venta en Calle de Santa Casilda,https://www.fotocasa.es/es/comprar/vivienda/ma...,https://static.inmofactory.com/images/inmofact...,España,Arganzuela,Imperial,Calle de Santa Casilda,28005,Madrid,Flat,...,42,G,,Gas Natural,Bien,50 a 70 años,9,0,{'2': 'Armarios'},215000
5,Casa adosada en venta,https://www.fotocasa.es/es/comprar/vivienda/vi...,https://static.inmofactory.com/images/inmofact...,España,Centro,,,28670,Madrid,Flat,...,237,G,Gas Natural,,Bien,20 a 30 años,0,0,"{'2': 'Armarios', '3': 'Calefacción', '6': 'Gr...",565000
6,Piso en venta,https://www.fotocasa.es/es/comprar/vivienda/fu...,https://static.inmofactory.com/images/inmofact...,España,Fuenlabrada II - El Molino,,,28943,Madrid,Flat,...,84,G,Gas Natural,,Casi nuevo,,7,0,"{'1': 'Aire acondicionado', '3': 'Calefacción'...",172260
7,Piso en venta,https://www.fotocasa.es/es/comprar/vivienda/ma...,https://static.inmofactory.com/images/inmofact...,España,Casco Antiguo,,,28220,Madrid,Flat,...,109,G,Gas Natural,Gas Natural,Casi nuevo,20 a 30 años,6,0,"{'1': 'Aire acondicionado', '2': 'Armarios', '...",320000
8,Piso en venta,https://www.fotocasa.es/es/comprar/vivienda/la...,https://static.inmofactory.com/images/inmofact...,España,Monte Rozas,,,28232,Madrid,Flat,...,150,G,,Gas Natural,Muy bien,20 a 30 años,6,0,"{'2': 'Armarios', '9': 'Parquet', '10': 'Terra...",495000
9,Piso en venta,https://www.fotocasa.es/es/comprar/vivienda/ma...,https://static.inmofactory.com/images/inmofact...,España,Golf - El Carralero,,,28221,Madrid,Flat,...,163,D,,Gas Natural,Muy bien,10 a 20 años,6,0,"{'1': 'Aire acondicionado', '2': 'Armarios', '...",795000


In [242]:
# Last ten scraped properties.
ft_scraping.data.tail(10)

Unnamed: 0,title,link,image_url,country,district,neighborhood,street,zipCode,province,buildingType,...,surface,energyCertificate,hotWater,heating,conservationState,antiquity,floor,surfaceLand,otherFeatures,price
20,"Piso en venta en Calle Escalinata, 25",https://www.fotocasa.es/es/comprar/vivienda/gu...,https://static.inmofactory.com/images/inmofact...,España,,,"Calle Escalinata, 25",28440,Madrid,Flat,...,95,G,,,Bien,30 a 50 años,8,0,"{'1': 'Aire acondicionado', '2': 'Armarios', '...",195000
21,Piso en venta en Avenida de las Ciudades,https://www.fotocasa.es/es/comprar/vivienda/ge...,https://static.inmofactory.com/images/inmofact...,España,Getafe Norte,,Avenida de las Ciudades,28903,Madrid,Flat,...,65,G,Electricidad,Electricidad,Muy bien,50 a 70 años,9,0,"{'1': 'Aire acondicionado', '2': 'Armarios', '...",120000
22,Piso en venta en Calle de Benimámet,https://www.fotocasa.es/es/comprar/vivienda/ma...,https://static.inmofactory.com/images/inmofact...,España,Villaverde,San Cristóbal,Calle de Benimámet,28021,Madrid,Flat,...,60,E,Gas Natural,Gas Natural,Muy bien,,6,0,"{'2': 'Armarios', '6': 'Gres Cerámica', '22': ...",106050
23,"Piso en venta en Calle Carlos IV, 4",https://www.fotocasa.es/es/comprar/vivienda/ma...,https://static.inmofactory.com/images/inmofact...,España,San Blas,Simancas,"Calle Carlos IV, 4",28037,Madrid,Flat,...,125,C,,,,10 a 20 años,7,0,"{'1': 'Aire acondicionado', '2': 'Armarios', '...",395000
24,Piso en venta en Calle de la Cañada,https://www.fotocasa.es/es/comprar/vivienda/ma...,https://static.inmofactory.com/images/inmofact...,España,Moratalaz,Vinateros,Calle de la Cañada,28030,Madrid,Flat,...,91,G,,,Bien,50 a 70 años,12,0,"{'1': 'Aire acondicionado', '2': 'Armarios', '...",259000
25,Piso en venta en Calle de Blas Cabrera,https://www.fotocasa.es/es/comprar/vivienda/ma...,https://static.inmofactory.com/images/inmofact...,España,Latina,Las Águilas,Calle de Blas Cabrera,28044,Madrid,Flat,...,69,G,Gas Natural,Gas Natural,Bien,50 a 70 años,3,0,{},130000
26,Piso en venta,https://www.fotocasa.es/es/comprar/vivienda/pa...,https://static.inmofactory.com/images/inmofact...,España,Calle Pinto - San Roque,,,28982,Madrid,Flat,...,90,E,Gas Natural,Gas Natural,Casi nuevo,30 a 50 años,7,0,"{'1': 'Aire acondicionado', '2': 'Armarios', '...",134900
27,Piso en venta en Calle Ordicia,https://www.fotocasa.es/es/comprar/vivienda/ma...,https://static.inmofactory.com/images/inmofact...,España,Usera,Orcasitas,Calle Ordicia,28041,Madrid,Flat,...,95,G,Gas Natural,Gas Natural,Bien,10 a 20 años,9,0,"{'1': 'Aire acondicionado', '2': 'Armarios', '...",255000
28,Piso en venta,https://www.fotocasa.es/es/comprar/vivienda/co...,https://static.inmofactory.com/images/inmofact...,España,Valleaguado - La Cañada,,,28822,Madrid,Flat,...,70,G,,,Bien,,13,0,"{'1': 'Aire acondicionado', '3': 'Calefacción'...",169500
29,Piso en venta,https://www.fotocasa.es/es/comprar/vivienda/la...,https://static.inmofactory.com/images/inmofact...,España,El Cantizal,,,28232,Madrid,Flat,...,83,G,Gas Natural,Gas Natural,Casi nuevo,10 a 20 años,6,0,"{'1': 'Aire acondicionado', '2': 'Armarios', '...",390000


In [150]:
# Column dtypes and non-null counts of the scraped DataFrame.
# NOTE(review): the stored output reports 28944 rows while the shape cell shows 330, so the
# outputs appear to come from different runs; re-run the notebook top to bottom to confirm.
ft_scraping.data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28944 entries, 0 to 28943
Data columns (total 29 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   title              28944 non-null  object
 1   link               28944 non-null  object
 2   image_url          28944 non-null  object
 3   country            28944 non-null  object
 4   district           28944 non-null  object
 5   neighborhood       28944 non-null  object
 6   street             28944 non-null  object
 7   zipCode            28944 non-null  object
 8   province           28944 non-null  object
 9   buildingType       28944 non-null  object
 10  clientAlias        28944 non-null  object
 11  latitude           28944 non-null  object
 12  longitude          28944 non-null  object
 13  isNewConstruction  28944 non-null  object
 14  rooms              28944 non-null  object
 15  bathrooms          28944 non-null  object
 16  parking            28944 non-null  objec

In [151]:
# Names of the columns produced by check_features.
ft_scraping.data.columns

Index(['title', 'link', 'image_url', 'country', 'district', 'neighborhood',
       'street', 'zipCode', 'province', 'buildingType', 'clientAlias',
       'latitude', 'longitude', 'isNewConstruction', 'rooms', 'bathrooms',
       'parking', 'elevator', 'furnished', 'surface', 'energyCertificate',
       'hotWater', 'heating', 'conservationState', 'antiquity', 'floor',
       'surfaceLand', 'otherFeatures', 'price'],
      dtype='object')