# 📊 Web Scraping - PropyApp
- Samuel Pérez Hurtado
- Sebastian Monsalve Gómez
- David Romero Rodríguez

In [1]:
import pandas as pd
import requests

In [2]:
headers = {
    "USER_AGENT": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/71.0.3578.80 Chrome/71.0.3578.80 Safari/537.36",
    "referer": "https://fincaraiz.com.co/"
}

json_data = {
    'filter': {
        'offer': {
            'slug': [
                'sell',
            ],
        },
        'is_new': 'False',
        'locations': {
            'cities': {
                'name': ['Medellín']
            }
        },
    },
    'fields': {
        'exclude': [],
        'include': [
            'area',
            'baths.id',
            'baths.name',
            'client.client_type',
            'client.company_name',
            'client.first_name',
            'client.last_name',
            'garages.name',
            'is_new',
            'locations.cities.name',
            'locations.cities.slug',
            'locations.countries.name',
            'locations.countries.slug',
            'locations.groups.name',
            'locations.groups.slug',
            'locations.groups.subgroups.name',
            'locations.groups.subgroups.slug',
            'locations.location_point',
            'locations.neighbourhoods.name',
            'locations.neighbourhoods.slug',
            'locations.states.name',
            'locations.states.slug',
            'min_area',
            'min_price',
            'price',
            'products.configuration.tag_name',
            'products.label',
            'products.name',
            'products.slug',
            'property_id',
            'fr_property_id',
            'rooms.name',
            'title',
            'property_type.name',
            'offer.name',
            'fr_parent_property_id',
            'stratum.name',
        ],
        'limit': 25,
        'offset': 0,
        'ordering': [],
        'platform': 40,
        'with_algorithm': False,
    }
}

In [4]:
response = requests.post('https://api.fincaraiz.com.co/document/api/1.0/listing/search', headers=headers, json=json_data, timeout=10)
data = response.json()
data

{'took': 12,
 'timed_out': False,
 '_shards': {'total': 3, 'successful': 3, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 26992, 'relation': 'eq'},
  'max_score': 0.0,
  'hits': [{'_index': 'fr-site-listing',
    '_id': '5b59f4a2-1208-481a-b451-80fb02fe2e1b',
    '_score': 0.0,
    '_source': {'listing': {'area': '50.0',
      'rooms': {'name': '3'},
      'is_new': False,
      'title': 'Apartamento en Venta',
      'property_id': '5b59f4a2-1208-481a-b451-80fb02fe2e1b',
      'products': [{'configuration': {'tag_name': ''},
        'name': 'Aviso Adicional',
        'label': 'MA',
        'slug': 'PAID_QUOTA'}],
      'stratum': {'name': 'Estrato 3'},
      'offer': [{'name': 'Venta'}],
      'garages': {'name': 'Sin especificar'},
      'baths': {'name': '1', 'id': 1},
      'min_price': '0',
      'fr_parent_property_id': 0,
      'price': '210000000.0',
      'min_area': '0',
      'client': {'company_name': 'Arrendamientos Alnago',
       'last_name': '',
       'client

In [None]:
def obtener_datos(offset):
    json_data['fields']['offset'] = offset
    response = requests.post('https://api.fincaraiz.com.co/document/api/1.0/listing/search', headers=headers, json=json_data, timeout=10)
    data = response.json()
    return data.get('hits', {}).get('hits', [])

num_paginas = 1 #Esta es la máxima cantidad de páginas que podemos extraer

properties_data = []

offset = 0

for pagina in range(num_paginas):
    hits = obtener_datos(offset)

    if not hits:
        break

    for hit in hits:
        source = hit['_source']['listing']
        property_info = {
            'area': source['area'],
            'rooms': source['rooms']['name'],
            'is_new': source['is_new'],
            'property_id': source['property_id'],
            'garages': source['garages']['name'],
            'stratum': source['stratum']['name'],
            'property_type': source['property_type'][0]['name'],
            'baths': source['baths']['name'],
            'min_price': source['min_price'],
            'price': source['price'],
            'company_name': source['client']['company_name'],
            'client_type': source['client']['client_type'],
            'neighbourhood': source['locations']['neighbourhoods'][0]['name'],
            'city': source['locations']['cities'][0]['name'],
            'latitude': source['locations']['location_point'].split()[1][1:],
            'longitude': source['locations']['location_point'].split('(')[1].split()[1].rstrip(')'),
            'zone': source['locations'].get('groups', [{'subgroups': {'name': 'No Zone'}}])[0]['subgroups']['name']
        }
        properties_data.append(property_info)

    offset += 25

df = pd.DataFrame(properties_data)
df.drop_duplicates(subset='property_id', keep='first', inplace=True)

In [4]:
df

Unnamed: 0,area,rooms,is_new,property_id,garages,stratum,property_type,baths,min_price,price,company_name,client_type,neighbourhood,city,latitude,longitude,zone
0,50.0,3,False,5b59f4a2-1208-481a-b451-80fb02fe2e1b,Sin especificar,Estrato 3,Apartamento,1,0,210000000.0,Arrendamientos Alnago,BROKER,Aranjuez,Medellín,-75.55608367919922,6.281782627105713,NorOriente
1,185.0,3,False,4275fdf4-8382-43f4-8e4b-f330031d50a8,2,Estrato 6,Apartamento,3,0,1040000000.0,ASESORIA INMOBILIARIA Y PROYECTO,BROKER,Las palmas,Medellín,-75.55524826049805,6.180519207134783,No Zone
2,65.0,3,False,3840da3b-dffd-4c79-8ce0-6e40ac1da3be,Sin especificar,Estrato 2,Apartamento,1,0,165000000.0,Arrendamientos Alnago,BROKER,Manrique,Medellín,-75.5600357055664,6.287129878997803,NorOriente
3,37.0,2,False,0da20ec5-8e25-4e7a-9e04-f1e33536f8b0,Sin especificar,Estrato 3,Apartamento,1,0,145000000.0,Arrendamientos Alnago,BROKER,Loreto,Medellín,-75.55716705322266,6.23033332824707,No Zone
4,95.0,3,False,877904f1-7876-4dd6-b858-457b62018eb6,1,Estrato 3,Apartamento,2,0,300000000.0,Arrendamientos Alnago,BROKER,Villa Hermosa,Medellín,-75.54766082763672,6.275725841522217,NorOriente
5,87.0,3,False,03a0f0ef-8241-4241-b897-70c23b46bec8,Sin especificar,Estrato 3,Apartamento,2,0,175000000.0,Arrendamientos Alnago,BROKER,Centro,Medellín,-75.5658187866211,6.247637748718262,Centro
6,37.0,2,False,97d5c198-7ca1-4f63-a192-1e85df8af695,Sin especificar,Estrato 3,Apartamento,1,0,150000000.0,Arrendamientos Alnago,BROKER,Loreto,Medellín,-75.55716705322266,6.23033332824707,No Zone
7,72.0,3,False,45bcc478-7d65-4896-a6cd-66885c0078ae,1,Estrato 3,Apartamento,2,0,285000000.0,Proventa Inmobiliaria,BROKER,Rodeo Alto,Medellín,-75.60260772705078,6.212916851043701,SurOccidente
8,135.0,3,False,60311440-7ef3-4907-9f12-c3b35b48f202,1,Estrato 5,Apartamento,2,0,600000000.0,Arrendamientos Santa Fe,BROKER,Laureles,Medellín,-75.59723663330078,6.246053695678711,Occidente
9,177.0,3,False,ed6a96d9-37f4-4b0d-9f27-e17cd134916e,4,Estrato 6,Apartamento,4,0,1380000000.0,Compro Casa,BROKER,San Lucas,Medellín,-75.56405639648438,6.179944515228272,SurOriente


In [13]:
df.set_index('property_id').head()

Unnamed: 0_level_0,area,rooms,is_new,garages,stratum,property_type,baths,min_price,price,company_name,client_type,neighbourhood,city,latitude,longitude,zone
property_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
5b59f4a2-1208-481a-b451-80fb02fe2e1b,50.0,3,False,Sin especificar,Estrato 3,Apartamento,1,0,210000000.0,Arrendamientos Alnago,BROKER,Aranjuez,Medellín,-75.55608367919922,6.281782627105713,NorOriente
acbbcf61-3022-4d34-87b9-d240ae7cb7b2,67.0,3,False,1,Estrato 3,Apartamento,2,0,340000000.0,Arrendamientos Alnago,BROKER,Urbanizacion praderas de guayabal,Itaguí,-75.59156036376953,6.235456943511963,No Zone
7dd6770d-b5f2-4106-81bd-94c39df2999f,96.0,3,False,Sin especificar,Estrato 3,Apartamento,2,0,295000000.0,Arrendamientos Alnago,BROKER,Aranjuez,Medellín,-75.5658187866211,6.247637748718262,NorOriente
4275fdf4-8382-43f4-8e4b-f330031d50a8,185.0,3,False,2,Estrato 6,Apartamento,3,0,1040000000.0,ASESORIA INMOBILIARIA Y PROYECTO,BROKER,Las palmas,Medellín,-75.55524826049805,6.180519207134783,No Zone
3840da3b-dffd-4c79-8ce0-6e40ac1da3be,65.0,3,False,Sin especificar,Estrato 2,Apartamento,1,0,165000000.0,Arrendamientos Alnago,BROKER,Manrique,Medellín,-75.5600357055664,6.287129878997803,NorOriente


In [7]:
df.to_csv('data/medellin.csv',index = False)