# Análise Exploratória de Dados

## Preparação

In [322]:
import warnings

import polars as pl

from df_utils import (
    get_list_column_max_len,
    normalize_schemas,
    one_hot_encode_list_column,
)
from geo_location import extract_polygons_from_folder, mark_points_in_polygons

warnings.filterwarnings('ignore', category=pl.exceptions.MapWithoutReturnDtypeWarning)

## Leitura de dados

Os dados se encontram no formato NDJSON, também conhecido como JSONLines,
em que há um objeto JSON por linha no arquivo.

In [323]:
neighborhoods = ["cidade_baixa", "centro_historico", "menino_deus", "sarandi"]

neighborhoods_df_map = {
    neighborhood: pl.read_ndjson(f"data/{neighborhood}_listings.json")
    for neighborhood in neighborhoods
}

## Entendimento dos dados e pré-processamento

Gostaríamos de concatenar os dados das diferentes fontes, para facilitar a análise.

In [324]:
try:
    pl.concat(neighborhoods_df_map.values())
except pl.exceptions.SchemaError:
    print("Os dados não têm esquema homogêneo.")

Os dados não têm esquema homogêneo.


A concatenação direta não é possível, pois a estrutura dos dados não é homogênea.

In [325]:
for name, df in neighborhoods_df_map.items():
    print(name)
    print("=" * len(name))
    print(df.schema)
    print()

cidade_baixa
Schema([('listing', Struct({'legacyId': String, 'amenities': List(String), 'title': String, 'unitTypes': List(String), 'updatedAt': String, 'resale': Boolean, 'address': Struct({'city': String, 'neighborhood': String, 'street': String, 'streetNumber': String, 'point': Struct({'source': String, 'approximateLat': Float64, 'approximateLon': Float64, 'radius': Int64, 'lat': Float64, 'lon': Float64}), 'stateAcronym': String}), 'nonActivationReason': String, 'parkingSpaces': List(Int64), 'portal': String, 'whatsappNumber': String, 'acceptExchange': Boolean, 'listingsCount': Int64, 'createdAt': String, 'bedrooms': List(Int64), 'advertiserId': String, 'floors': List(Int64), 'totalAreas': List(Int64), 'listingType': String, 'unitsOnTheFloor': Int64, 'publicationType': String, 'advertiserContact': Struct({'chat': String, 'phones': List(String)}), 'showPrice': Boolean, 'providerId': String, 'id': String, 'externalId': String, 'buildings': Int64, 'bathrooms': List(Int64), 'constructio

A maior diferença parece estar na estrutura aninhada `rentalInfo`,
que não é presente nos dados vindos do bairro Sarandi.

In [326]:
dfs = list(neighborhoods_df_map.values())
normalized_dfs = normalize_schemas(dfs)
df = pl.concat(normalized_dfs)

In [327]:
df.head(2)

listing,account,medias,accountLink,link
struct[43],struct[9],list[struct[3]],struct[4],struct[4]
"{""26349587"",[""DISABLED_ACCESS"", ""INTEGRATED_ENVIRONMENTS"", … ""PARTY_HALL""],""Apartamento de 46 metros quadrados no bairro Cidade Baixa com 1 quarto"",[""APARTMENT""],""2024-12-17T11:12:11.169+00:00"",false,{""Porto Alegre"",""Cidade Baixa"",""Rua Joaquim Nabuco"",null,{""GOOGLE"",-30.041,-51.219,140,null,null},""RS""},""NonActivationReason_NONE"",[],""GRUPOZAP"","""",true,null,""2019-11-05T13:09:45.557+00:00"",[1],""dfadbe19-2554-5480-56eb-0780d0ee4019"",[8],[48],""USED"",4,""PREMIUM"",{"""",[""5130130100""]},true,"""",""2463567709"",""ZAP1593841"",0,[1],""ConstructionStatus_NONE"",[],""ACTIVE"",""OWNER"",[{""SALE"",600,290000,300,{null,[],null}}],0,""a5adbade-589d-3c61-97b7-e4014f21cf3d"",[46],[],""STREET"",""Apartamento em excelente estado, iluminado e silencioso, mobiliado com móveis embutidos , conceito aberto, cozinha e sala integrados, gás central, aquecimento em todas as torneiras, fica ar condicionado do quarto e todos os móveis embutidos quarto, banheiro, sala, cozinha e homeoffice."",[""RESIDENTIAL""],""UNIT"",[],[0]}","{""dfadbe19-2554-5480-56eb-0780d0ee4019"",""BÁRBARA EINSFELD DE BORBA"",null,"""",true,345436,3136761,""2018-05-11T05:14:21Z"",""""}","[{""7cb4b1098bf75603a7f2561456bc2883"",""https://resizedimgs.zapimoveis.com.br/{action}/{width}x{height}/vr.images.sp/7cb4b1098bf75603a7f2561456bc2883.webp"",""IMAGE""}, {""3ad45448360d515b4bbb3d833537b5e4"",""https://resizedimgs.zapimoveis.com.br/{action}/{width}x{height}/vr.images.sp/3ad45448360d515b4bbb3d833537b5e4.webp"",""IMAGE""}, … {""3c74183638571539a2c2f9f568a72d7a"",""https://resizedimgs.zapimoveis.com.br/{action}/{width}x{height}/vr.images.sp/3c74183638571539a2c2f9f568a72d7a.webp"",""IMAGE""}]","{""BÁRBARA EINSFELD DE BORBA"",""/imobiliaria/345436/"","""",{}}","{""Apartamento com 1 Quarto à venda, 46m²"",""/imovel/venda-apartamento-1-quarto-mobiliado-cidade-baixa-porto-alegre-46m2-id-2463567709/"","""",{""Porto Alegre"",""Cidade Baixa"","""",""Rua Joaquim Nabuco"","""",""""}}"
"{"""",[],""Compre apartamento silencioso,55,10m2 de área útil,2 quartos, piso parquet,living 2 ambientes,banhei"",[""APARTMENT""],""2024-12-26T12:00:41.422+00:00"",false,{""Porto Alegre"",""Cidade Baixa"",""Rua José do Patrocínio"",""357"",{""GOOGLE"",null,null,null,-30.038562,-51.224749},""RS""},""NonActivationReason_NONE"",[],""GRUPOZAP"",""51999671489"",false,7,""2024-12-01T21:22:02.001+00:00"",[2],""90879777-b9cc-65b9-28c8-c86f00947af9"",[],[66],""USED"",0,""PREMIUM"",{"""",[""5132084035"", ""51999671489""]},true,""13887"",""2760694515"",""VR527903"",0,[1],""ConstructionStatus_NONE"",[],""ACTIVE"",""REAL_ESTATE"",[{""SALE"",0,275000,290,{null,[],null}}],0,""40017671-36da-3cb2-98e7-116dd4abcba1"",[56],[0],""ALL"",""Compre apartamento silencioso,55,10m2 de área útil,2 quartos, piso parquet,living 2 ambientes,banheiro social ,cozinha e área de serviço. O condomínio fica localizado em Rua José do Patrocínio no bairro Cidade Baixa em Porto Alegre. Está bem situado, próximo a pontos de interesse de Cidade Baixa, tais como Azambuja, Praça General Daltro Filho, Faculdade de Direito da Funda. Escola Sup. do Ministério Público, Praça Salvador Allende, e Policlínica Militar de Porto Alegre.Aceitamos Fiinanciamento Bancário e Fgts.Marque hoje mesmo uma visita com um de nossos Consultores Imobiliários caddstrados! ]]>"",[""RESIDENTIAL""],""UNIT"",[],[]}","{""90879777-b9cc-65b9-28c8-c86f00947af9"",""Sperinde Vendas"",""https://resizedimgs.zapimoveis.com.br/{action}/{width}x{height}/vr.images.sp/fb5adab24465e4ba59ba7d6e4ac1de9e.webp"",""00411-J-RS"",true,33518,2594637,""2018-03-27T18:49:45Z"",""diamond""}","[{""3d8f346437754cf315cc942816497178"",""https://resizedimgs.zapimoveis.com.br/{action}/{width}x{height}/vr.images.sp/3d8f346437754cf315cc942816497178.webp"",""IMAGE""}, {""d309f06d26092646e716b47e93936c7d"",""https://resizedimgs.zapimoveis.com.br/{action}/{width}x{height}/vr.images.sp/d309f06d26092646e716b47e93936c7d.webp"",""IMAGE""}, … {""e0ddfeed8f77c5410adbbbe842aba483"",""https://resizedimgs.zapimoveis.com.br/{action}/{width}x{height}/vr.images.sp/e0ddfeed8f77c5410adbbbe842aba483.webp"",""IMAGE""}]","{""Sperinde Vendas"",""/imobiliaria/33518/"","""",{}}","{""Apartamento com 2 Quartos à venda, 56m²"",""/imovel/venda-apartamento-2-quartos-cidade-baixa-porto-alegre-56m2-id-2760694515/"","""",{""Porto Alegre"",""Cidade Baixa"","""",""Rua José do Patrocínio"",""357"",""""}}"


A maior parte das informações que consideramos importantes para nossa análise estão
na coluna `listing`, que tem uma estrutura aninhada complexa.
Aplainamos a estrutura para simplificar a análise, e ainda mantemos o link do anúncio,
presente na coluna `link`, e informações do anunciante (`account`)
para facilitar a consulta para análises mais aprofundadas.

In [328]:
df = df.select([pl.col("listing").struct.unnest(), pl.col("account"), pl.col("link")])
df = df.rename({"id": "listingId"})
df = df.select([pl.all().exclude("account"), pl.col("account").struct.unnest()])
df = df.rename(
    {
        "id": "accountId",
        "name": "accountName",
        "createdDate": "accountCreatedDate",
        "tier": "accountTier",
    }
)
df = df.select(
    [pl.all().exclude("link"), pl.col("link").struct.field("href").alias("link_href")]
)

In [329]:
df.head(2)

legacyId,amenities,title,unitTypes,updatedAt,resale,address,nonActivationReason,parkingSpaces,portal,whatsappNumber,acceptExchange,listingsCount,createdAt,bedrooms,advertiserId,floors,totalAreas,listingType,unitsOnTheFloor,publicationType,advertiserContact,showPrice,providerId,listingId,externalId,buildings,bathrooms,constructionStatus,stamps,status,contractType,pricingInfos,unitFloor,sourceId,usableAreas,suites,displayAddressType,description,usageTypes,propertyType,unitSubTypes,capacityLimit,accountId,accountName,logoUrl,licenseNumber,showAddress,legacyVivarealId,legacyZapId,accountCreatedDate,accountTier,link_href
str,list[str],str,list[str],str,bool,struct[6],str,list[i64],str,str,bool,i64,str,list[i64],str,list[i64],list[i64],str,i64,str,struct[2],bool,str,str,str,i64,list[i64],str,list[str],str,str,list[struct[5]],i64,str,list[i64],list[i64],str,str,list[str],str,list[str],list[i64],str,str,str,str,bool,i64,i64,str,str,str
"""26349587""","[""DISABLED_ACCESS"", ""INTEGRATED_ENVIRONMENTS"", … ""PARTY_HALL""]","""Apartamento de 46 metros quadr…","[""APARTMENT""]","""2024-12-17T11:12:11.169+00:00""",False,"{""Porto Alegre"",""Cidade Baixa"",""Rua Joaquim Nabuco"",null,{""GOOGLE"",-30.041,-51.219,140,null,null},""RS""}","""NonActivationReason_NONE""",[],"""GRUPOZAP""","""""",True,,"""2019-11-05T13:09:45.557+00:00""",[1],"""dfadbe19-2554-5480-56eb-0780d0…",[8],[48],"""USED""",4,"""PREMIUM""","{"""",[""5130130100""]}",True,"""""","""2463567709""","""ZAP1593841""",0,[1],"""ConstructionStatus_NONE""",[],"""ACTIVE""","""OWNER""","[{""SALE"",600,290000,300,{null,[],null}}]",0,"""a5adbade-589d-3c61-97b7-e4014f…",[46],[],"""STREET""","""Apartamento em excelente estad…","[""RESIDENTIAL""]","""UNIT""",[],[0],"""dfadbe19-2554-5480-56eb-0780d0…","""BÁRBARA EINSFELD DE BORBA""",,"""""",True,345436,3136761,"""2018-05-11T05:14:21Z""","""""","""/imovel/venda-apartamento-1-qu…"
"""""",[],"""Compre apartamento silencioso,…","[""APARTMENT""]","""2024-12-26T12:00:41.422+00:00""",False,"{""Porto Alegre"",""Cidade Baixa"",""Rua José do Patrocínio"",""357"",{""GOOGLE"",null,null,null,-30.038562,-51.224749},""RS""}","""NonActivationReason_NONE""",[],"""GRUPOZAP""","""51999671489""",False,7.0,"""2024-12-01T21:22:02.001+00:00""",[2],"""90879777-b9cc-65b9-28c8-c86f00…",[],[66],"""USED""",0,"""PREMIUM""","{"""",[""5132084035"", ""51999671489""]}",True,"""13887""","""2760694515""","""VR527903""",0,[1],"""ConstructionStatus_NONE""",[],"""ACTIVE""","""REAL_ESTATE""","[{""SALE"",0,275000,290,{null,[],null}}]",0,"""40017671-36da-3cb2-98e7-116dd4…",[56],[0],"""ALL""","""Compre apartamento silencioso,…","[""RESIDENTIAL""]","""UNIT""",[],[],"""90879777-b9cc-65b9-28c8-c86f00…","""Sperinde Vendas""","""https://resizedimgs.zapimoveis…","""00411-J-RS""",True,33518,2594637,"""2018-03-27T18:49:45Z""","""diamond""","""/imovel/venda-apartamento-2-qu…"


Qual o tamanho da massa de dados?

In [330]:
df.height

17341

Obtivemos 17.341 amostras.

Quantas amostras de cada bairro?

In [331]:
df = df.unnest("address")

In [332]:
df.group_by("neighborhood").len().sort(by="len", descending=True)

neighborhood,len
str,u32
"""Centro Histórico""",6407
"""Menino Deus""",5215
"""Sarandi""",2615
"""Cidade Baixa""",2052
"""Santa Tereza""",268
…,…
"""Jardim Europa""",1
"""Humaitá""",1
"""Agronomia""",1
"""Rio Branco""",1


Nota-se que o processo de extração trouxe dados de outros bairros, além dos desejados,
mas em menor quantidade.

In [333]:
target_neighborhoods = ["Centro Histórico", "Cidade Baixa", "Menino Deus", "Sarandi"]

In [334]:
df = df.filter(pl.col("neighborhood").is_in(target_neighborhoods))

In [335]:
df.group_by("neighborhood").len().sort(by="len", descending=True)

neighborhood,len
str,u32
"""Centro Histórico""",6407
"""Menino Deus""",5215
"""Sarandi""",2615
"""Cidade Baixa""",2052


In [336]:
df.height

16289

Restam 16.289 amostras.

A massa de dados é relativamente pequena, totalizando 16.289 imóveis.

Existem dados duplicados?

In [337]:
df.filter(pl.col("listingId").is_duplicated()).height

1708

Sim, 1.708 amostras são duplicadas.

In [343]:
df = df.unique(subset="listingId")

In [344]:
df.height

15432

In [345]:
df.group_by("neighborhood").len().sort(by="len", descending=True)

neighborhood,len
str,u32
"""Centro Histórico""",6059
"""Menino Deus""",4815
"""Sarandi""",2551
"""Cidade Baixa""",2007


### Descarte de informações não importantes

Existe muita informação ruidosa nesses dados.
Escolhemos as seguintes colunas para descarte.

In [346]:
df.head(1)

legacyId,amenities,title,unitTypes,updatedAt,resale,city,neighborhood,street,streetNumber,point,stateAcronym,nonActivationReason,parkingSpaces,portal,whatsappNumber,acceptExchange,listingsCount,createdAt,bedrooms,advertiserId,floors,totalAreas,listingType,unitsOnTheFloor,publicationType,advertiserContact,showPrice,providerId,listingId,externalId,buildings,bathrooms,constructionStatus,stamps,status,contractType,pricingInfos,unitFloor,sourceId,usableAreas,suites,displayAddressType,description,usageTypes,propertyType,unitSubTypes,capacityLimit,accountId,accountName,logoUrl,licenseNumber,showAddress,legacyVivarealId,legacyZapId,accountCreatedDate,accountTier,link_href
str,list[str],str,list[str],str,bool,str,str,str,str,struct[6],str,str,list[i64],str,str,bool,i64,str,list[i64],str,list[i64],list[i64],str,i64,str,struct[2],bool,str,str,str,i64,list[i64],str,list[str],str,str,list[struct[5]],i64,str,list[i64],list[i64],str,str,list[str],str,list[str],list[i64],str,str,str,str,bool,i64,i64,str,str,str
"""""","[""CABLE_TV"", ""WATCHMAN"", … ""INTERCOM""]","""Excelente oportunidade! Reserv…","[""CONDOMINIUM""]","""2024-12-25T23:31:38.579+00:00""",False,"""Porto Alegre""","""Sarandi""","""Avenida Ecoville""","""190""","{""GOOGLE"",null,null,null,-29.995522,-51.12181}","""RS""","""NonActivationReason_NONE""",[4],"""GRUPOZAP""","""51980148586""",False,,"""2024-10-08T12:40:16.613+00:00""",[3],"""0f4fcd4f-2e91-4211-8062-f0f0ca…",[],[346],"""USED""",0,"""STANDARD""","{"""",[""5134148586"", ""51980148586""]}",True,"""45354""","""2747898832""","""9922763""",0,[3],"""ConstructionStatus_NONE""","[""DATAZAP_APPROVED_SALE""]","""ACTIVE""","""REAL_ESTATE""","[{""SALE"",null,1470000,750,{null,[],null}}]",0,"""bed8b6ee-40c6-3795-827b-1e6972…",[370],[1],"""ALL""","""Crédito Real Vende excelente c…","[""RESIDENTIAL""]","""UNIT""",[],[],"""0f4fcd4f-2e91-4211-8062-f0f0ca…","""Crédito Real | Zona Norte""","""https://resizedimgs.zapimoveis…","""""",False,554181,3588750,"""2019-07-05T19:36:59Z""","""""","""/imovel/venda-casa-de-condomin…"


In [347]:
COLUMNS_TO_DROP = [
    "advertiserId",
    "constructionStatus",
    "portal",
    "stamps",
    "advertiserContact",
    "whatsappNumber",
    "title",
    "nonActivationReason",
    "status",
    "legacyId",
    "externalId",
    "listingsCount",
    "createdAt",
    "updatedAt",
    "showPrice",
    "acceptExchange",
    "description",
    "sourceId",
    "providerId",
    "accountName",
    "accountCreatedDate",
    "accountTier",
    "licenseNumber",
    "logoUrl",
    "legacyVivarealId",
    "legacyZapId",
    "showAddress"
]

df = df.drop(COLUMNS_TO_DROP)

In [348]:
df.head(3)

amenities,unitTypes,resale,city,neighborhood,street,streetNumber,point,stateAcronym,parkingSpaces,bedrooms,floors,totalAreas,listingType,unitsOnTheFloor,publicationType,listingId,buildings,bathrooms,contractType,pricingInfos,unitFloor,usableAreas,suites,displayAddressType,usageTypes,propertyType,unitSubTypes,capacityLimit,accountId,link_href
list[str],list[str],bool,str,str,str,str,struct[6],str,list[i64],list[i64],list[i64],list[i64],str,i64,str,str,i64,list[i64],str,list[struct[5]],i64,list[i64],list[i64],str,list[str],str,list[str],list[i64],str,str
"[""CABLE_TV"", ""WATCHMAN"", … ""INTERCOM""]","[""CONDOMINIUM""]",False,"""Porto Alegre""","""Sarandi""","""Avenida Ecoville""","""190""","{""GOOGLE"",null,null,null,-29.995522,-51.12181}","""RS""",[4],[3],[],[346],"""USED""",0,"""STANDARD""","""2747898832""",0,[3],"""REAL_ESTATE""","[{""SALE"",null,1470000,750,{null,[],null}}]",0,[370],[1],"""ALL""","[""RESIDENTIAL""]","""UNIT""",[],[],"""0f4fcd4f-2e91-4211-8062-f0f0ca…","""/imovel/venda-casa-de-condomin…"
[],"[""OFFICE""]",False,"""Porto Alegre""","""Centro Histórico""","""Rua Coronel Fernando Machado""","""981""","{""GOOGLE"",null,null,null,-30.035119,-51.227712}","""RS""",[],[0],[],[109],"""USED""",0,"""STANDARD""","""2762307994""",0,[1],"""REAL_ESTATE""","[{""SALE"",68,320000,448,{null,[],null}}]",0,[106],[0],"""ALL""","[""COMMERCIAL""]","""UNIT""",[],[],"""10929a8c-7227-5aff-fb05-6bf394…","""/imovel/venda-conjunto-comerci…"
"[""GARAGE""]","[""APARTMENT""]",False,"""Porto Alegre""","""Sarandi""","""Rua Gabriel Franco da Luz""","""560""","{""GOOGLE"",null,null,null,-29.981459,-51.123619}","""RS""",[1],[2],[],[1],"""USED""",0,"""STANDARD""","""2742952916""",0,[1],"""REAL_ESTATE""","[{""SALE"",30,185000,214,{null,[],null}}]",0,[45],[0],"""ALL""","[""RESIDENTIAL""]","""UNIT""",[],[],"""b1c30c71-820c-306e-8445-5e50cc…","""/imovel/venda-apartamento-2-qu…"


### Atributos em forma de lista

Sabemos que `amenities` descrevem as amenidades presentes em um dado imóvel,
o que pode ser interessante transformar em atributos preditores para nossas regressões.

Entretanto, ainda precisamos entender outras colunas que tem listas como valores.
Várias delas não parecem fazer sentido em ter multiplicidade: em que contexto o número
de quartos (bedrooms) precisa ser uma lista? Número de banheiros? Área utilizável?
Uma unidade pode pertencer ao mesmo tempo a mais de um tipo (comercial, casa)?

Em primeiro lugar, tentamos entender quais listas de fato vêm a possuir mais de um
elemento.


In [349]:
LIST_COLUMNS = [
    "floors",
    "unitSubTypes",
    "suites",
    "unitTypes",
    "pricingInfos",
    "parkingSpaces",
    "totalAreas",
    "bathrooms",
    "bedrooms",
    "usableAreas",
    "usageTypes",
    "capacityLimit",
]

max_colname_width = max(len(name) for name in LIST_COLUMNS)
arrays_with_size_larger_than_one = []
for colname in LIST_COLUMNS:
    column_max_length = get_list_column_max_len(df, colname)
    print(f"{colname:<{max_colname_width}}: {column_max_length}")
    if column_max_length > 1:
        arrays_with_size_larger_than_one.append(colname)

floors       : 1
unitSubTypes : 2
suites       : 1
unitTypes    : 1
pricingInfos : 2
parkingSpaces: 2
totalAreas   : 2
bathrooms    : 2
bedrooms     : 2
usableAreas  : 2
usageTypes   : 2
capacityLimit: 1


A primeira coisa a se fazer, para facilitar o restante da nossa análise e pré-processamento,
é transformar aquelas colunas que são sempre listas de um elemento em colunas escalares.

In [350]:
single_element_list_columns = [
    "capacityLimit",
    "unitTypes",
    "floors",
    "suites",
]

df = df.with_columns(
    [pl.col(col).list.first().alias(col) for col in single_element_list_columns]
)

In [351]:
df.head(2)

amenities,unitTypes,resale,city,neighborhood,street,streetNumber,point,stateAcronym,parkingSpaces,bedrooms,floors,totalAreas,listingType,unitsOnTheFloor,publicationType,listingId,buildings,bathrooms,contractType,pricingInfos,unitFloor,usableAreas,suites,displayAddressType,usageTypes,propertyType,unitSubTypes,capacityLimit,accountId,link_href
list[str],str,bool,str,str,str,str,struct[6],str,list[i64],list[i64],i64,list[i64],str,i64,str,str,i64,list[i64],str,list[struct[5]],i64,list[i64],i64,str,list[str],str,list[str],i64,str,str
"[""CABLE_TV"", ""WATCHMAN"", … ""INTERCOM""]","""CONDOMINIUM""",False,"""Porto Alegre""","""Sarandi""","""Avenida Ecoville""","""190""","{""GOOGLE"",null,null,null,-29.995522,-51.12181}","""RS""",[4],[3],,[346],"""USED""",0,"""STANDARD""","""2747898832""",0,[3],"""REAL_ESTATE""","[{""SALE"",null,1470000,750,{null,[],null}}]",0,[370],1,"""ALL""","[""RESIDENTIAL""]","""UNIT""",[],,"""0f4fcd4f-2e91-4211-8062-f0f0ca…","""/imovel/venda-casa-de-condomin…"
[],"""OFFICE""",False,"""Porto Alegre""","""Centro Histórico""","""Rua Coronel Fernando Machado""","""981""","{""GOOGLE"",null,null,null,-30.035119,-51.227712}","""RS""",[],[0],,[109],"""USED""",0,"""STANDARD""","""2762307994""",0,[1],"""REAL_ESTATE""","[{""SALE"",68,320000,448,{null,[],null}}]",0,[106],0,"""ALL""","[""COMMERCIAL""]","""UNIT""",[],,"""10929a8c-7227-5aff-fb05-6bf394…","""/imovel/venda-conjunto-comerci…"


A seguir, procuramos entender a coluna `pricingInfos`.

In [352]:
df.filter(pl.col("pricingInfos").list.len() > 1).head(3)

amenities,unitTypes,resale,city,neighborhood,street,streetNumber,point,stateAcronym,parkingSpaces,bedrooms,floors,totalAreas,listingType,unitsOnTheFloor,publicationType,listingId,buildings,bathrooms,contractType,pricingInfos,unitFloor,usableAreas,suites,displayAddressType,usageTypes,propertyType,unitSubTypes,capacityLimit,accountId,link_href
list[str],str,bool,str,str,str,str,struct[6],str,list[i64],list[i64],i64,list[i64],str,i64,str,str,i64,list[i64],str,list[struct[5]],i64,list[i64],i64,str,list[str],str,list[str],i64,str,str
"[""GATED_COMMUNITY"", ""ALARM_SYSTEM""]","""CONDOMINIUM""",False,"""Porto Alegre""","""Sarandi""","""Avenida Willy Eugênio Fleck""","""1495""","{""GOOGLE"",null,null,null,-30.004463,-51.119792}","""RS""",[4],[4],0.0,[600],"""USED""",0,"""STANDARD""","""2764522928""",0,[5],"""REAL_ESTATE""","[{""RENTAL"",450,25000,2000,{null,[],null}}, {""SALE"",450,3500000,2000,{null,[],null}}]",0,[450],4.0,"""ALL""","[""RESIDENTIAL""]","""UNIT""",[],,"""d2ec76cf-7984-e8c9-73a8-b32a76…","""/imovel/venda-casa-de-condomin…"
"[""MEZZANINE"", ""HOME_OFFICE""]","""SHED_DEPOSIT_WAREHOUSE""",False,"""Porto Alegre""","""Sarandi""",,,"{""GOOGLE"",-29.993,-51.142,250,null,null}","""RS""",[],[],,[745],"""USED""",0,"""STANDARD""","""2701907307""",0,[2],"""REAL_ESTATE""","[{""RENTAL"",null,13500,null,{null,[],null}}, {""SALE"",null,2100000,null,{null,[],null}}]",0,[745],,"""NEIGHBORHOOD""","[""COMMERCIAL""]","""UNIT""",[],,"""22b62d2f-12d9-2fb9-3a0d-80261c…","""/imovel/venda-galpao-deposito-…"
[],"""BUSINESS""",False,"""Porto Alegre""","""Cidade Baixa""","""Travessa do Carmo""",,"{""GOOGLE"",-30.039,-51.226,140,null,null}","""RS""",[0],[0],0.0,[125],"""USED""",0,"""STANDARD""","""2647956266""",0,[2],"""REAL_ESTATE""","[{""RENTAL"",190,3300,0,{null,[],null}}, {""SALE"",190,860000,0,{null,[],null}}]",0,[125],0.0,"""STREET""","[""COMMERCIAL""]","""UNIT""",[],,"""127bf420-5036-7a0d-d2af-19f8c9…","""/imovel/venda-loja-salao-cidad…"


In [353]:
df.select(pl.col("pricingInfos").explode().struct.field("businessType").unique())

businessType
str
"""SALE"""
"""RENTAL"""


Quando ocorre multiplicidade, é porque o imóvel pode ser comprado ou alugado.
Para nossos propósitos, nos interessamos tão somente em propriedades à venda, e apenas no valor de compra.
Desta forma, podemos descartar todas as outras informações e imóveis que não estão disponíveis para compra,
o que é revelado pelo atributo `businessType` de `pricingInfos`.

In [354]:
df = (
    df.explode(pl.col("pricingInfos"))
    .unnest("pricingInfos")
    .filter(pl.col("businessType") != "RENTAL")
)

In [355]:
df.height

15432

In [356]:
df.head(2)

amenities,unitTypes,resale,city,neighborhood,street,streetNumber,point,stateAcronym,parkingSpaces,bedrooms,floors,totalAreas,listingType,unitsOnTheFloor,publicationType,listingId,buildings,bathrooms,contractType,businessType,yearlyIptu,price,monthlyCondoFee,rentalInfo,unitFloor,usableAreas,suites,displayAddressType,usageTypes,propertyType,unitSubTypes,capacityLimit,accountId,link_href
list[str],str,bool,str,str,str,str,struct[6],str,list[i64],list[i64],i64,list[i64],str,i64,str,str,i64,list[i64],str,str,i64,i64,i64,struct[3],i64,list[i64],i64,str,list[str],str,list[str],i64,str,str
"[""CABLE_TV"", ""WATCHMAN"", … ""INTERCOM""]","""CONDOMINIUM""",False,"""Porto Alegre""","""Sarandi""","""Avenida Ecoville""","""190""","{""GOOGLE"",null,null,null,-29.995522,-51.12181}","""RS""",[4],[3],,[346],"""USED""",0,"""STANDARD""","""2747898832""",0,[3],"""REAL_ESTATE""","""SALE""",,1470000,750,"{null,[],null}",0,[370],1,"""ALL""","[""RESIDENTIAL""]","""UNIT""",[],,"""0f4fcd4f-2e91-4211-8062-f0f0ca…","""/imovel/venda-casa-de-condomin…"
[],"""OFFICE""",False,"""Porto Alegre""","""Centro Histórico""","""Rua Coronel Fernando Machado""","""981""","{""GOOGLE"",null,null,null,-30.035119,-51.227712}","""RS""",[],[0],,[109],"""USED""",0,"""STANDARD""","""2762307994""",0,[1],"""REAL_ESTATE""","""SALE""",68.0,320000,448,"{null,[],null}",0,[106],0,"""ALL""","[""COMMERCIAL""]","""UNIT""",[],,"""10929a8c-7227-5aff-fb05-6bf394…","""/imovel/venda-conjunto-comerci…"


Podemos descartar `businessType`, que não é mais útil, e `rentalInfo`, pois não é relevante para nosso propósito.

In [357]:
df = df.drop("businessType", "rentalInfo")

A seguir, buscamos entender `unitSubTypes`.

In [358]:
df.filter(pl.col("unitSubTypes").list.len() > 1).head(3)

amenities,unitTypes,resale,city,neighborhood,street,streetNumber,point,stateAcronym,parkingSpaces,bedrooms,floors,totalAreas,listingType,unitsOnTheFloor,publicationType,listingId,buildings,bathrooms,contractType,yearlyIptu,price,monthlyCondoFee,unitFloor,usableAreas,suites,displayAddressType,usageTypes,propertyType,unitSubTypes,capacityLimit,accountId,link_href
list[str],str,bool,str,str,str,str,struct[6],str,list[i64],list[i64],i64,list[i64],str,i64,str,str,i64,list[i64],str,i64,i64,i64,i64,list[i64],i64,str,list[str],str,list[str],i64,str,str
"[""GYM"", ""BARBECUE_GRILL"", … ""AIR_CONDITIONING""]","""APARTMENT""",False,"""Porto Alegre""","""Cidade Baixa""","""Avenida Loureiro da Silva""","""1870""","{""GOOGLE"",null,null,null,-30.035237,-51.224475}","""RS""",[2],"[1, 2]",13.0,"[103, 170]","""USED""",0,"""STANDARD""","""2747249760""",0,"[1, 2]","""REAL_ESTATE""",2300.0,749000,1172,12,[103],1,"""ALL""","[""RESIDENTIAL""]","""UNIT""","[""PENTHOUSE"", ""DUPLEX""]",,"""7e682d41-74f9-029f-8684-cf6062…","""/imovel/venda-cobertura-2-quar…"
"[""POOL"", ""PARTY_HALL"", … ""PETS_ALLOWED""]","""HOME""",False,"""Porto Alegre""","""Sarandi""","""Avenida Willy Eugênio Fleck""","""1500""","{""GOOGLE"",null,null,null,-30.003095,-51.120302}","""RS""",[2],[3],,[188],"""USED""",0,"""STANDARD""","""2725647696""",0,[3],"""REAL_ESTATE""",2700.0,990000,700,0,[157],1,"""ALL""","[""RESIDENTIAL""]","""UNIT""","[""CONDOMINIUM"", ""TWO_STORY_HOUSE""]",,"""96742062-22f8-d633-1a99-e0fdd5…","""/imovel/venda-casa-de-condomin…"
"[""RECREATION_AREA"", ""GYM"", … ""AIR_CONDITIONING""]","""HOME""",False,"""Porto Alegre""","""Sarandi""","""Avenida Ecoville""","""790""","{""GOOGLE"",null,null,null,-30.001304,-51.121844}","""RS""",[2],[3],,[150],"""USED""",0,"""STANDARD""","""2747155497""",0,[3],"""REAL_ESTATE""",,1200000,800,0,[150],1,"""ALL""","[""RESIDENTIAL""]","""UNIT""","[""CONDOMINIUM"", ""TWO_STORY_HOUSE""]",,"""bb09d356-3f68-827c-5066-a6412c…","""/imovel/venda-casa-de-condomin…"


`unitSubTypes`, neste caso, define duas categorias que se aplicam ao imóvel.

Por outro lado, um imóvel pode não ter nenhum subtipo? E, se sim, isso é comum?

In [359]:
df.filter(pl.col("unitSubTypes").list.len() == 0).height

14743

De fato, esse é o mais comum dos casos. Se esse dado fosse presente para todos os registros,
poderia ser um atributo interessante. Como não é o caso, o descartamos.

In [360]:
df = df.drop("unitSubTypes")

In [361]:
df.head(3)

amenities,unitTypes,resale,city,neighborhood,street,streetNumber,point,stateAcronym,parkingSpaces,bedrooms,floors,totalAreas,listingType,unitsOnTheFloor,publicationType,listingId,buildings,bathrooms,contractType,yearlyIptu,price,monthlyCondoFee,unitFloor,usableAreas,suites,displayAddressType,usageTypes,propertyType,capacityLimit,accountId,link_href
list[str],str,bool,str,str,str,str,struct[6],str,list[i64],list[i64],i64,list[i64],str,i64,str,str,i64,list[i64],str,i64,i64,i64,i64,list[i64],i64,str,list[str],str,i64,str,str
"[""CABLE_TV"", ""WATCHMAN"", … ""INTERCOM""]","""CONDOMINIUM""",False,"""Porto Alegre""","""Sarandi""","""Avenida Ecoville""","""190""","{""GOOGLE"",null,null,null,-29.995522,-51.12181}","""RS""",[4],[3],,[346],"""USED""",0,"""STANDARD""","""2747898832""",0,[3],"""REAL_ESTATE""",,1470000,750,0,[370],1,"""ALL""","[""RESIDENTIAL""]","""UNIT""",,"""0f4fcd4f-2e91-4211-8062-f0f0ca…","""/imovel/venda-casa-de-condomin…"
[],"""OFFICE""",False,"""Porto Alegre""","""Centro Histórico""","""Rua Coronel Fernando Machado""","""981""","{""GOOGLE"",null,null,null,-30.035119,-51.227712}","""RS""",[],[0],,[109],"""USED""",0,"""STANDARD""","""2762307994""",0,[1],"""REAL_ESTATE""",68.0,320000,448,0,[106],0,"""ALL""","[""COMMERCIAL""]","""UNIT""",,"""10929a8c-7227-5aff-fb05-6bf394…","""/imovel/venda-conjunto-comerci…"
"[""GARAGE""]","""APARTMENT""",False,"""Porto Alegre""","""Sarandi""","""Rua Gabriel Franco da Luz""","""560""","{""GOOGLE"",null,null,null,-29.981459,-51.123619}","""RS""",[1],[2],,[1],"""USED""",0,"""STANDARD""","""2742952916""",0,[1],"""REAL_ESTATE""",30.0,185000,214,0,[45],0,"""ALL""","[""RESIDENTIAL""]","""UNIT""",,"""b1c30c71-820c-306e-8445-5e50cc…","""/imovel/venda-apartamento-2-qu…"


In [362]:
df.filter(pl.col("bedrooms").list.len() > 1).head(5)

amenities,unitTypes,resale,city,neighborhood,street,streetNumber,point,stateAcronym,parkingSpaces,bedrooms,floors,totalAreas,listingType,unitsOnTheFloor,publicationType,listingId,buildings,bathrooms,contractType,yearlyIptu,price,monthlyCondoFee,unitFloor,usableAreas,suites,displayAddressType,usageTypes,propertyType,capacityLimit,accountId,link_href
list[str],str,bool,str,str,str,str,struct[6],str,list[i64],list[i64],i64,list[i64],str,i64,str,str,i64,list[i64],str,i64,i64,i64,i64,list[i64],i64,str,list[str],str,i64,str,str
"[""GYM"", ""BARBECUE_GRILL"", … ""AIR_CONDITIONING""]","""APARTMENT""",False,"""Porto Alegre""","""Cidade Baixa""","""Avenida Loureiro da Silva""","""1870""","{""GOOGLE"",null,null,null,-30.035237,-51.224475}","""RS""",[2],"[1, 2]",13.0,"[103, 170]","""USED""",0,"""STANDARD""","""2747249760""",0,"[1, 2]","""REAL_ESTATE""",2300,749000,1172,12,[103],1.0,"""ALL""","[""RESIDENTIAL""]","""UNIT""",,"""7e682d41-74f9-029f-8684-cf6062…","""/imovel/venda-cobertura-2-quar…"
"[""GARAGE""]","""APARTMENT""",False,"""Porto Alegre""","""Menino Deus""","""Rua Barbedo""","""756""","{""GOOGLE"",null,null,null,-30.05755,-51.22319}","""RS""",[1],"[2, 3]",,"[113, 115]","""USED""",0,"""PREMIUM""","""2721965242""",0,"[1, 2]","""REAL_ESTATE""",0,510000,450,0,"[95, 97]",1.0,"""ALL""","[""RESIDENTIAL""]","""UNIT""",,"""bdd8f146-4dd2-66d3-fcd8-d46a0e…","""/imovel/venda-apartamento-3-qu…"
"[""SERVICE_AREA"", ""ELEVATOR"", … ""EXTERIOR_VIEW""]","""APARTMENT""",False,"""Porto Alegre""","""Centro Histórico""","""Rua Riachuelo""","""1305""","{""GOOGLE"",null,null,null,-30.031668,-51.22865}","""RS""",[1],"[2, 3]",24.0,[110],"""USED""",0,"""STANDARD""","""2715453947""",0,[2],"""REAL_ESTATE""",1000,510000,0,0,[91],,"""ALL""","[""RESIDENTIAL""]","""UNIT""",,"""ba926be5-f8ec-df09-3e86-160b73…","""/imovel/venda-apartamento-3-qu…"
"[""GOURMET_BALCONY"", ""INTERCOM"", … ""POOL""]","""PENTHOUSE""",False,"""Porto Alegre""","""Menino Deus""","""Rua José de Alencar""","""485""","{""GOOGLE"",null,null,null,-30.060107,-51.226836}","""RS""",[0],"[1, 2]",10.0,"[112, 1521]","""USED""",0,"""PREMIERE_1""","""2750183194""",0,"[1, 2]","""REAL_ESTATE""",1400,498000,890,0,[97],1.0,"""ALL""","[""RESIDENTIAL""]","""UNIT""",,"""657d04ba-5eeb-f4c5-ddf5-0fbb0c…","""/imovel/venda-cobertura-2-quar…"
[],"""APARTMENT""",False,"""Porto Alegre""","""Centro Histórico""","""Avenida Borges de Medeiros""",,"{""GOOGLE"",-30.036,-51.228,140,null,null}","""RS""",[0],"[2, 3]",,"[149, 150]","""USED""",0,"""PREMIUM""","""2710656723""",0,"[2, 3]","""REAL_ESTATE""",100,450000,675,0,[125],1.0,"""STREET""","[""RESIDENTIAL""]","""UNIT""",,"""7e36b937-1397-0660-b03b-ff1d4a…","""/imovel/venda-apartamento-3-qu…"


### Localização geográfica dos imóveis

Crucial para nossos esforços é ter a localização dos imóveis, para podermos cruzá-las
com o mapa de inundação.

In [364]:
df = df.unnest("point")
df.head(3)

amenities,unitTypes,resale,city,neighborhood,street,streetNumber,source,approximateLat,approximateLon,radius,lat,lon,stateAcronym,parkingSpaces,bedrooms,floors,totalAreas,listingType,unitsOnTheFloor,publicationType,listingId,buildings,bathrooms,contractType,yearlyIptu,price,monthlyCondoFee,unitFloor,usableAreas,suites,displayAddressType,usageTypes,propertyType,capacityLimit,accountId,link_href
list[str],str,bool,str,str,str,str,str,f64,f64,i64,f64,f64,str,list[i64],list[i64],i64,list[i64],str,i64,str,str,i64,list[i64],str,i64,i64,i64,i64,list[i64],i64,str,list[str],str,i64,str,str
"[""CABLE_TV"", ""WATCHMAN"", … ""INTERCOM""]","""CONDOMINIUM""",False,"""Porto Alegre""","""Sarandi""","""Avenida Ecoville""","""190""","""GOOGLE""",,,,-29.995522,-51.12181,"""RS""",[4],[3],,[346],"""USED""",0,"""STANDARD""","""2747898832""",0,[3],"""REAL_ESTATE""",,1470000,750,0,[370],1,"""ALL""","[""RESIDENTIAL""]","""UNIT""",,"""0f4fcd4f-2e91-4211-8062-f0f0ca…","""/imovel/venda-casa-de-condomin…"
[],"""OFFICE""",False,"""Porto Alegre""","""Centro Histórico""","""Rua Coronel Fernando Machado""","""981""","""GOOGLE""",,,,-30.035119,-51.227712,"""RS""",[],[0],,[109],"""USED""",0,"""STANDARD""","""2762307994""",0,[1],"""REAL_ESTATE""",68.0,320000,448,0,[106],0,"""ALL""","[""COMMERCIAL""]","""UNIT""",,"""10929a8c-7227-5aff-fb05-6bf394…","""/imovel/venda-conjunto-comerci…"
"[""GARAGE""]","""APARTMENT""",False,"""Porto Alegre""","""Sarandi""","""Rua Gabriel Franco da Luz""","""560""","""GOOGLE""",,,,-29.981459,-51.123619,"""RS""",[1],[2],,[1],"""USED""",0,"""STANDARD""","""2742952916""",0,[1],"""REAL_ESTATE""",30.0,185000,214,0,[45],0,"""ALL""","[""RESIDENTIAL""]","""UNIT""",,"""b1c30c71-820c-306e-8445-5e50cc…","""/imovel/venda-apartamento-2-qu…"


Nem todas as listagens têm a localização precisa do imóvel. Algumas têm apenas uma
localização aproximada, dada por `approximateLat` e `approximateLon`.

In [365]:
df.filter(pl.col("approximateLat").is_null(), pl.col("lat").is_null())

amenities,unitTypes,resale,city,neighborhood,street,streetNumber,source,approximateLat,approximateLon,radius,lat,lon,stateAcronym,parkingSpaces,bedrooms,floors,totalAreas,listingType,unitsOnTheFloor,publicationType,listingId,buildings,bathrooms,contractType,yearlyIptu,price,monthlyCondoFee,unitFloor,usableAreas,suites,displayAddressType,usageTypes,propertyType,capacityLimit,accountId,link_href
list[str],str,bool,str,str,str,str,str,f64,f64,i64,f64,f64,str,list[i64],list[i64],i64,list[i64],str,i64,str,str,i64,list[i64],str,i64,i64,i64,i64,list[i64],i64,str,list[str],str,i64,str,str
[],"""OFFICE""",True,"""Porto Alegre""","""Menino Deus""","""Avenida Getúlio Vargas""","""910""",,,,,,,"""RS""",[1],[],,[],"""USED""",0,"""PREMIUM""","""2561009358""",0,[1],"""OWNER""",,280000,440.0,14,[32],,"""ALL""","[""COMMERCIAL""]","""UNIT""",,"""1847028f-f2f1-0d59-a208-01c230…","""/imovel/venda-conjunto-comerci…"
[],"""HOME""",True,"""Porto Alegre""","""Sarandi""","""Rua Doutor João Dahne""","""83""",,,,,,,"""RS""",[3],[3],,[],"""USED""",0,"""PREMIUM""","""2489898892""",0,[3],"""OWNER""",900.0,620000,,0,[148],2.0,"""ALL""","[""RESIDENTIAL""]","""UNIT""",,"""711f0095-bb5c-9ee7-de75-f73ec5…","""/imovel/venda-casa-3-quartos-s…"


Duas listagens não constam nenhuma localização, de forma que devem ser descartadas.

In [366]:
df = df.filter(pl.col("approximateLat").is_not_null() | pl.col("lat").is_not_null())

Desta forma, nossa massa final de dados é:

In [367]:
df.height

15430

In [368]:
df.group_by("neighborhood").len().sort(by="len", descending=True)

neighborhood,len
str,u32
"""Centro Histórico""",6059
"""Menino Deus""",4814
"""Sarandi""",2550
"""Cidade Baixa""",2007


#### Cruzamento da localização dos imóveis com áreas alagadas

In [369]:
kml_data_file = "data/cheias_em_porto_alegre.kml"
polygons = extract_polygons_from_folder(
    kml_data_file, "Inundação em 6 de Maio de 2024"
)

In [370]:
df = mark_points_in_polygons(df, polygons)

In [371]:
df.filter(pl.col("flooded"))

amenities,unitTypes,resale,city,neighborhood,street,streetNumber,source,approximateLat,approximateLon,radius,lat,lon,stateAcronym,parkingSpaces,bedrooms,floors,totalAreas,listingType,unitsOnTheFloor,publicationType,listingId,buildings,bathrooms,contractType,yearlyIptu,price,monthlyCondoFee,unitFloor,usableAreas,suites,displayAddressType,usageTypes,propertyType,capacityLimit,accountId,link_href,flooded
list[str],str,bool,str,str,str,str,str,f64,f64,i64,f64,f64,str,list[i64],list[i64],i64,list[i64],str,i64,str,str,i64,list[i64],str,i64,i64,i64,i64,list[i64],i64,str,list[str],str,i64,str,str,bool
"[""GARAGE""]","""APARTMENT""",false,"""Porto Alegre""","""Sarandi""","""Rua Gabriel Franco da Luz""","""560""","""GOOGLE""",,,,-29.981459,-51.123619,"""RS""",[1],[2],,[1],"""USED""",0,"""STANDARD""","""2742952916""",0,[1],"""REAL_ESTATE""",30,185000,214,0,[45],0,"""ALL""","[""RESIDENTIAL""]","""UNIT""",,"""b1c30c71-820c-306e-8445-5e50cc…","""/imovel/venda-apartamento-2-qu…",true
"[""AIR_CONDITIONING"", ""CARETAKER"", … ""SERVICE_AREA""]","""APARTMENT""",false,"""Porto Alegre""","""Menino Deus""","""Avenida Getúlio Vargas""","""673""","""GOOGLE""",,,,-30.050427,-51.222042,"""RS""",[],[3],,[126],"""USED""",0,"""STANDARD""","""2716483442""",0,[2],"""REAL_ESTATE""",910,370000,390,0,[93],,"""ALL""","[""RESIDENTIAL""]","""UNIT""",,"""c10bd73f-1704-1915-de4b-5f4085…","""/imovel/venda-apartamento-3-qu…",true
"[""CONCIERGE_24H"", ""ELEVATOR"", ""WATCHMAN""]","""OFFICE""",false,"""Porto Alegre""","""Centro Histórico""","""Rua Marechal Floriano Peixoto""","""38""","""GOOGLE""",,,,-30.029026,-51.226476,"""RS""",[],[],22,[29],"""USED""",0,"""STANDARD""","""2730185327""",0,[1],"""REAL_ESTATE""",350,35000,290,0,[22],,"""ALL""","[""COMMERCIAL""]","""UNIT""",,"""208f79e5-09c9-c426-5b7e-dc80c4…","""/imovel/venda-conjunto-comerci…",true
"[""GARAGE"", ""PETS_ALLOWED""]","""OFFICE""",false,"""Porto Alegre""","""Menino Deus""","""Avenida Getúlio Vargas""","""1157""","""GOOGLE""",,,,-30.054818,-51.223571,"""RS""",[1],[0],,[71],"""USED""",0,"""STANDARD""","""2719906247""",0,[1],"""REAL_ESTATE""",1000,340000,300,0,[37],0,"""ALL""","[""COMMERCIAL""]","""UNIT""",,"""325915b1-a8be-e0d2-c792-fd1f6c…","""/imovel/venda-conjunto-comerci…",true
[],"""OFFICE""",false,"""Porto Alegre""","""Centro Histórico""","""Praça Quinze de Novembro""","""66""","""GOOGLE""",,,,-30.028219,-51.226551,"""RS""",[],[0],,[80],"""USED""",0,"""STANDARD""","""2730673832""",0,[1],"""REAL_ESTATE""",0,250000,500,0,[58],0,"""ALL""","[""COMMERCIAL""]","""UNIT""",,"""eaf60d0d-b56b-d5a3-e985-6fb4d2…","""/imovel/venda-conjunto-comerci…",true
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"[""GATED_COMMUNITY"", ""PETS_ALLOWED""]","""APARTMENT""",false,"""Porto Alegre""","""Menino Deus""","""Avenida Getúlio Vargas""","""759""","""GOOGLE""",,,,-30.051233,-51.222225,"""RS""",[0],[2],4,[60],"""USED""",4,"""STANDARD""","""2761180909""",2,[1],"""REAL_ESTATE""",300,204900,260,2,[48],0,"""ALL""","[""RESIDENTIAL""]","""UNIT""",,"""0022ed11-32b7-3634-5828-5c87a5…","""/imovel/venda-apartamento-2-qu…",true
"[""SAFETY_CIRCUIT"", ""INTERCOM""]","""OFFICE""",false,"""Porto Alegre""","""Centro Histórico""","""Rua Vigário José Inácio""","""371""","""GOOGLE""",,,,-30.029015,-51.225908,"""RS""",[0],[0],20,[28],"""USED""",0,"""STANDARD""","""2703317675""",0,[1],"""REAL_ESTATE""",348,59800,300,0,[21],0,"""ALL""","[""COMMERCIAL""]","""UNIT""",,"""7b72e174-9b60-84bf-44f7-79a9c2…","""/imovel/venda-conjunto-comerci…",true
[],"""APARTMENT""",false,"""Porto Alegre""","""Centro Histórico""","""Rua Vigário José Inácio""","""295""","""GOOGLE""",,,,-30.028334,-51.225671,"""RS""",[],[1],,"[29, 30]","""USED""",0,"""STANDARD""","""2689944718""",0,[1],"""REAL_ESTATE""",0,159900,294,0,"[23, 24]",0,"""ALL""","[""RESIDENTIAL""]","""UNIT""",,"""b59d06b4-9f95-ad8d-40fd-3e5dd5…","""/imovel/venda-quitinete-1-quar…",true
"[""GARDEN"", ""PLAYGROUND"", … ""EXTERIOR_VIEW""]","""APARTMENT""",false,"""Porto Alegre""","""Menino Deus""","""Rua Botafogo""",,"""GOOGLE""",-30.054,-51.225,140,,,"""RS""",[0],[2],,[52],"""USED""",0,"""STANDARD""","""2694474277""",0,[1],"""REAL_ESTATE""",0,240000,356,0,[47],0,"""STREET""","[""RESIDENTIAL""]","""UNIT""",,"""2c4b2bac-a3db-46d9-16b2-7707d1…","""/imovel/venda-apartamento-2-qu…",true


In [375]:
(
    df.group_by("neighborhood")
    .agg(
        [
            pl.count().alias("total_count"),
            pl.col("flooded")
            .sum()
            .alias(
                "flooded_count"
            ),  # Since flooded is boolean, sum() gives us the count of True values
        ]
    )
    .with_columns(
        [
            (pl.col("flooded_count") / pl.col("total_count") * 100)
            .round(1)
            .alias("flooded_percentage")
        ]
    )
)

  pl.count().alias("total_count"),


neighborhood,total_count,flooded_count,flooded_percentage
str,u32,u32,f64
"""Menino Deus""",4814,2582,53.6
"""Centro Histórico""",6059,1797,29.7
"""Cidade Baixa""",2007,835,41.6
"""Sarandi""",2550,534,20.9


In [372]:
df.filter(pl.col("flooded")).height

5748

In [373]:
df.filter(pl.col("flooded")).group_by("neighborhood").len().sort(
    by="len", descending=True
)

neighborhood,len
str,u32
"""Menino Deus""",2582
"""Centro Histórico""",1797
"""Cidade Baixa""",835
"""Sarandi""",534
