In [None]:
import geopandas as gpd
import pandas as pd

from pathlib import Path
import unicodedata
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib
import numpy as np
import seaborn as sns

import requests
import zipfile
import io

In [None]:
# IBGE archive with the 2024 municipal boundaries of Rio Grande do Sul (RS)
url = "https://geoftp.ibge.gov.br/organizacao_do_territorio/malhas_territoriais/malhas_municipais/municipio_2024/UFs/RS/RS_Municipios_2024.zip"

# Folder to extract files
extract_to = Path("../data/shapefile_rs").resolve()
extract_to.mkdir(exist_ok=True, parents=True)

# A shapefile is only readable when its mandatory sidecar files are present too,
# so the download is skipped only if all of them already exist on disk.
required_parts = [
    extract_to / f"RS_Municipios_2024{suffix}" for suffix in (".shp", ".shx", ".dbf")
]

if all(part.exists() for part in required_parts):
    print(f"Shapefile already present, skipping download: {extract_to}")
else:
    # Download file; the timeout keeps a stalled connection from hanging the kernel
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    # Unzip file directly from memory (no temporary .zip is written to disk)
    with zipfile.ZipFile(io.BytesIO(response.content)) as z:
        z.extractall(extract_to)

    print(f"Files extracted to: {extract_to}")


In [None]:
# Client records (CSV produced by an earlier processing step)
client_data_path = Path("..") / "data" / "processed" / "clientes.csv"
client_data = pd.read_csv(client_data_path)

# Municipal boundaries of RS, read from the extracted IBGE shapefile
cities_path = Path("..") / "data" / "shapefile_rs" / "RS_Municipios_2024.shp"
gdf = gpd.read_file(cities_path)

In [None]:
# Number of client rows per municipality, using the raw (un-normalized) names
client_data.loc[:, "Município"].value_counts()

In [None]:
# Municipality names as spelled in the IBGE shapefile
gdf.loc[:, "NM_MUN"]

In [None]:
def normalize(text):
    """Return an accent-free, upper-case, whitespace-normalized version of ``text``.

    Used to build a join key so that the same city name written with different
    accents, letter case or spacing compares equal.

    Parameters
    ----------
    text : object
        Value to normalize. Null values (``None``, ``NaN``, ``pd.NA``) yield an
        empty string; any other non-string value is converted with ``str()``.

    Returns
    -------
    str
        ASCII-only, upper-case text with leading/trailing whitespace removed and
        internal runs of whitespace collapsed to a single space.
    """
    if pd.isnull(text):
        return ""
    # Coerce non-string values so unicodedata.normalize never raises TypeError
    text = str(text)
    # NFKD splits accented characters into base letter + combining mark; encoding
    # to ASCII with 'ignore' then drops the marks (and any other non-ASCII char)
    ascii_text = unicodedata.normalize("NFKD", text).encode("ASCII", "ignore").decode("ASCII")
    # split()/join trims both ends and collapses repeated internal whitespace
    return " ".join(ascii_text.upper().split())

# Build the shared "city" join key on both tables from their name columns:
# client records use "Município", the IBGE shapefile uses "NM_MUN".
client_data["city"] = client_data["Município"].map(normalize)
gdf["city"] = gdf["NM_MUN"].map(normalize)




In [None]:
# Clients per normalized city name, most frequent first.
# - Rows whose municipality was null were normalized to "" and are dropped here so
#   they are not reported as a city.
# - rename_axis/reset_index(name=...) pins the column names to "city" and "count"
#   regardless of how the installed pandas version names value_counts() output.
city_counts = (
    client_data["city"]
    .value_counts()
    .loc[lambda counts: counts.index != ""]
    .rename_axis("city")
    .reset_index(name="count")
)

# Left join keeps every municipality polygon; those without clients get a count of 0.
# Only the "count" column is filled, leaving geometry and other attributes untouched.
merged = gdf.merge(city_counts, how="left", on="city")
merged["count"] = merged["count"].fillna(0).astype(int)

In [None]:

# Blue ramp sampled from the upper 70% of "Blues" so the lightest shade is still visible
base_cmap = matplotlib.colormaps["Blues"]
new_colors = base_cmap(np.linspace(0.3, 1, 256))

# Keep white as the first entry of new_colors (other cells slice it off with [1:])
new_colors[0] = [1, 1, 1, 1]  # RGBA for white

# Colormap for the map: blue shades only, with white reserved for values below vmin.
# Putting white at index 0 of the ramp instead would also paint every count below
# max/256 white, making low-count cities indistinguishable from empty ones.
custom_cmap = mcolors.ListedColormap(new_colors[1:])
custom_cmap.set_under("white")

# Counts are whole numbers, so vmin=0.5 sends 0 to the "under" color (white) and
# every count >= 1 to a visible blue. vmax is floored at 1 so the normalization
# stays valid even when no municipality has clients.
norm = mcolors.Normalize(vmin=0.5, vmax=max(merged["count"].max(), 1))

fig, ax = plt.subplots(1, 1, figsize=(8, 8))
merged.plot(
    column="count",
    ax=ax,
    legend=True,
    cmap=custom_cmap,
    norm=norm,
    edgecolor="black",
)
ax.set_title("Distribuição das cidades no RS")
ax.axis("off")

# Make sure the output folder exists before saving
map_path = Path("../plots/clientes_mapa.png")
map_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(map_path, transparent=True)
plt.show()


In [None]:
# How many distinct (normalized) city names appear in the client data
city_counts["city"].nunique()

In [None]:
# city_counts is ordered by descending count, so the first 30 rows are the top 30
city_counts_f = city_counts.iloc[:30]

# Drop the white first entry so even the smallest bar gets a visible blue
custom_cmap = mcolors.ListedColormap(new_colors[1:])

fig, ax = plt.subplots(figsize=(7, 8))
sns.barplot(city_counts_f, x="count", y="city", hue="count", palette=custom_cmap, ax=ax)

# Add value labels to the right of each bar. Bars sit at positions 0..n-1 in row
# order, so enumerate() is used instead of the DataFrame index label, which only
# matches the bar position when the index happens to be 0..n-1.
for bar_position, count in enumerate(city_counts_f["count"]):
    ax.text(
        count + 1,          # X position: slightly to the right of the bar
        bar_position,       # Y position (bar index)
        str(int(count)),    # Text (value)
        va='center',        # Vertical alignment
        ha='left',          # Horizontal alignment
        fontsize=9
    )

# Optional styling
ax.set_xlabel("Numero de Clientes")
ax.set_ylabel("Cidade")
ax.set_title("Top 30 cidades em número de clientes")

# Apply the layout BEFORE saving so the PNG matches what is displayed
fig.tight_layout()

# Make sure the output folder exists before saving
top30_path = Path("../plots/clientes_top30.png")
top30_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(top30_path, transparent=True)
plt.show()