In [1]:
import os
import requests
from bs4 import BeautifulSoup
import yaml

In [2]:
# Load the project settings from config.yaml (looked up in the current working directory).
# The encoding is pinned to UTF-8 so that non-ASCII characters in the file are
# decoded the same way on every platform instead of depending on the locale default.
with open("config.yaml", "r", encoding="utf-8") as file:
    config = yaml.safe_load(file)

# Folder that receives the raw downloads, as configured under paths.data_raw.
raw_data_path = config["paths"]["data_raw"]

# Make sure the raw-data folder exists before anything is written into it.
os.makedirs(raw_data_path, exist_ok=True)

In [3]:
# Base URL of the site. The crawl starts from this page and the hrefs found on it
# are appended directly to this string, so the trailing slash must be kept.
base_url = "https://dataserver-coids.inpe.br/queimadas/queimadas/focos/csv/anual/"

In [4]:
def download_file(url, save_path, timeout=60, chunk_size=64 * 1024):
    """Stream the resource at ``url`` into the file ``save_path``.

    The body is first written to ``<save_path>.part`` and only moved onto
    ``save_path`` once the transfer has finished, so an interrupted download
    never leaves a truncated file under the final name.

    Args:
        url: Address of the file to download.
        save_path: Destination path of the downloaded file.
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.get``.
        chunk_size: Number of bytes requested per streamed chunk.

    Returns:
        True when the file was saved, False when the server answered with a
        status code other than 200 (nothing is written in that case).

    Raises:
        requests.RequestException: On connection problems or timeouts.
        OSError: When the destination cannot be written.
    """
    partial_path = f"{save_path}.part"

    # The context manager releases the streamed connection on every exit path.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Erro ao baixar {url}. Status code: {response.status_code}")
            return False

        try:
            with open(partial_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    file.write(chunk)
            # Replace the destination in a single step once all bytes are on disk.
            os.replace(partial_path, save_path)
        except BaseException:
            # Do not leave a half-written temporary file behind.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise

    return True

In [5]:
# Crawl the listing at base_url: every folder linked from the index page is
# mirrored as a subfolder of raw_data_path and each file linked inside that
# folder is downloaded into it.

# Seconds to wait for the server when requesting a listing page.
LISTING_TIMEOUT = 60


def _is_child_entry(href):
    """Return True when ``href`` is a plain relative name below the current page."""
    # Parent-directory, absolute, query and fragment links must not be followed:
    # they would either leave the listing or escape the target folder on disk.
    return not href.startswith(("/", "?", "#", "..")) and "://" not in href


def _list_entry_hrefs(listing_url):
    """Fetch a listing page and return the hrefs of the entries it links to.

    An entry is the first ``<a>`` inside a ``div.cell.name`` of a ``div.row``.
    Returns an empty list (after printing the status code) when the page does
    not answer with status 200.
    """
    listing_response = requests.get(listing_url, timeout=LISTING_TIMEOUT)
    if listing_response.status_code != 200:
        print(f"Erro ao acessar {listing_url}. Status code: {listing_response.status_code}")
        return []

    listing_soup = BeautifulSoup(listing_response.content, "html.parser")
    entry_hrefs = []
    for row in listing_soup.find_all("div", class_="row"):
        cell = row.find("div", class_="cell name")
        link = cell.find("a") if cell else None
        # An anchor without an href attribute yields None and is skipped.
        href = link.get("href") if link else None
        if href and _is_child_entry(href):
            entry_hrefs.append(href)
    return entry_hrefs


for href in _list_entry_hrefs(base_url):
    # Only folders (hrefs ending in "/") are followed from the index page.
    if not href.endswith("/"):
        continue

    folder_url = base_url + href

    # Create the matching local subfolder.
    folder_name = href.strip("/")
    folder_path = os.path.join(raw_data_path, folder_name)
    os.makedirs(folder_path, exist_ok=True)

    for file_href in _list_entry_hrefs(folder_url):
        # Nested folders are not files; saving their listing page under the
        # folder's name would only produce a bogus file.
        if file_href.endswith("/"):
            continue

        file_url = folder_url + file_href
        file_name = file_href
        file_save_path = os.path.join(folder_path, file_name)

        # Download the file.
        print(f"Baixando {file_name} para {file_save_path}")
        download_file(file_url, file_save_path)


Baixando focos_ams_ref_2003.zip para ../data/raw\AMS_sat_ref\focos_ams_ref_2003.zip
Baixando focos_ams_ref_2004.zip para ../data/raw\AMS_sat_ref\focos_ams_ref_2004.zip
Baixando focos_ams_ref_2005.zip para ../data/raw\AMS_sat_ref\focos_ams_ref_2005.zip
Baixando focos_ams_ref_2006.zip para ../data/raw\AMS_sat_ref\focos_ams_ref_2006.zip
Baixando focos_ams_ref_2007.zip para ../data/raw\AMS_sat_ref\focos_ams_ref_2007.zip
Baixando focos_ams_ref_2008.zip para ../data/raw\AMS_sat_ref\focos_ams_ref_2008.zip
Baixando focos_ams_ref_2009.zip para ../data/raw\AMS_sat_ref\focos_ams_ref_2009.zip
Baixando focos_ams_ref_2010.zip para ../data/raw\AMS_sat_ref\focos_ams_ref_2010.zip
Baixando focos_ams_ref_2011.zip para ../data/raw\AMS_sat_ref\focos_ams_ref_2011.zip
Baixando focos_ams_ref_2012.zip para ../data/raw\AMS_sat_ref\focos_ams_ref_2012.zip
Baixando focos_ams_ref_2013.zip para ../data/raw\AMS_sat_ref\focos_ams_ref_2013.zip
Baixando focos_ams_ref_2014.zip para ../data/raw\AMS_sat_ref\focos_ams_ref_2014.zip

In [9]:
import pandas as pd
from sklearn.cluster import KMeans

# Toy input: four points, one (Latitude, Longitude) pair per row.
data = {
    'Latitude': [37.7749, 34.0522, 36.1699, 40.7128],
    'Longitude': [-122.4194, -118.2437, -115.1398, -74.0060],
}
df = pd.DataFrame(data)

# Group the rows into two clusters; random_state is pinned to 0.
kmeans = KMeans(n_clusters=2, random_state=0)
kmeans.fit(df)

# Attach each row's cluster label as a new column and show the table.
df = df.assign(Cluster=kmeans.labels_)
print(df)

   Latitude  Longitude  Cluster
0   37.7749  -122.4194        0
1   34.0522  -118.2437        0
2   36.1699  -115.1398        0
3   40.7128   -74.0060        1
