In [1]:
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

#### Get VRA file download links

In [2]:
# Accumulator for the CSV download URLs; starts out empty.
file_links_vra = list()

In [3]:
# Visit the index page of every year from 2017 through 2023 and collect the
# URL of each CSV file linked from it into `file_links_vra`.
request_timeout = 30  # seconds; without a timeout a stalled server blocks the cell forever

for i in range(2017, 2024):
    base_link = f"https://siros.anac.gov.br/siros/registros/diversos/vra/{i}"
    try:
        response = requests.get(base_link, timeout=request_timeout)
        if response.status_code != 200:
            # Report the skipped year instead of dropping it silently.
            print(f"Skipping {base_link}: HTTP {response.status_code}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        for link in soup.find_all('a', href=True):
            href = link['href']
            if not href.endswith(".csv"):
                continue
            # Resolve against the URL that was actually served so that
            # root-relative, document-relative and absolute hrefs all work.
            file_url = urljoin(response.url, href)
            # Re-running this cell must not add the same link twice.
            if file_url not in file_links_vra:
                file_links_vra.append(file_url)
    except Exception as e:
        # Best effort: report the failure and move on to the next year.
        print(f"Error occured: {e}")

In [4]:
# Display the collected download links as this cell's output.
file_links_vra

['https://siros.anac.gov.br/siros/registros/diversos/vra/2017/VRA_2017_01.csv',
 'https://siros.anac.gov.br/siros/registros/diversos/vra/2017/VRA_2017_02.csv',
 'https://siros.anac.gov.br/siros/registros/diversos/vra/2017/VRA_2017_03.csv',
 'https://siros.anac.gov.br/siros/registros/diversos/vra/2017/VRA_2017_04.csv',
 'https://siros.anac.gov.br/siros/registros/diversos/vra/2017/VRA_2017_05.csv',
 'https://siros.anac.gov.br/siros/registros/diversos/vra/2017/VRA_2017_06.csv',
 'https://siros.anac.gov.br/siros/registros/diversos/vra/2017/VRA_2017_07.csv',
 'https://siros.anac.gov.br/siros/registros/diversos/vra/2017/VRA_2017_08.csv',
 'https://siros.anac.gov.br/siros/registros/diversos/vra/2017/VRA_2017_09.csv',
 'https://siros.anac.gov.br/siros/registros/diversos/vra/2017/VRA_2017_10.csv',
 'https://siros.anac.gov.br/siros/registros/diversos/vra/2017/VRA_2017_11.csv',
 'https://siros.anac.gov.br/siros/registros/diversos/vra/2017/VRA_2017_12.csv',
 'https://siros.anac.gov.br/siros/regist

#### Create one directory per year

In [42]:
# Root folder that will hold one "VRA_<year>" sub-folder per year.
dest_path_vra = r"C:\Users\USER\Desktop\vra_bfd_dataset\VRA"

# Years 2017 through 2023 (the upper bound of range() is exclusive).
# exist_ok=True means folders that are already there are left as they are.
for i in range(2017, 2024):
    dir_name_vra = os.path.join(dest_path_vra, "VRA_" + str(i))
    os.makedirs(dir_name_vra, exist_ok=True)


#### Download VRA dataset

In [5]:
# Download every file listed in `file_links_vra` into
# <vra_dest>/VRA_<year>/<file name>, skipping files that are already present.
vra_dest = r"C:\Users\USER\Desktop\vra_bfd_dataset\VRA"
chunk_size = 5 * 1024 * 1024  # stream in 5 MiB pieces instead of loading a whole file in memory
download_timeout = 60  # seconds; without a timeout a stalled server blocks the cell forever

for link in file_links_vra:
    file_name = link.split('/')[-1]
    # The second "_"-separated part of the file name is used as the year
    # (e.g. "VRA_2017_01.csv" -> "2017").
    year = file_name.split('_')[1]
    folder = os.path.join(vra_dest, f"VRA_{year}")
    file_path = os.path.join(folder, file_name)

    if os.path.exists(file_path):
        print(f"File already exists at: {file_path}")
        continue

    # Do not rely on another cell having created the year folder.
    os.makedirs(folder, exist_ok=True)

    # Write to a temporary name first: an interrupted download must never
    # leave a truncated file at `file_path`, because the existence check
    # above would treat it as complete on the next run.
    temp_path = file_path + ".part"
    try:
        # The context manager releases the connection even when the body is
        # not read (non-200 response) or an error occurs.
        with requests.get(link, stream=True, timeout=download_timeout) as response:
            if response.status_code != 200:
                print(f"Download skipped (HTTP {response.status_code}): {link}")
                continue
            with open(temp_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size):
                    file.write(chunk)
        # Move the finished download into place in a single step.
        os.replace(temp_path, file_path)
        print(f"File saved at {file_path}")
    except Exception as e:
        print(f"Error occured: {e}")
        # Remove whatever was written before the failure.
        if os.path.exists(temp_path):
            os.remove(temp_path)

File already exists at: C:\Users\USER\Desktop\vra_bfd_dataset\VRA\VRA_2017\VRA_2017_01.csv
File already exists at: C:\Users\USER\Desktop\vra_bfd_dataset\VRA\VRA_2017\VRA_2017_02.csv
File already exists at: C:\Users\USER\Desktop\vra_bfd_dataset\VRA\VRA_2017\VRA_2017_03.csv
File already exists at: C:\Users\USER\Desktop\vra_bfd_dataset\VRA\VRA_2017\VRA_2017_04.csv
File already exists at: C:\Users\USER\Desktop\vra_bfd_dataset\VRA\VRA_2017\VRA_2017_05.csv
File already exists at: C:\Users\USER\Desktop\vra_bfd_dataset\VRA\VRA_2017\VRA_2017_06.csv
File already exists at: C:\Users\USER\Desktop\vra_bfd_dataset\VRA\VRA_2017\VRA_2017_07.csv
File already exists at: C:\Users\USER\Desktop\vra_bfd_dataset\VRA\VRA_2017\VRA_2017_08.csv
File already exists at: C:\Users\USER\Desktop\vra_bfd_dataset\VRA\VRA_2017\VRA_2017_09.csv
File saved at C:\Users\USER\Desktop\vra_bfd_dataset\VRA\VRA_2017\VRA_2017_10.csv
File saved at C:\Users\USER\Desktop\vra_bfd_dataset\VRA\VRA_2017\VRA_2017_11.csv
File saved at C:\Us

#### Save links to txt file for future use

In [37]:
# Persist the collected links, one URL per line, overwriting any earlier file.
with open("C:\\Users\\USER\\Desktop\\anac_reg_flights\\misc_data\\vra_links.txt","w") as vra_links:
    for link in file_links_vra:
        # print() appends the newline that terminates each entry.
        print(link, file=vra_links)