<a href="https://colab.research.google.com/github/rebellovcr/Shark-Hunter/blob/main/Shark_Hunter_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import os
import requests
import zipfile
import datetime
from tqdm import tqdm  # Import tqdm for progress bar

def get_date_input():
    """Prompt until a valid date is typed and return it as ``DD-MM-YYYY``.

    ``strptime`` with ``%d``/``%m`` also accepts values without a leading
    zero (for example ``1-7-2024``). The raw text is therefore not returned
    as typed: the parsed date is re-formatted with zero padding so that
    every caller receives the same canonical, fixed-width form.

    Returns:
        str: The entered date normalised to zero-padded ``DD-MM-YYYY``.
    """
    while True:
        input_date = input("Digite a data no formato DD-MM-AAAA: ")
        try:
            parsed = datetime.datetime.strptime(input_date.strip(), "%d-%m-%Y")
        except ValueError:
            # Covers both malformed text and impossible dates such as 31-02.
            print("Formato de data inválido. Digite novamente.")
            continue
        # Explicit widths instead of strftime so the padding never depends
        # on the platform's C library.
        return f"{parsed.day:02d}-{parsed.month:02d}-{parsed.year:04d}"

def download_file_with_progress(url, save_path, timeout=30):
    """Stream *url* into *save_path* while displaying a byte progress bar.

    Args:
        url: Address of the file to download.
        save_path: Local file to create; an existing file is overwritten.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a server that stops responding cannot block the
            call forever.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection errors and timeouts.
    """
    with requests.get(url, stream=True, timeout=timeout) as r:
        # Check the status before touching the disk: otherwise an error page
        # would be written to save_path as if it were the requested file.
        r.raise_for_status()

        # Without a Content-Length header the size is unknown; tqdm expects
        # total=None (not 0) in that case.
        total_size_in_bytes = int(r.headers.get('content-length', 0)) or None
        block_size = 1024  # 1 Kibibyte

        # Both the file and the progress bar are context-managed so they are
        # closed even if the transfer is interrupted part-way.
        with open(save_path, 'wb') as f, \
                tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) as progress_bar:
            for chunk in r.iter_content(chunk_size=block_size):
                f.write(chunk)
                progress_bar.update(len(chunk))

def extract_zip_with_progress(zip_file, extract_dir):
    """Unpack every member of *zip_file* into *extract_dir*.

    A progress bar advances by one for each archive member extracted.

    Args:
        zip_file: Path of the ZIP archive to read.
        extract_dir: Directory that receives the extracted members.
    """
    with zipfile.ZipFile(zip_file, 'r') as archive:
        members = tqdm(archive.namelist(), desc='Extracting', unit='files')
        for member in members:
            archive.extract(member, extract_dir)

def _format_brazilian(value):
    """Return *value* with two decimals, '.' for thousands and ',' for decimals."""
    # "{:,.2f}" yields e.g. 1,234,567.89; swapping the two separators in a
    # single pass gives 1.234.567,89.
    return "{:,.2f}".format(value).translate(str.maketrans(",.", ".,"))


def _filter_trade_line(line, min_quantity=100000):
    """Return the rewritten output line for *line*, or None if it is dropped.

    A line is kept only when it has at least nine ';'-separated fields, its
    fifth field parses as a number >= *min_quantity* and its fourth field
    parses as a number. Decimal commas are accepted in both fields.

    The output keeps the 2nd, 4th, 5th, 6th and 9th-onward fields (that is,
    the 1st, 3rd, 7th and 8th are removed) and appends the product of the
    4th and 5th fields formatted in Brazilian style.
    """
    columns = line.strip().split(';')
    if len(columns) < 9:
        return None

    try:
        quantity = float(columns[4].replace(',', '.'))
        # Written as "not >=" so that NaN is rejected, as a plain ">=" test
        # would reject it.
        if not quantity >= min_quantity:
            return None
        value = float(columns[3].replace(',', '.'))
    except ValueError:
        # Non-numeric fields (including the input's own header row) are skipped.
        return None

    kept = [columns[1], *columns[3:6], *columns[8:]]
    kept.append(_format_brazilian(value * quantity))
    return ';'.join(kept) + '\n'


def filter_and_rewrite_file(input_file):
    """Filter *input_file* in place, keeping only large-quantity rows.

    The file is read line by line, each line is passed through
    ``_filter_trade_line`` and the surviving lines are written, below a fixed
    header, to ``<input_file>.tmp``. On success the temporary file replaces
    the original; on failure the temporary file is deleted and the original
    is left untouched.

    Args:
        input_file: Path of the UTF-8, ';'-separated text file to rewrite.
    """
    temp_file = input_file + ".tmp"
    header_line = "Ativo;Valor;Quantidade;Hora;Data;Comprador;Vendedor;Valor Total da Transação\n"

    # First pass only counts lines so the progress bar has a total. The
    # handle is closed explicitly instead of being left to the garbage
    # collector.
    with open(input_file, 'r', encoding='utf-8') as infile:
        total_lines = sum(1 for _ in infile)

    try:
        with open(input_file, 'r', encoding='utf-8') as infile, \
                open(temp_file, 'w', encoding='utf-8') as outfile, \
                tqdm(total=total_lines, desc='Filtrando', unit=' linhas') as progress_bar:
            outfile.write(header_line)
            for line in infile:
                rewritten = _filter_trade_line(line)
                if rewritten is not None:
                    outfile.write(rewritten)
                progress_bar.update(1)  # One tick per input line, kept or not
    except BaseException:
        # Do not leave a half-written temporary file behind.
        if os.path.exists(temp_file):
            os.remove(temp_file)
        raise

    # os.replace swaps the file in one step, so there is no moment at which
    # input_file is missing (unlike remove followed by rename).
    os.replace(temp_file, input_file)

def main():
    """Run the whole pipeline for one user-supplied date.

    Steps, in order: ask for the date, download that day's ZIP from the B3
    file server into the current working directory, extract it there, delete
    the ZIP, and filter ``<DD-MM-YYYY>_NEGOCIOSAVISTA.txt`` in place.
    """
    input_date = get_date_input()

    # The date is typed as DD-MM-YYYY; the URL uses YYYY-MM-DD.
    date_parts = input_date.split('-')
    url_date_format = '-'.join((date_parts[2], date_parts[1], date_parts[0]))

    url = f"https://arquivos.b3.com.br/apinegocios/tickercsv/{url_date_format}.zip"
    work_dir = os.getcwd()
    zip_file_path = os.path.join(work_dir, f"{url_date_format}.zip")
    zip_basename = os.path.basename(zip_file_path)

    # --- download ---
    print("Baixando arquivo ZIP...")
    download_file_with_progress(url, zip_file_path)
    print(f"Arquivo ZIP baixado com sucesso: {zip_basename}")

    # --- extract ---
    print("Extraindo arquivo ZIP...")
    extract_zip_with_progress(zip_file_path, work_dir)
    print(f"Arquivo ZIP extraído para: {work_dir}")

    # The archive is no longer needed once its contents are on disk.
    os.remove(zip_file_path)
    print(f"Arquivo ZIP deletado: {zip_basename}")

    # --- filter ---
    input_file = f"{input_date}_NEGOCIOSAVISTA.txt"
    filter_and_rewrite_file(input_file)

    print(f"Filtro aplicado e arquivo {input_file} atualizado com sucesso.")

# Run the pipeline only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()


Digite a data no formato DD-MM-AAAA: 15-07-2024
Baixando arquivo ZIP...


100%|██████████| 70.2M/70.2M [00:08<00:00, 8.13MiB/s]


Arquivo ZIP baixado com sucesso: 2024-07-15.zip
Extraindo arquivo ZIP...


Extracting: 100%|██████████| 1/1 [00:03<00:00,  3.84s/files]


Arquivo ZIP extraído para: /content
Arquivo ZIP deletado: 2024-07-15.zip


Filtrando: 100%|██████████| 8260069/8260069 [00:09<00:00, 917336.41 linhas/s] 

Filtro aplicado e arquivo 15-07-2024_NEGOCIOSAVISTA.txt atualizado com sucesso.



