In [2]:
# --- Configurações e Listas Iniciais ---
import subprocess
import sys
import os
#import optimum

packages_and_git_to_check = [
    "torch",
    "glob2",
    "install",
    "shutils",
    "requests",
    "os",
    "python-docx",
    "git+https://github.com/openai/whisper.git",
    "git+https://github.com/huggingface/transformers.git",
    "git+https://github.com/huggingface/accelerate.git"
]

ubuntu_packages_to_check = ["ffmpeg"]


#from kaggle_secrets import UserSecretsClient
#user_secrets = UserSecretsClient()
#secret_value_0 = user_secrets.get_secret("OPENAI_API_KEY")
## https://huggingface.co/settings/tokens
#OPENAI_API_KEY = secret_value_0


cmd_limpar = "rm -rf *.wav *.txt *.docx"
os.system(cmd_limpar)

def is_package_installed(package_name):
    try:
        __import__(package_name)
        return True
    except ImportError:
        return False

def is_git_package_installed(git_url):
    installed_packages = str(subprocess.check_output([sys.executable, "-m", "pip", "list"]))
    package_name = git_url.split("/")[-1].replace(".git", "")
    return package_name in installed_packages

def is_ubuntu_package_installed(ubuntu_package):
    try:
        subprocess.check_output(["dpkg", "-l", ubuntu_package])
        return True
    except subprocess.CalledProcessError:
        return False

def install_package(package_name_or_git_url):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package_name_or_git_url])

def install_ubuntu_package(ubuntu_package):
    subprocess.check_call(["sudo", "apt-get", "install", "-y", ubuntu_package])


# --- Instalação de Pacotes ---

for item in packages_and_git_to_check:
    if item.startswith("git+"):
        if not is_git_package_installed(item):
            install_package(item)
    else:
        if not is_package_installed(item):
            install_package(item)

for ubuntu_package in ubuntu_packages_to_check:
    if not is_ubuntu_package_installed(ubuntu_package):
        install_ubuntu_package(ubuntu_package)
        
        
# --- ffmpeg normalize extract ---


def normalize_audio(input_audio, output_audio):
    cmd = [
        "ffmpeg",
        "-y", # para permitir a sobregravação de arquivos
        "-i", input_audio,
        #"-af", "acompressor=threshold=-18dB:ratio=5:1",
        #"-af", "acompressor=threshold=-15dB:ratio=3:1,atempo=0.5",
        "-af", "dynaudnorm",
        "-af", "volume=3.0",
        output_audio
    ]
    try:
        subprocess.run(cmd, check=True, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        print(f"Ocorreu um erro: {e.stderr.decode('utf-8')}")
    #subprocess.run(cmd, check=True)
    
    
def extract_audio_from_video(video_path, audio_output_path):
    cmd = [
        "ffmpeg",
        "-i", video_path,
        "-q:a", "0",  # Qualidade máxima
        "-map", "a", 
        audio_output_path
    ]
    subprocess.run(cmd, check=True)


# --- Manipulação de Arquivos e Preparação de Modelos ---

# Agora podemos importar os módulos com segurança após a instalação
import requests
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
#from datasets import load_dataset
import shutil
import gc
import whisper
#import optimum

torch.cuda.empty_cache()
gc.collect()

# Configuração do modelo Whisper
#model_name = "large-v3"
model_id = "openai/whisper-large-v3"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
#model = whisper.load_model(model_name, DEVICE)

model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True)
#model = model.to_bettertransformer()
model.to(device)

processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=30,
    batch_size=16,
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)



# --- Transcrição de Áudios ---

base_dir = "/kaggle/input/audios/"
extensions = ('.mp4', '.mp3', '.aac', '.m4v')
mp4_files = [f for f in os.listdir(base_dir) if f.endswith(extensions)]

#options = dict(language="pt", beam_size=5, patience=2, best_of=5)
#options = dict(beam_size=5, patience=2, best_of=5)

#transcribe_options = dict(task="transcribe", **options)

    
for file in mp4_files:
    input_video = os.path.join(base_dir, file)
    
    # Retira a extensão do arquivo e adiciona "_extracted.wav"
    base_filename = os.path.basename(file)
    for ext in extensions:
        base_filename = base_filename.replace(ext, "")
    extracted_audio = os.path.join("/kaggle/working/", base_filename + "_extracted.wav")
    
    extract_audio_from_video(input_video, extracted_audio)

    # Normalizar o áudio extraído
    normalized_audio = os.path.splitext(extracted_audio)[0] + "_normalized.wav"
    normalize_audio(extracted_audio, normalized_audio)

    # Use o áudio normalizado para transcrição
    #script = model.transcribe(normalized_audio, **transcribe_options)["text"]
    #script = pipe(normalized_audio, generate_kwargs={"language": "portuguese", "task": "transcribe"})
    script = pipe(normalized_audio, generate_kwargs={"language": "portuguese", "task": "transcribe"})["text"]
    txt_file_name = os.path.splitext(file)[0] + '.txt'
    with open(txt_file_name, 'w', encoding='utf-8') as txt_file:
        txt_file.write(f"{script}")
    print(f"Transcrição para {file} salva em {txt_file_name}")
    print(f"{script}")
    
    
# Conversão de txt para docx.

from docx import Document

# Caminho para a pasta onde estão os arquivos .txt
diretorio_txt = "/kaggle/working/"

# Caminho para a pasta onde você quer salvar os arquivos .docx
diretorio_docx = "/kaggle/working/"

# Verifica se o diretório para salvar os arquivos docx existe, caso contrário, cria
# if not os.path.exists(diretorio_docx):
#     os.makedirs(diretorio_docx)

# Lista todos os arquivos no diretório
for nome_arquivo in os.listdir(diretorio_txt):
    if nome_arquivo.endswith(".txt"):
        
        # Cria um novo documento docx
        doc = Document()
        
        # Abre o arquivo txt e lê o conteúdo
        with open(os.path.join(diretorio_txt, nome_arquivo), 'r', encoding='utf-8') as f:
            conteudo = f.read()
        
        # Adiciona o conteúdo ao documento docx
        doc.add_paragraph(conteudo)
        
        # Salva o documento docx
        nome_docx = os.path.splitext(nome_arquivo)[0] + ".docx"
        doc.save(os.path.join(diretorio_docx, nome_docx))

print("Conversão concluída.")


Collecting glob2
  Downloading glob2-0.7.tar.gz (10 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: glob2
  Building wheel for glob2 (setup.py): started
  Building wheel for glob2 (setup.py): finished with status 'done'
  Created wheel for glob2: filename=glob2-0.7-py2.py3-none-any.whl size=9301 sha256=74e0dc7f135287373a1994c2c0d676f3de9f3ddbd05ac32c715ab6ec2e89b020
  Stored in directory: /root/.cache/pip/wheels/37/07/ce/cbe8d31ad93224571b49fa03f8a5da11cdb31d3845ff73e0f3
Successfully built glob2
Installing collected packages: glob2
Successfully installed glob2-0.7
Collecting install
  Downloading install-1.3.5-py3-none-any.whl (3.2 kB)
Installing collected packages: install
Successfully installed install-1.3.5
Collecting shutils
  Downloading shutils-0.1.0.tar.gz (2.3 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting config

  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-plfbhdxp


  Resolved https://github.com/openai/whisper.git to commit e58f28804528831904c3b6f2c0e473f346223433
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting triton<3,>=2.0.0 (from openai-whisper==20231117)
  Obtaining dependency information for triton<3,>=2.0.0 from https://files.pythonhosted.org/packages/4d/22/91a8af421c8a8902dde76e6ef3db01b258af16c53d81e8c0d0dc13900a9e/triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata
  Downloading triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)
Collecting tiktoken (from openai-whisper==20231117)
  Obtaining dependency information for tiktoken from https://files.pythonhosted.org/packages/f4/2e/0a



Downloading config.json:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

Downloading generation_config.json:   0%|          | 0.00/3.87k [00:00<?, ?B/s]

Downloading (…)rocessor_config.json:   0%|          | 0.00/340 [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/283k [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/2.48M [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/494k [00:00<?, ?B/s]

Downloading normalizer.json:   0%|          | 0.00/52.7k [00:00<?, ?B/s]

Downloading added_tokens.json:   0%|          | 0.00/34.6k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.07k [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsr

Transcrição para aula7.mp4 salva em aula7.txt
 Muito bem, bom dia a todos. Começando aqui oficialmente nossa sétima aula, já caminhando para o encerramento do nosso curso. Espero que esteja sendo proveitoso para todos. Na aula de hoje nós escolhemos o tema da imprescritibilidade. É um tema que a gente não vê muito. Alguma coisa que nós vamos conversar aqui hoje. É algo que nós já tratamos na aula passada, mas a minha aula vai falando dos temas, mas chega uma hora que a gente tenta dar uma sistematizada melhor para que vocês possam que vocês possam organizar as ideias aí no quadro mais sistema também deu uma olhadinha esse final de semana aqui na no tema dos das atividades eu acho que tá todo mundo indo bem né é ali no tema, a atividade, essa que está acontecendo até hoje, é sobre o tema da incapacidade, né, da perda dos incapazes, do benefício da suspensa da prescrição. Então, não vou comentar, não, deixo para vocês quebarem a cabeça aí um pouco. Com relação à anterior, que já está con

In [None]:
import os
os._exit(00)

In [1]:
 ! rm -rf *.wav *.txt *.docx *.mp4

In [None]:
!cat /proc/cpuinfo