In [None]:
!pip install --upgrade git+https://github.com/huggingface/transformers.git accelerate datasets[audio]

!python -m pip install "pymongo[srv]"

In [None]:
import os

import librosa
import torch
from bson.objectid import ObjectId
from datasets import load_dataset
from pymongo import MongoClient
from pymongo.errors import PyMongoError
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

In [None]:
# Use the first CUDA device in half precision when one is available;
# otherwise stay on the CPU in single precision.
if torch.cuda.is_available():
    device = "cuda:0"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32

model_id = "openai/whisper-large-v3"

# Load the speech-to-text model weights and move them to the chosen device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

# The processor supplies the tokenizer and feature extractor used below.
processor = AutoProcessor.from_pretrained(model_id)

# Keyword arguments for the speech-recognition pipeline, gathered in one place.
asr_pipeline_kwargs = {
    "model": model,
    "tokenizer": processor.tokenizer,
    "feature_extractor": processor.feature_extractor,
    "max_new_tokens": 128,
    "chunk_length_s": 30,
    "batch_size": 16,
    "return_timestamps": True,
    "torch_dtype": torch_dtype,
    "device": device,
}
pipe = pipeline("automatic-speech-recognition", **asr_pipeline_kwargs)

# The connection URI embeds a username and password, so it is read from the
# MONGODB_URI environment variable instead of being hard-coded in the notebook.
# NOTE(review): the URI previously committed on this line exposed real
# credentials; that password should be rotated.
conn_str = os.environ.get("MONGODB_URI")
if not conn_str:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to your MongoDB connection URI."
    )

client = None
try:
    client = MongoClient(conn_str)
    # MongoClient connects lazily, so constructing it does not prove the
    # server is reachable. A ping forces a round trip before we report success.
    client.admin.command("ping")
except PyMongoError as e:
    print(f"Error: {e}")
    if client is not None:
        client.close()
    # Re-raise: continuing without a usable client would only fail later
    # with a less helpful error.
    raise
print("Connected to MongoDB successfully!")

# Specify the database and collection
db = client["Cluster"]
collection = db["Audios"]


# Directory containing audio files
directory_path = '/workspaces/Challenge_FIAP/Challenge TOTVS Amostra de Dados v2'

# Extensions treated as audio; matched case-insensitively so files such as
# "CALL.WAV" are not silently skipped.
audio_extensions = ('.wav', '.mp3')

# The pipeline assumes a bare waveform array is already at the feature
# extractor's sampling rate. Loading at the file's native rate (sr=None) and
# passing the array as-is would mis-transcribe any file recorded at another
# rate, so librosa resamples to the model's rate at load time.
target_sr = pipe.feature_extractor.sampling_rate

try:
    # Get all files in the directory
    files = os.listdir(directory_path)

    # Keep only audio files, in sorted order so runs are reproducible.
    audio_files = sorted(
        file for file in files if file.lower().endswith(audio_extensions)
    )

    # Process each audio file
    for file_name in audio_files:
        file_path = os.path.join(directory_path, file_name)  # Full path to the audio file
        try:
            # Load (and resample if needed) the audio file
            audio, sr = librosa.load(file_path, sr=target_sr)

            # Pass the sampling rate explicitly alongside the waveform.
            result = pipe({"raw": audio, "sampling_rate": sr})

            # Insert transcription into MongoDB
            insertion_result = collection.insert_one(
                {"file_name": file_name, "transcription": result["text"]}
            )

            # Print or log the inserted document's ObjectID
            print(f"Inserted transcription for {file_name} with ID: {insertion_result.inserted_id}")

        except Exception as e:
            # Deliberate best-effort: one unreadable or failing file must not
            # abort the rest of the batch.
            print(f"Error processing {file_name}: {str(e)}")
finally:
    # Close the MongoDB connection even if listing the directory or the loop fails.
    client.close()