In [None]:
import glob
import os
import textwrap

import whisper
from transformers import pipeline

# 1. Settings
# Folder that is searched for the input audio (*.mp3) file.
AUDIO_DIR = "./resources/input/"
# Folder that receives the generated transcription and summary text files.
OUTPUT_DIR = "./resources/output/"
# Name of the Whisper checkpoint handed to whisper.load_model().
MODEL_SIZE = "medium"  # You can use tiny, base, small, medium, large

# 2. Create the output folder if it does not exist
# exist_ok=True keeps re-runs of this cell from failing when the folder is already there.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# 3. Pick the MP3 file to transcribe
# The character classes match the extension case-insensitively (".mp3",
# ".MP3", ...) on every platform. Sorting makes "first" deterministic:
# glob.glob() returns its matches in arbitrary, filesystem-dependent order.
mp3_files = sorted(glob.glob(os.path.join(AUDIO_DIR, "*.[mM][pP]3")))
if not mp3_files:
    # Fail early with a clear message instead of an IndexError on the next line.
    raise FileNotFoundError(f"No MP3 file found in {AUDIO_DIR}")
audio_path = mp3_files[0]
print(f"üéß Using audio file: {audio_path}")

# 4. Load the Whisper model and transcribe the selected audio file
print(f"üß† Transcribing with Whisper ({MODEL_SIZE})... please wait, this can take several minutes...")
model = whisper.load_model(MODEL_SIZE)
result = model.transcribe(audio_path)

# The text returned by Whisper typically starts with a space (each decoded
# segment is space-prefixed); strip it so the saved transcript and the text
# handed to the summarizer carry no stray leading/trailing whitespace.
transcription = result["text"].strip()
print("‚úÖ Transcription completed.")

# 5. Write the transcript to "transcription.txt" inside OUTPUT_DIR (UTF-8)
transcription_path = os.path.join(OUTPUT_DIR, "transcription.txt")
with open(transcription_path, mode="w", encoding="utf-8") as transcript_file:
    transcript_file.write(transcription)
print(f"üìù Transcription saved at {transcription_path}")

# 6. Summarize the transcript with a Hugging Face summarization model
print("üßæ Generating text summary...")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Upper bound, in characters, for the text handed to the model per call.
MAX_CHUNK_CHARS = 1024

# Break the transcript at whitespace so no word is cut in half at a chunk
# boundary (only a single word longer than the limit is ever split).
# wrap() returns [] for an empty or whitespace-only transcript, so the model
# is never asked to summarize nothing.
chunks = textwrap.wrap(
    transcription, width=MAX_CHUNK_CHARS, break_on_hyphens=False
)

# truncation=True: the model's input limit is counted in tokens, not
# characters, and non-ASCII text can tokenize to more tokens than it has
# characters; truncating keeps such a chunk from overflowing the model.
summary_parts = [
    summarizer(chunk, truncation=True)[0]["summary_text"] for chunk in chunks
]
# One partial summary per line, in transcript order.
summary = "\n".join(summary_parts)

# 7. Write the summary to "summary.txt" inside OUTPUT_DIR (UTF-8), then show it
summary_path = os.path.join(OUTPUT_DIR, "summary.txt")
with open(summary_path, mode="w", encoding="utf-8") as summary_file:
    summary_file.write(summary)
print(f"üìÑ Summary saved at {summary_path}")
# display() is not imported here; it is expected to come from the notebook session.
display(summary)


🎧 Usando archivo de audio: ./resources/input\audio_sample.mp3
🧠 Transcribiendo con Whisper (medium)...


  checkpoint = torch.load(fp, map_location=device)
