# Installing Whisper

The commands below will install the Python packages needed to use Whisper models and evaluate the transcription results.

In [1]:
# ! pip install git+https://github.com/openai/whisper.git
# Comentado porque ya quedó instalado en local

In [1]:
import io
import os
import numpy as np

try:
    import tensorflow  # required in Colab to avoid protobuf compatibility issues
except ImportError:
    pass

import torch
import pandas as pd
import urllib
import tarfile
import whisper
import torchaudio

from scipy.io import wavfile
from tqdm.notebook import tqdm


pd.options.display.max_rows = 100
pd.options.display.max_colwidth = 1000
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Loading the Fleurs dataset

Select the language of the Fleur dataset to download. Please note that the transcription and translation performance varies widely depending on the language. Appendix D.2 in the paper contains the performance breakdown by language.

In [3]:
import ipywidgets as widgets

languages = {"af_za": "Afrikaans", "am_et": "Amharic", "ar_eg": "Arabic", "as_in": "Assamese", "az_az": "Azerbaijani", "be_by": "Belarusian", "bg_bg": "Bulgarian", "bn_in": "Bengali", "bs_ba": "Bosnian", "ca_es": "Catalan", "cmn_hans_cn": "Chinese", "cs_cz": "Czech", "cy_gb": "Welsh", "da_dk": "Danish", "de_de": "German", "el_gr": "Greek", "en_us": "English", "es_419": "Spanish", "et_ee": "Estonian", "fa_ir": "Persian", "fi_fi": "Finnish", "fil_ph": "Tagalog", "fr_fr": "French", "gl_es": "Galician", "gu_in": "Gujarati", "ha_ng": "Hausa", "he_il": "Hebrew", "hi_in": "Hindi", "hr_hr": "Croatian", "hu_hu": "Hungarian", "hy_am": "Armenian", "id_id": "Indonesian", "is_is": "Icelandic", "it_it": "Italian", "ja_jp": "Japanese", "jv_id": "Javanese", "ka_ge": "Georgian", "kk_kz": "Kazakh", "km_kh": "Khmer", "kn_in": "Kannada", "ko_kr": "Korean", "lb_lu": "Luxembourgish", "ln_cd": "Lingala", "lo_la": "Lao", "lt_lt": "Lithuanian", "lv_lv": "Latvian", "mi_nz": "Maori", "mk_mk": "Macedonian", "ml_in": "Malayalam", "mn_mn": "Mongolian", "mr_in": "Marathi", "ms_my": "Malay", "mt_mt": "Maltese", "my_mm": "Myanmar", "nb_no": "Norwegian", "ne_np": "Nepali", "nl_nl": "Dutch", "oc_fr": "Occitan", "pa_in": "Punjabi", "pl_pl": "Polish", "ps_af": "Pashto", "pt_br": "Portuguese", "ro_ro": "Romanian", "ru_ru": "Russian", "sd_in": "Sindhi", "sk_sk": "Slovak", "sl_si": "Slovenian", "sn_zw": "Shona", "so_so": "Somali", "sr_rs": "Serbian", "sv_se": "Swedish", "sw_ke": "Swahili", "ta_in": "Tamil", "te_in": "Telugu", "tg_tj": "Tajik", "th_th": "Thai", "tr_tr": "Turkish", "uk_ua": "Ukrainian", "ur_pk": "Urdu", "uz_uz": "Uzbek", "vi_vn": "Vietnamese", "yo_ng": "Yoruba"}
selection = widgets.Dropdown(
    options=[("Select language", None), ("----------", None)] + sorted([(f"{v} ({k})", k) for k, v in languages.items()]),
    value="es_419",
    description='Language:',
    disabled=False,
)

selection

Dropdown(description='Language:', index=69, options=(('Select language', None), ('----------', None), ('Afrika…

In [4]:
lang = selection.value
language = languages[lang]

assert lang is not None, "Please select a language"
print(f"Selected language: {language} ({lang})")

Selected language: Spanish (es_419)


In [5]:
#crear archivo de texto
f = open('eli-corta2.txt','a')

#whispereando tomado de https://github.com/openai/whisper

audio = "corta-eli.mp3"


# Running inference on the dataset using a BASIC (proque en mi CPU no deja más) Whisper model

The following will take a few minutes to transcribe and translate utterances in the dataset.

In [6]:
model = whisper.load_model("base")

f.write(
    f"Model is {'multilingual' if model.is_multilingual else 'English-only'} "
    f"and has {sum(np.prod(p.shape) for p in model.parameters()):,} parameters."
)

# No se como poner las dos acciones en ua sola sentencia
print(
    f"Model is {'multilingual' if model.is_multilingual else 'English-only'} "
    f"and has {sum(np.prod(p.shape) for p in model.parameters()):,} parameters."
)


Model is multilingual and has 71,825,920 parameters.


In [7]:
options = dict(language=language, beam_size=5, best_of=5)
transcribe_options = dict(task="transcribe", **options)
translate_options = dict(task="translate", **options)

In [14]:
#references = []
transcriptions = []
#translations = []

transcription = model.transcribe('corta-eli.mp3')
#translation = model.transcribe(audio, **translate_options)["text"]
    
transcriptions.append(transcription)
  #  translations.append(translation)
#references.append(text)



In [15]:
data = pd.DataFrame(dict(transcription=transcriptions))
data

Unnamed: 0,transcription
0,"{'text': ' Caramací o Felipe Cayet, Río Puerto de ese. ¿Cuál es tu nombre? Con Fleto Daisy, si no sé. Daisy Jimenez de Spinosa. ¿Cuántos años tienes? 27. ¿Ya te quieres quitar este? Sí, un año. Un año. Estamos a 18 de agosto. Un veinte veintidós. Bueno, Daisy. Me gustaría saber más un poquito acerca de cómo ves tú. ¿Los beneficios o los no beneficios o los problemas? ¿O cómo ves tú el que le jido haga manejo forestal? Para empezar, sabemos lo que es el manejo forestal. El corte de la madera, que viene el compredor, la saca, la vende y blabla. ¿Tú cómo mujer, cómo te identificas con eso? ¿Cómo ves tú ese proceso? ¿Qué te parece? Ah, pues, para mí me es un proceso muy bien, porque... A parte de ayudar a la comunidad con recursos, pues también el compredor de eso se vive, de un trabajo y de eso es que viene a talar los árboles. De ahí saca muchas cosas, saca, por ejemplo, muebles o diversas formas que se pueda construir con la madera. Entonces, nos beneficia mucho, podemos tener, por ..."


Escribimos en el archivo

In [28]:
f.write('\n' + '<<<<<<<<<<<<<<<<<<<Inicio transcripción><<<<<>>>>>>>>>>>>' + '\n')
f.write(transcription["text"])
f.write('\n' + '<<<<<<<<<<<<<<<<<<<fin transcripcion>>>>>>>>>>>>>>>>>>>>>' + '\n')
f.close()