<a href="https://colab.research.google.com/github/suan200A/EJERCICIOS-PRACTICOS-2/blob/main/Traductor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# --- Instalación de librerías ---
!pip install tensorflow tensorflow-datasets librosa gtts transformers tensorflowjs



In [2]:
# --- Importaciones ---
import os
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import librosa
from gtts import gTTS
from transformers import pipeline

In [3]:
# --- Output folder ---
# Base folder for exported files and its TensorFlow.js subfolder.
EXPORT_FOLDER = "/content/carpeta_salida"
TFJS_FOLDER = os.path.join(EXPORT_FOLDER, "tfjs_model")
# Creating the nested folder also creates EXPORT_FOLDER; exist_ok makes re-runs safe.
os.makedirs(TFJS_FOLDER, exist_ok=True)

In [22]:
# --- Load the example dataset (Speech Commands) ---
dataset_name = "speech_commands"
# NOTE(review): no dataset version is pinned in this call, so tfds resolves its default version.
# as_supervised=True yields (audio, label) pairs; with_info=True also returns ds_info,
# which is used later to size the classifier's output layer.
(ds_train, ds_test), ds_info = tfds.load(dataset_name, split=["train", "test"], shuffle_files=True, with_info=True, as_supervised=True)

In [23]:
# --- Función para extraer MFCCs ---
def extract_features(audio, sr=16000, max_len=40, n_mfcc=20):
    """Return a fixed-size MFCC matrix for one audio clip.

    Args:
        audio: 1-D array of audio samples. Non-floating dtypes (e.g. integer
            PCM) are cast to float32 because librosa only accepts
            floating-point audio.
        sr: Sample rate of ``audio`` in Hz.
        max_len: Number of time frames in the result. Shorter clips are
            zero-padded at the end, longer clips are truncated.
        n_mfcc: Number of MFCC coefficients computed per frame.

    Returns:
        Array of shape ``(max_len, n_mfcc)`` with one row per time frame.
    """
    audio = np.asarray(audio)
    if not np.issubdtype(audio.dtype, np.floating):
        # librosa rejects integer input, so convert before extracting features.
        audio = audio.astype(np.float32)

    # librosa returns (n_mfcc, frames); transpose so that time is the first axis.
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc).T

    # Force every clip to the same number of frames.
    n_frames = len(mfcc)
    if n_frames < max_len:
        return np.pad(mfcc, ((0, max_len - n_frames), (0, 0)), mode='constant')
    return mfcc[:max_len, :]

In [24]:
# --- Build the training arrays ---
# Only the first 500 examples are used to keep the demo quick.
demo_examples = tfds.as_numpy(ds_train.take(500))

# Cast each clip to float32 before MFCC extraction and keep its label alongside.
feature_label_pairs = [
    (extract_features(clip.astype(np.float32)), clip_label)
    for clip, clip_label in demo_examples
]

X_train = np.array([clip_features for clip_features, _ in feature_label_pairs])
y_train = np.array([clip_label for _, clip_label in feature_label_pairs])
print(f"Preparación completada. Dimensiones:")
print(f"X_train: {X_train.shape}")
print(f"y_train: {y_train.shape}")

Preparación completada. Dimensiones:
X_train: (500, 40, 20)
y_train: (500,)


In [25]:
# --- Define and compile the CNN audio model ---
# Two Conv1D + max-pooling stages feed a small dense classifier.
model_cnn = tf.keras.Sequential([
    # Explicit Input layer (avoids the input_shape warning); its shape is taken
    # from the per-example dimensions of X_train.
    tf.keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2])),
    tf.keras.layers.Conv1D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Conv1D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    # One softmax output per class declared in the dataset metadata.
    tf.keras.layers.Dense(ds_info.features["label"].num_classes, activation='softmax')
])
# The sparse loss is used because y_train is passed as class ids, not one-hot vectors.
model_cnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_cnn.summary()

In [26]:
# --- Train the model ---
# Fits on the in-memory training arrays only; no validation data is passed,
# so the accuracy reported per epoch is training accuracy.
model_cnn.fit(X_train, y_train, epochs=5, batch_size=32)

Epoch 1/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 77ms/step - accuracy: 0.3617 - loss: 44.2590
Epoch 2/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4665 - loss: 8.5400  
Epoch 3/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5124 - loss: 3.7916 
Epoch 4/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4943 - loss: 2.8172 
Epoch 5/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4837 - loss: 2.1032 


<keras.src.callbacks.history.History at 0x7a7325c09580>

In [27]:
# --- Save the Keras model ---
# The ".keras" extension selects the native Keras format; the file is written under EXPORT_FOLDER.
keras_model_path = os.path.join(EXPORT_FOLDER, "audio_model.keras")
model_cnn.save(keras_model_path)

In [21]:
# --- Export to TensorFlow.js ---
# NOTE(review): tensorflowjs is imported here rather than in the notebook's import cell.
import tensorflowjs as tfjs
# Converts the in-memory Keras model and writes the result into TFJS_FOLDER.
tfjs.converters.save_keras_model(model_cnn, TFJS_FOLDER)
print("Modelo TF.js exportado en:", TFJS_FOLDER)



failed to lookup keras version from the file,
    this is likely a weight only file
Modelo TF.js exportado en: /content/carpeta_salida/tfjs_model


In [43]:
# --- Initialize the speech-recognition and translation pipelines ---
# chunk_length_s=30 lets the pipeline split long recordings into 30-second
# windows; Whisper itself only processes up to 30 seconds of audio per pass.
asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-tiny",
    device="cpu",
    chunk_length_s=30,
)
# English -> Spanish translation model; no device is given, so transformers picks one.
translator_pipeline = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")

Device set to use cpu
Device set to use cuda:0


In [42]:
# --- Funciones para transcripción, traducción y texto a voz ---
def transcribe_audio(file_path):
    """Run the module-level ``asr`` pipeline on ``file_path`` and return the "text" field of its result."""
    return asr(file_path)["text"]
def translate_text(text):
    """Translate ``text`` with the module-level ``translator_pipeline``.

    Args:
        text: Text to translate.

    Returns:
        The ``translation_text`` of the first pipeline result, or an empty
        string when ``text`` is a blank or whitespace-only string.
    """
    # A blank transcript has nothing to translate; skip the model call so it
    # cannot produce output for empty input.
    if isinstance(text, str) and not text.strip():
        return ""
    return translator_pipeline(text)[0]['translation_text']
def text_to_speech(text, filename, lang="es"):
    """Synthesize ``text`` with gTTS and save the audio to ``filename``.

    Args:
        text: Text to be spoken.
        filename: Path of the audio file to write.
        lang: Language code passed to gTTS. Defaults to Spanish ("es").

    Returns:
        ``filename``, so the call can be chained with code that consumes the path.
    """
    tts = gTTS(text=text, lang=lang)
    tts.save(filename)
    return filename
# Status message printed once this cell has defined the helper functions above.
print("Colab: Preparación completa. Modelos guardados y listos para exportar.")

Colab: Preparación completa. Modelos guardados y listos para exportar.


In [44]:
!pip install gradio soundfile



In [47]:
import gradio as gr

In [49]:
def translate_fn(audio_input):
    """Transcribe an uploaded audio clip and translate the transcript to Spanish.

    Args:
        audio_input: ``(sample_rate, samples)`` tuple, or ``None`` when no
            audio was provided.

    Returns:
        A string with the transcript and its translation, a prompt asking for
        audio when ``audio_input`` is ``None``, or an error message when
        writing, transcribing or translating fails.
    """
    # Imported locally: soundfile is installed in a later cell than the
    # notebook's main import cell.
    import soundfile as sf
    import tempfile

    if audio_input is None:
        return "Por favor, sube un archivo de audio."

    sample_rate, audio_data = audio_input
    audio_data = np.asarray(audio_data)

    if np.issubdtype(audio_data.dtype, np.integer):
        # Integer PCM must be rescaled to [-1, 1] before the float cast:
        # soundfile treats float samples as normalized and clips anything
        # outside that range, which would distort the recording.
        # NOTE(review): unsigned integer input is scaled to [0, 1] without
        # re-centering; only signed PCM is expected here.
        info = np.iinfo(audio_data.dtype)
        full_scale = float(max(abs(info.min), info.max))
        audio_data = audio_data.astype(np.float32) / full_scale
    elif audio_data.dtype != np.float32:
        audio_data = audio_data.astype(np.float32)

    # Reserve a temporary path and close the handle before anything writes to it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        audio_path = tmp_file.name

    try:
        # Writing happens inside the try so the temp file is removed even if it fails.
        sf.write(audio_path, audio_data, sample_rate)
        txt = transcribe_audio(audio_path)
        tr = translate_text(txt)
        result = f"Texto original:\n{txt}\n\nTraducción (ES):\n{tr}"
    except Exception as e:
        # Deliberately broad: the UI shows the message instead of a traceback.
        result = f"Ocurrió un error durante el procesamiento: {e}"
    finally:
        # Clean up the temporary file.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    return result

# Gradio UI: one audio input wired to translate_fn, one text box for the result.
demo = gr.Interface(
    fn=translate_fn,
    inputs=gr.Audio(type="numpy", label="Sube tu audio"),  # "numpy" hands fn a (sample_rate, samples) tuple
    outputs=gr.Textbox(label="Resultado"),
    title="Traductor de Audio a Español"
)

# share=True requests a public share link in addition to the local server.
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://20c6a49c446506c183.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


