<a href="https://colab.research.google.com/github/kumar045/Assignment-For-Filed/blob/main/Indian_Transcription_Translation_Updated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install nemo_toolkit['all']

Collecting nemo_toolkit[all]
  Downloading nemo_toolkit-1.23.0-py3-none-any.whl.metadata (18 kB)
Collecting onnx>=1.7.0 (from nemo_toolkit[all])
  Downloading onnx-1.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting ruamel.yaml (from nemo_toolkit[all])
  Downloading ruamel.yaml-0.18.6-py3-none-any.whl.metadata (23 kB)
Collecting wget (from nemo_toolkit[all])
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting black==19.10b0 (from nemo_toolkit[all])
  Downloading black-19.10b0-py36-none-any.whl.metadata (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting click==8.0.2 (from nemo_toolkit[all])
  Downloading click-8.0.2-py3-none-any.whl.metadata (3.2 kB)
Collecting isort<6.0.0,>5.1.0 (from nemo_toolkit[all])
  Downloading isort-5.13.2-py3-none-any.whl.metadata (12 kB)
Collecting parameterized (from nemo_toolkit

In [None]:
import nemo.collections.asr as nemo_asr
from pydub import AudioSegment
from io import BytesIO
import tempfile
import os

def convert_mp3_to_wav_in_memory(mp3_file):
    """
    Decode an MP3 and re-encode it as WAV entirely in memory.

    Args:
        mp3_file: Whatever `AudioSegment.from_mp3` accepts as its source.

    Returns:
        A BytesIO holding the WAV data, positioned at offset 0, or None if
        any step raised (the error is printed, not propagated).
    """
    wav_buffer = BytesIO()
    try:
        # Decode and re-encode in one chained call; the WAV bytes land in wav_buffer
        AudioSegment.from_mp3(mp3_file).export(wav_buffer, format="wav")
        # Rewind so the caller can read the data from the start
        wav_buffer.seek(0)
        print("MP3 converted to WAV in memory")
    except Exception as err:
        print(f"Error converting MP3 to WAV: {err}")
        return None
    return wav_buffer

def convert_stereo_to_mono_in_memory(wav_io):
    """
    Down-mix an in-memory WAV to a single channel.

    Args:
        wav_io: File-like object containing WAV data, as accepted by
            `AudioSegment.from_file(..., format="wav")`.

    Returns:
        A new BytesIO holding the one-channel WAV, positioned at offset 0,
        or None if any step raised (the error is printed, not propagated).
    """
    mono_buffer = BytesIO()
    try:
        source_clip = AudioSegment.from_file(wav_io, format="wav")
        # set_channels(1) yields the single-channel clip that gets exported
        source_clip.set_channels(1).export(mono_buffer, format="wav")
        # Rewind so the caller can read the data from the start
        mono_buffer.seek(0)
        print("Stereo WAV converted to Mono in memory")
    except Exception as err:
        print(f"Error converting stereo to mono: {err}")
        return None
    return mono_buffer

def transcribe_audio(asr_model, wav_io):
    """
    Transcribes an in-memory WAV buffer with the given ASR model.

    The model is handed a file path, so the buffer is first spilled to a
    temporary ``.wav`` file that is removed when the function returns.

    Args:
        asr_model: Object exposing ``transcribe(list_of_paths)``.
        wav_io: Binary file-like object holding the WAV data. Seekable
            buffers are rewound first, so the whole recording is written
            even if the buffer was read before.

    Returns:
        Whatever ``asr_model.transcribe`` returns, or None if any step
        raised (the error is printed, not propagated).
    """
    try:
        # Without the rewind, a buffer that was already consumed would produce
        # an empty temp file and the model would be run on no audio at all.
        if getattr(wav_io, "seekable", lambda: False)():
            wav_io.seek(0)

        # Create a temporary file to store the WAV data
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as temp_wav_file:
            # Write the in-memory audio to the temporary file
            temp_wav_file.write(wav_io.read())
            temp_wav_file.flush()  # Ensure all data is on disk before the model opens the path

            # Transcribe while the temp file still exists (it is deleted on exiting the with-block)
            transcription = asr_model.transcribe([temp_wav_file.name])
            print(f"Transcription: {transcription}")
            return transcription
    except Exception as e:
        print(f"Error transcribing audio: {e}")
        return None

def is_wav_file(file_path):
    """
    Checks whether a path names a WAV file, judged by its extension only.

    The comparison is case-insensitive and the file is never opened.

    Args:
        file_path: Path as ``str``, ``bytes`` or any ``os.PathLike``
            (for example ``pathlib.Path``).

    Returns:
        True if the path ends with ``.wav``, otherwise False.
    """
    # os.fsdecode normalises str / bytes / PathLike to str, so .lower() is always available
    return os.fsdecode(file_path).lower().endswith('.wav')

def main(
    input_file="/content/drive/MyDrive/ਪਲ ਚ ਟਰਕਟਰ ਵੜਨ ਤ ਪਹਲ ਵਹਗਰ ਗਰਮਤਰ ਦ ਜਪ ਜਰਰ ਕਰ Bhai Satnam Singh Lalu Ghuman [TubeRipper.com].wav",
    model_path="/content/drive/MyDrive/Conformer-CTC-BPE-Large.nemo",
):
    """
    Runs the full pipeline: load audio, down-mix to mono, load the ASR model, transcribe.

    Args:
        input_file: Path of the recording. Paths ending in ``.wav`` are read
            as-is; anything else is passed to ``convert_mp3_to_wav_in_memory``.
        model_path: Path of the ``.nemo`` checkpoint restored with
            ``EncDecCTCModelBPE.restore_from``.

    Returns:
        The value returned by ``transcribe_audio``, or None if conversion,
        model loading or transcription failed (each failure is printed by
        the step that hit it).
    """
    if is_wav_file(input_file):
        print("Input file is already in WAV format.")
        # Load the WAV file directly as a BytesIO object
        with open(input_file, 'rb') as f:
            wav_io = BytesIO(f.read())
    else:
        # Every non-WAV path goes through the MP3 converter.
        # NOTE(review): other formats (e.g. .m4a) would also land here - confirm that is intended.
        wav_io = convert_mp3_to_wav_in_memory(input_file)
        if wav_io is None:
            return None

    # Convert stereo WAV to mono in memory
    mono_wav_io = convert_stereo_to_mono_in_memory(wav_io)
    if mono_wav_io is None:
        return None

    # Load NeMo ASR model from .nemo file
    try:
        asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from(model_path)
        print("Model loaded successfully.")
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

    # Transcribe the mono audio and hand the result back so callers can reuse it
    return transcribe_audio(asr_model, mono_wav_io)

if __name__ == "__main__":
    main()


Input file is already in WAV format.
Stereo WAV converted to Mono in memory
[NeMo I 2024-08-20 09:45:58 mixins:172] Tokenizer SentencePieceTokenizer initialized with 128 tokens


[NeMo W 2024-08-20 09:45:58 modelPT:165] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath: /root/ekstep/nemo_exp/vakyansh-nemo-experimentation/data/punjabi_re/punjabi_train_manifest.json
    sample_rate: 16000
    batch_size: 24
    shuffle: true
    num_workers: 16
    pin_memory: true
    use_start_end_token: false
    trim_silence: false
    max_duration: 30
    min_duration: 0.1
    is_tarred: false
    tarred_audio_filepaths: null
    shuffle_n: 2048
    bucketing_strategy: synced_randomized
    bucketing_batch_size: 8
    
[NeMo W 2024-08-20 09:45:58 modelPT:172] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s). 
    Validation config : 
    manifest_filepath: /ro

[NeMo I 2024-08-20 09:45:58 features:289] PADDING: 0
[NeMo I 2024-08-20 09:46:01 save_restore_connector:249] Model EncDecCTCModelBPE was successfully restored from /content/drive/MyDrive/Conformer-CTC-BPE-Large.nemo.
Model loaded successfully.


Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]

Transcription: ['ਜਦੋਂ ਕਾਰਜ ਕਰ ਮੈਂ ਤਿਉਹਾਰ ਤੇ ਬੇਨਤੀ ਕਰਦਾ ਇੱਕ ਆਦ ਬਣਾੋ ਮੇਰੇ ਨੌਜਵਾਨ ਵੀਰ ਹੁਣ ਤੇਟਰਾਂ ਦੇਡੇਹੇ ਇੱਕ ਮੈਂ ਬੇਨਤੀ ਕਰਦਾਝੋਨ ਲੱਗਣੇ ਨੇ ਹਰ ਪਹਿਲੀਦੋਂ ਤੁਸੀਂ ਾਇੈਕਟਰ ਵ ਜੋਂ ਸਹਿਬ ਦਾ ਜਾਬ ਪਾਉਂਦਾ ਤੇ ਬਹੁਤ ਵਧੀਗਾ ਗੱਲ ਨਹੀਂਹਿ ਦਾ ਜਾ ਪਾਉਂਦਾ ਕਿ ਡਇਕਟਰ ਬਾਰਾਹ ਗੁਰੂ ਗੁਰ ਮੰਤਰ ਦਾਬ ਪੰਜ ਸੱਤ ਮਿੰਟ ਦਸ ਮਿੰਟ ਕਰਕੇ ਫਿਰ ਕਿਤੇ ਆਪਣੀ ਖੇਤੀਬਾੜੀ ਕਰਕੇ ਦੇਖੇਖੇਡੇ ਘਰਾਂ ਵਿੱਚ ਖੁਸ਼ ਪ੍ਰਤੀੱ ਕਰਕੇ ਦੇਖੇ ਤ ਇਹ ਬੇਨਤੀਾਂ ਕਰ ਰਿਹਾ ਜਿਹੜਾ ਕਾਰਜ ਵਿੱਚ ਤੁਸੀਂ ਯਾਦ ਕਰੋ ਕਾਰਜ ਵਿੱਚ ਪਣੇ ਸਹਾਈ ਹੁੰਦਾ ਆ']


In [None]:
!pip install noisereduce

Collecting noisereduce
  Downloading noisereduce-3.0.2-py3-none-any.whl.metadata (14 kB)
Downloading noisereduce-3.0.2-py3-none-any.whl (22 kB)
Installing collected packages: noisereduce
Successfully installed noisereduce-3.0.2


In [None]:
import noisereduce as nr
import librosa
import soundfile as sf

# Load the audio file.
# NOTE(review): this is a different recording from the one transcribed by the other cells shown
# in this notebook, and the file written at the end of this cell is not read by any of them -
# confirm whether this step is still part of the pipeline.
audio_file = '/content/Udaarian (Badi lambi hai kahani mere pyaar di) - Satinder Sartaaj  Love Songs  New Punjabi Songs (mp3cut.net).mp3'
# librosa.load hands back the samples and their sample rate.
# sr=None presumably keeps the file's native rate instead of resampling - confirm against the librosa docs.
audio, sr = librosa.load(audio_file, sr=None)

# Reduce noise; no noise profile is passed, only the signal and its sample rate
reduced_noise = nr.reduce_noise(y=audio, sr=sr)

# Save the reduced noise audio to a new file (relative path, at the same sample rate as the input)
sf.write('reduced_noise_song.wav', reduced_noise, sr)


In [None]:
import subprocess
import sys
import os

def run_command(command):
    """
    Run a command line through the shell and return its standard output.

    Args:
        command: The command string, executed with ``shell=True``.

    Returns:
        The command's stdout decoded as UTF-8.

    Raises:
        SystemExit: With code 1 when the command exits non-zero; the command
            and its decoded stderr are printed first.
    """
    completed = subprocess.run(command, shell=True, capture_output=True)
    if completed.returncode == 0:
        return completed.stdout.decode('utf-8')

    # Failure path: surface what was run and what it complained about, then stop
    print(f"Error executing command: {command}")
    print(completed.stderr.decode('utf-8'))
    sys.exit(1)

# Uninstall any existing version so the GitHub build below is the one that gets imported
print("Uninstalling existing IndicTransTokenizer...")
run_command(f"{sys.executable} -m pip uninstall -y IndicTransTokenizer")

# Install directly from GitHub, using the same interpreter that runs this kernel
print("Installing IndicTransTokenizer from GitHub...")
result = run_command(f"{sys.executable} -m pip install git+https://github.com/VarunGumma/IndicTransTokenizer.git")
print(result)

print("Installation completed. Let's try importing now.")

# Try importing
try:
    from IndicTransTokenizer import IndicProcessor
    print("Successfully imported IndicProcessor!")
except ImportError as e:
    print(f"Import failed: {e}")
    # Fail fast: everything after this point needs IndicProcessor, so continuing would only
    # load the translation model and then stop with a NameError.
    raise

# Now let's proceed with the translation part:
# load tokenizer + model, preprocess the sentences, tokenize, generate, decode, postprocess, print.
print("\nProceeding with translation...")

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Checkpoint id passed to both from_pretrained calls below
model_name = "ai4bharat/indictrans2-indic-en-1B"
print(f"Loading tokenizer and model from {model_name}...")

# Use the GPU when torch reports one, otherwise fall back to CPU
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

# NOTE(review): trust_remote_code=True is set on both loads - confirm the checkpoint source is trusted.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True).to(DEVICE)

print("Initializing IndicProcessor...")
# IndicProcessor is the class imported earlier in this cell
ip = IndicProcessor(inference=True)

# Sentences to translate. This is the transcription string printed by the NeMo ASR cell,
# pasted in by hand rather than read from a variable.
input_sentences = [
    "ਜਦੋਂ ਕਾਰਜ ਕਰ ਮੈਂ ਤਿਉਹਾਰ ਤੇ ਬੇਨਤੀ ਕਰਦਾ ਇੱਕ ਆਦ ਬਣਾੋ ਮੇਰੇ ਨੌਜਵਾਨ ਵੀਰ ਹੁਣ ਤੇਟਰਾਂ ਦੇਡੇਹੇ ਇੱਕ ਮੈਂ ਬੇਨਤੀ ਕਰਦਾਝੋਨ ਲੱਗਣੇ ਨੇ ਹਰ ਪਹਿਲੀਦੋਂ ਤੁਸੀਂ ਾਇੈਕਟਰ ਵ ਜੋਂ ਸਹਿਬ ਦਾ ਜਾਬ ਪਾਉਂਦਾ ਤੇ ਬਹੁਤ ਵਧੀਗਾ ਗੱਲ ਨਹੀਂਹਿ ਦਾ ਜਾ ਪਾਉਂਦਾ ਕਿ ਡਇਕਟਰ ਬਾਰਾਹ ਗੁਰੂ ਗੁਰ ਮੰਤਰ ਦਾਬ ਪੰਜ ਸੱਤ ਮਿੰਟ ਦਸ ਮਿੰਟ ਕਰਕੇ ਫਿਰ ਕਿਤੇ ਆਪਣੀ ਖੇਤੀਬਾੜੀ ਕਰਕੇ ਦੇਖੇਖੇਡੇ ਘਰਾਂ ਵਿੱਚ ਖੁਸ਼ ਪ੍ਰਤੀੱ ਕਰਕੇ ਦੇਖੇ ਤ ਇਹ ਬੇਨਤੀਾਂ ਕਰ ਰਿਹਾ ਜਿਹੜਾ ਕਾਰਜ ਵਿੱਚ ਤੁਸੀਂ ਯਾਦ ਕਰੋ ਕਾਰਜ ਵਿੱਚ ਪਣੇ ਸਹਾਈ ਹੁੰਦਾ ਆ"
]

# Language tags handed to preprocess_batch (source, target) and postprocess_batch (target)
src_lang, tgt_lang = "pan_Guru", "eng_Latn"
print("Preprocessing batch...")
batch = ip.preprocess_batch(input_sentences, src_lang=src_lang, tgt_lang=tgt_lang)

print("Tokenizing inputs...")
# Tokenize the preprocessed batch into PyTorch tensors, padded to the longest item,
# with truncation enabled and the attention mask included
inputs = tokenizer(
    batch,
    truncation=True,
    padding="longest",
    return_tensors="pt",
    return_attention_mask=True,
)

# Move inputs to the same device as the model
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

print("Generating translations...")
# Inference only, so gradient tracking is switched off for the generate() call
with torch.no_grad():
    generated_tokens = model.generate(
        **inputs,
        use_cache=True,
        min_length=0,
        max_length=256,
        num_beams=5,
        num_return_sequences=1,
    )

print("Decoding generated tokens...")
generated_tokens = generated_tokens.cpu()  # Move back to CPU for decoding
# Note: generated_tokens is rebound here from the generate() output to the batch_decode() result
with tokenizer.as_target_tokenizer():
    generated_tokens = tokenizer.batch_decode(
        generated_tokens.tolist(),
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )

print("Postprocessing translations...")
translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)

# Print each source sentence next to its translation, tagged with the language codes
print("\nTranslations:")
for input_sentence, translation in zip(input_sentences, translations):
    print(f"{src_lang}: {input_sentence}")
    print(f"{tgt_lang}: {translation}")
    print()

Uninstalling existing IndicTransTokenizer...
Installing IndicTransTokenizer from GitHub...
Collecting git+https://github.com/VarunGumma/IndicTransTokenizer.git
  Cloning https://github.com/VarunGumma/IndicTransTokenizer.git to /tmp/pip-req-build-v0xm48a_
  Resolved https://github.com/VarunGumma/IndicTransTokenizer.git to commit d4799ebf9c42fd72282b37cf82856bbd0db8c782
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting indic-nlp-library-IT2@ git+https://github.com/VarunGumma/indic_nlp_library (from IndicTransTokenizer==1.0.1)
  Cloning https://github.com/VarunGumma/indic_nlp_library to /tmp/pip-install-9yxkacyl/indic-nlp-library-it2_386e35218dec4433a0d3ecdb67b5ecd8
  Resolved https://github.com/VarunGumma/indic_nlp_library to commit 601521e05ed0ed8f2165ac317a47d186e25b6f0d
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: IndicTransTok

In [6]:
!pip install git+https://github.com/sanchit-gandhi/whisper-jax.git
!pip uninstall huggingface_hub -y
!pip install huggingface_hub

Collecting huggingface_hub
  Downloading huggingface_hub-0.24.6-py3-none-any.whl.metadata (13 kB)
Downloading huggingface_hub-0.24.6-py3-none-any.whl (417 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m417.5/417.5 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: huggingface_hub
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tokenizers 0.14.1 requires huggingface_hub<0.18,>=0.16.4, but you have huggingface-hub 0.24.6 which is incompatible.[0m[31m
[0mSuccessfully installed huggingface_hub-0.24.6


In [3]:
from whisper_jax import FlaxWhisperForConditionalGeneration, FlaxWhisperPipline
import jax.numpy as jnp

# Build the whisper-jax pipeline from the 'parthiv11/indic_whisper_nodcil' checkpoint, requesting bfloat16
pipeline = FlaxWhisperPipline('parthiv11/indic_whisper_nodcil', dtype=jnp.bfloat16)
# Transcribe the .m4a recording straight from its path.
# NOTE(review): the recorded result of this cell is in Devanagari script, while the NeMo cell's
# recorded transcription is in Gurmukhi - confirm which output language is intended here.
transcript= pipeline('/content/ਪਲ ਚ ਟਰਕਟਰ ਵੜਨ ਤ ਪਹਲ ਵਹਗਰ ਗਰਮਤਰ ਦ ਜਪ ਜਰਰ ਕਰ Bhai Satnam Singh Lalu Ghuman [TubeRipper.com].m4a')
# Bare last expression so the notebook displays the result (a dict with a 'text' key in the recorded run)
transcript

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some of the weights of FlaxWhisperForConditionalGeneration were initialized in float16 precision from the model checkpoint at parthiv11/indic_whisper_nodcil:
[('model', 'decoder', 'embed_positions', 'embedding'), ('model', 'decoder', 'embed_tokens', 'embedding'), ('model', 'decoder', 'layer_norm', 'bias'), ('model', 'decoder', 'layer_norm', 'scale'), ('model', 'decoder', 'layers', '0', 'encoder_attn', 'k_proj', 'kernel'), ('model', 'decoder', 'layers', '0', 'encoder_attn', 'out_proj', 'bias'), ('model', 'decoder', 'layers', '0', 'encoder_attn', 'out_proj', 'kernel'), ('model', 'decoder', 'layers', '0', 'encoder_attn', 'q_proj', 'bias'), ('model', 'decoder', 'layers', '0', 'encoder_attn', 'q_proj', 'kernel'), ('model', 'decoder', 'layers', '0', 'encoder_a

{'text': 'जद्दो को कार्य करो न मैं त्यौहार से पे बेनती करता हूँ एक आदत बना लो मेरे नौजवान वीरों वैसे ट्रैक्टर न देखते हैं तुम्हें अड़े या ढांचा नहीं देने एक मैं पेनती करता हूँ तुम्हें चुन ने लग गए न हर पहली बार जगह तुझे ट्रैक्टर बाड़ो न जिसे तुम्हें जपजी सेवा जाप पाउँदा बहुत बड़ी बात है नहीं न जपजी सेवा जाप पाउँदा किसी ट्रैक्टर बार खेलो के वाही गुरु गुरुमंतर जाप पाँच सात मिनट दस मिनट करके फिर किसी अपनी खेती बड़ी करके देखो देखो तो अपने कराए खुशियाँ के प्रयत्न करके देखो किसी तो मई ये पेंट का कर रहे है जिन कार्य में तुझे रापुर याद करो उस कार्य में आप आने के सहायी होता आप'}