In [None]:
!pip install sarvamai


In [None]:
from sarvamai import SarvamAI

In [None]:
import os

# Prefer the SARVAM_API_KEY environment variable so the real key never has to be
# saved inside the notebook. If it is unset or empty, fall back to the
# placeholder, which you can replace with your actual key.
SARVAM_API_KEY = os.environ.get("SARVAM_API_KEY") or "YOUR_SARVAM_AI_API_KEY"

In [None]:
# Build the SarvamAI client with the subscription key; the later cells issue
# their speech-to-text requests through `client`.
client = SarvamAI(api_subscription_key=SARVAM_API_KEY)

In [None]:
import sys
import os


def get_audio_file():
    """Ask the user for an audio file and return its path.

    In Google Colab (detected via ``"google.colab" in sys.modules``) the file
    is chosen with the upload widget; otherwise the path is read with
    ``input()``.

    Returns:
        str | None: Path of a ``.wav``/``.mp3`` file, or ``None`` when nothing
        was uploaded, the path is not an existing file, or the extension is
        not supported. A message describing the outcome is always printed.
    """
    supported_formats = [".wav", ".mp3"]

    if "google.colab" in sys.modules:
        # Running in Google Colab: use upload widget
        from google.colab import files

        uploaded = files.upload()
        if not uploaded:
            # The upload dialog was cancelled, so there is no first key to take.
            print("No file was uploaded.")
            return None
        # When several files are uploaded, only the first one is used.
        audio_file_path = next(iter(uploaded))
        ext = os.path.splitext(audio_file_path)[1].lower()
        if ext not in supported_formats:
            print(f"Unsupported file format '{ext}'. Please upload a WAV or MP3 file.")
            return None
        print(f"File '{audio_file_path}' uploaded successfully in Colab!")
        return audio_file_path

    # Running in Jupyter Notebook: input file path
    audio_file_path = input("Enter the path to your MP3 or WAV file: ").strip()
    ext = os.path.splitext(audio_file_path)[1].lower()
    # isfile() rather than exists(): a directory would pass exists() and only
    # fail later when the path is opened for reading.
    if not os.path.isfile(audio_file_path):
        print(f"File not found at: {audio_file_path}")
        return None
    if ext not in supported_formats:
        print(f"Unsupported file format '{ext}'. Please provide a WAV or MP3 file.")
        return None
    print(f"File '{audio_file_path}' found successfully in Jupyter!")
    return audio_file_path

In [None]:
# Select the audio file: in Colab an upload widget appears; in Jupyter, type the
# file path and press Enter/Return. The result is None if no valid file was given.
audio_file_path = get_audio_file()

In [None]:
# Send the selected audio file to the speech-to-text translate endpoint.
if not audio_file_path:
    print("No audio file found. Transcription aborted.")
else:
    with open(audio_file_path, "rb") as audio_file:
        response = client.speech_to_text.translate(
            file=audio_file,
            model="saaras:v2.5",
        )
    print("Transcription Response:")
    print(response)

In [None]:
# Repeat the translate request and show the raw response object.
if not audio_file_path:
    print("No valid audio file found.")
else:
    with open(audio_file_path, "rb") as audio_file:
        response = client.speech_to_text.translate(
            file=audio_file,
            model="saaras:v2.5",
        )
    print(response)

In [None]:
# Translate request with the model named explicitly.
if not audio_file_path:
    print("No valid audio file found.")
else:
    with open(audio_file_path, "rb") as audio_file:
        response = client.speech_to_text.translate(file=audio_file, model="saaras:v2.5")
    print(response)

In [None]:
# Translate request that also passes a `prompt` describing the audio's domain.
if not audio_file_path:
    print("No valid audio file found.")
else:
    with open(audio_file_path, "rb") as audio_file:
        response = client.speech_to_text.translate(
            file=audio_file,
            model="saaras:v2.5",
            prompt="Medical consultation",
        )
    print(response)

In [None]:
import os
import subprocess


def split_audio_ffmpeg(audio_path, chunk_duration=29, output_dir="chunks"):
    """Split an audio file into fixed-length chunks with ffmpeg's segment muxer.

    Chunks are written as ``<output_dir>/<name>_NNN<ext>``. Files left in
    ``output_dir`` by an earlier split of the same input name are deleted
    first, and files belonging to other inputs are ignored, so the result
    only ever contains chunks produced by this call.

    Args:
        audio_path: Path of the source audio file. A falsy value (``None`` or
            ``""``) yields an empty result instead of raising.
        chunk_duration: Length of each chunk in seconds (``-segment_time``).
        output_dir: Directory that receives the chunks; created if missing.

    Returns:
        list[str]: Sorted chunk paths. Empty when no input was given, the
        ``ffmpeg`` executable cannot be started, or ffmpeg exits with a
        non-zero return code.
    """
    if not audio_path:
        print("No audio file provided. Nothing to split.")
        return []

    os.makedirs(output_dir, exist_ok=True)
    ext = os.path.splitext(audio_path)[1].lower()
    base_name = os.path.splitext(os.path.basename(audio_path))[0]
    output_pattern = os.path.join(output_dir, f"{base_name}_%03d{ext}")
    chunk_prefix = f"{base_name}_"

    def is_chunk_of_this_file(file_name):
        # Accept only names the "%03d" pattern can generate for this input.
        if not (file_name.startswith(chunk_prefix) and file_name.endswith(ext)):
            return False
        index = file_name[len(chunk_prefix):len(file_name) - len(ext)]
        return len(index) >= 3 and index.isdigit()

    # Drop chunks from a previous run: ffmpeg overwrites only the indices it
    # produces now, so leftovers with higher indices would pollute the result.
    for stale_name in filter(is_chunk_of_this_file, os.listdir(output_dir)):
        os.remove(os.path.join(output_dir, stale_name))

    codec = "pcm_s16le" if ext == ".wav" else "libmp3lame"

    command = [
        "ffmpeg",
        "-i",
        audio_path,
        "-f",
        "segment",
        "-segment_time",
        str(chunk_duration),
        "-c:a",
        codec,
        output_pattern,
    ]

    print("Running command:", " ".join(command))

    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except FileNotFoundError:
        # Raised when the ffmpeg executable itself is not installed / on PATH.
        print("ffmpeg executable not found. Install ffmpeg and make sure it is on PATH.")
        return []

    print("Return code:", result.returncode)
    print("STDOUT:\n", result.stdout)
    print("STDERR:\n", result.stderr)

    if result.returncode != 0:
        # Partial output of a failed run would silently truncate the transcript.
        print("ffmpeg failed; no chunks returned.")
        return []

    output_files = sorted(
        os.path.join(output_dir, f)
        for f in os.listdir(output_dir)
        if is_chunk_of_this_file(f)
    )

    print("Chunks generated:", output_files)
    return output_files

In [None]:
def translate_audio_chunks(chunk_paths, client, model="saaras:v2.5"):
    """Translate each audio chunk and join the results into one transcript.

    Args:
        chunk_paths: Paths of the audio chunks, in playback order.
        client: Object exposing ``speech_to_text.translate(file=..., model=...)``.
        model: Model identifier forwarded to every ``translate`` call.

    Returns:
        str: Text of the successfully translated chunks joined by single
        spaces. A chunk whose request raises is reported and skipped, so one
        failing chunk does not discard the rest of the transcript.
    """
    full_transcript = []
    total_chunks = len(chunk_paths)

    for idx, chunk_path in enumerate(chunk_paths, start=1):
        print(f"\nTranslating chunk {idx}/{total_chunks} → {chunk_path}")
        with open(chunk_path, "rb") as audio_file:
            try:
                response = client.speech_to_text.translate(file=audio_file, model=model)
            except Exception as e:
                print(f"Error with chunk {chunk_path}: {e}")
                continue

        print("Chunk Response:", response)
        # The translated text lives in the response's `transcript` field;
        # str(response) would embed the whole response repr (request id,
        # language code, ...) in the transcript. Fall back to str() only for
        # clients that return plain text.
        text = getattr(response, "transcript", None)
        if text is None:
            text = response
        text = str(text).strip()
        if text:
            full_transcript.append(text)

    return " ".join(full_transcript)

In [None]:
# 1. Split the audio. Skip the split when no valid file was selected:
#    audio_file_path is None in that case and cannot be split.
chunks = split_audio_ffmpeg(audio_file_path) if audio_file_path else []

# 2. Translate each chunk and collate
if chunks:
    final_transcript = translate_audio_chunks(chunks, client)
    print("\nFinal Combined Transcript:\n")
    print(final_transcript)
else:
    print("No audio chunks generated. Transcription aborted.")

## **2. Authentication**

To use the API, you need an API subscription key. Follow these steps to set up your API key:

1. **Obtain your API key**: If you don’t have an API key, sign up on the [Sarvam AI Dashboard](https://dashboard.sarvam.ai/) to get one.
2. **Replace the placeholder key**: In the code below, replace "YOUR_SARVAM_AI_API_KEY" with your actual API key.

## **6. Error Handling**  

You may encounter these errors while using the API:  

| Error Code | HTTP Status | Cause | Solution |
|------------|-------------|-------|----------|
| `invalid_api_key_error` | 403 Forbidden | Invalid API key | Use a valid API key from the [Sarvam AI Dashboard](https://dashboard.sarvam.ai/) |
| `insufficient_quota_error` | 429 Too Many Requests | Exceeded API quota | Check your usage, upgrade if needed, or implement exponential backoff |
| `internal_server_error` | 500 Internal Server Error | Issue on servers | Try again later. If persistent, contact support |
| `invalid_request_error` | 400 Bad Request | Incorrect request formatting | Verify your request structure and parameters |
| `rate_limit_exceeded_error` | 429 Too Many Requests | Rate limit exceeded | Implement rate limiting and retry with backoff |


## **7. Additional Resources**

For more details, refer to our official documentation. We are also available to support and help you on our Discord server:

- **Documentation**: [docs.sarvam.ai](https://docs.sarvam.ai)  
- **Community**: [Join the Discord Community](https://discord.gg/hTuVuPNF)

## **8. Final Notes**

- Keep your API key secure.
- Use clear audio for best results.
- Saaras automatically detects the input language and translates to English.
- Use the `prompt` parameter for domain-specific translations (e.g., medical, legal).

**Keep Building!** 🚀