<a href="https://colab.research.google.com/github/shivam110601/sarvam-api-test/blob/main/sarvam_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Modules

In [8]:
!pip install -qU ffmpeg-python

In [9]:
!pip install -q sounddevice "scipy<1.14.0"

In [10]:
!apt-get install -y libportaudio2

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libportaudio2 is already the newest version (19.6.0-1.1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


In [11]:
import requests
import base64
import io
import json
import wave
import soundfile as sf
import ffmpeg
from IPython.display import Audio
from google.colab import userdata
import sounddevice as sd
from scipy.io.wavfile import write
import numpy as np

# Subscription key for the Sarvam API. It is looked up through google.colab's
# `userdata.get` under the name 'SARVAM_SUBSCRIPTION_KEY', so the key itself is
# never written into the notebook.
sarvam_sub_key = userdata.get('SARVAM_SUBSCRIPTION_KEY')

## Translate ✅

In [25]:
def sarvam_translate(text, target_lang="hi-IN", gender="Male", mode="formal", preprocess=False, timeout=30):
    """Translate English text with the Sarvam ``/translate`` endpoint.

    Args:
        text: English source text (the source language is fixed to ``en-IN``).
        target_lang: Target language code. Available options: hi-IN, bn-IN,
            kn-IN, ml-IN, mr-IN, od-IN, pa-IN, ta-IN, te-IN, gu-IN.
        gender: Sent as ``speaker_gender``. Available options: Male, Female.
        mode: Translation mode. Available options: formal, code-mixed.
        preprocess: Sent as ``enable_preprocessing``.
        timeout: Seconds to wait for the server before the request is
            abandoned; without it a stalled connection would block forever.

    Returns:
        The ``translated_text`` field of the JSON response.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status.
        KeyError: A successful response carried no ``translated_text`` field.
    """
    tr_url = "https://api.sarvam.ai/translate"

    headers = {
        'Content-Type': "application/json",
        'API-Subscription-Key': sarvam_sub_key
    }

    data = {
        "input": text,
        "source_language_code": "en-IN",   # Only english available
        "target_language_code": target_lang,
        "speaker_gender": gender,
        "mode": mode,
        "model": "mayura:v1",
        "enable_preprocessing": preprocess
    }

    response = requests.request("POST", tr_url, json=data, headers=headers, timeout=timeout)
    # Fail with the HTTP status instead of an opaque KeyError further down.
    response.raise_for_status()
    # response.json() detects the UTF encoding of the body itself, which is
    # safer for Indic-script text than decoding through response.text.
    return response.json()['translated_text']

In [26]:
# Translate one English sentence into every supported target language and
# print each result in bold (\033[1m switches bold on, \033[0m resets it).
text = "Unfortunately, the direct access to the microphone from a Google Colab environment is not possible."

langs = [
    "hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN",
    "od-IN", "pa-IN", "ta-IN", "te-IN", "gu-IN",
]

print("Translation for", f"\033[1m{text}\033[0m", "without preprocessing.\n")

for lang in langs:
    # One API call per target language; preprocessing stays at its default (off).
    print(
        f"For {lang}: \t",
        f"\033[1m{sarvam_translate(text, target_lang=lang)}\033[0m\n",
    )

Translation for [1mUnfortunately, the direct access to the microphone from a Google Colab environment is not possible.[0m without preprocessing.

For hi-IN: 	 [1mदुर्भाग्य से, गूगल कोलैब वातावरण से माइक्रोफोन तक सीधा अभिगम संभव नहीं है।[0m

For bn-IN: 	 [1mদুর্ভাগ্যবশত, গুগল কোলাব পরিবেশ থেকে মাইক্রোফোনে সরাসরি প্রবেশ করা সম্ভব নয়।[0m

For kn-IN: 	 [1mದುರದೃಷ್ಟವಶಾತ್, ಗೂಗಲ್ ಕೋಲಾಬ್ ಪರಿಸರದಿಂದ ಮೈಕ್ರೋಫೋನ್‌ಗೆ ನೇರ ಪ್ರವೇಶವು ಸಾಧ್ಯವಿಲ್ಲ.[0m

For ml-IN: 	 [1mനിർഭാഗ്യവശാൽ, ഒരു ഗൂഗിൾ കോളാബ് പരിതസ്ഥിതിയിൽ നിന്ന് മൈക്രോഫോണിലേക്കുള്ള നേരിട്ടുള്ള പ്രവേശനം സാധ്യമല്ല.[0m

For mr-IN: 	 [1mदुर्दैवाने, गुगल कोलॅब वातावरणातून मायक्रोफोनपर्यंत थेट प्रवेश शक्य नाही.[0m

For od-IN: 	 [1mଦୁର୍ଭାଗ୍ୟବଶତଃ, ଏକ ଗୁଗୁଲ କୋଲାବ ପରିବେଶରୁ ମାଇକ୍ରୋଫୋନକୁ ସିଧାସଳଖ ପ୍ରବେଶ ସମ୍ଭବ ନୁହେଁ।[0m

For pa-IN: 	 [1mਬਦਕਿਸਮਤੀ ਨਾਲ, ਗੂਗਲ ਕੋਲੈਬ ਵਾਤਾਵਰਣ ਤੋਂ ਮਾਈਕ੍ਰੋਫੋਨ ਤੱਕ ਸਿੱਧੀ ਪਹੁੰਚ ਸੰਭਵ ਨਹੀਂ ਹੈ।[0m

For ta-IN: 	 [1mதுரதிர்ஷ்டவசமாக, கூகுள் கோலாப் சூழலில் இருந்து மைக்ரோஃபோனுக்கு நேரடி அணுகல் சாத்தியமற்றது.[0m

For te-IN: 	 [1mదు

## Text to Speech (hi,mr) ✅

In [None]:
def sarvam_tts(text, tr_lang='hi-IN', voice='meera', preprocess=True,
               pitch=0.5, pace=1.5, loudness=1.8, sample_rate=16000, timeout=60):
    """Synthesize speech with the Sarvam ``/text-to-speech`` endpoint.

    Args:
        text: A list of strings to synthesize. A single string is also
            accepted and is wrapped into a one-element list.
        tr_lang: Target language code. Available options: hi-IN, bn-IN, kn-IN,
            ml-IN, mr-IN, od-IN, pa-IN, ta-IN, te-IN, en-IN, gu-IN.
        voice: Speaker name. Available options: meera, pavithra, maitreyi,
            arvind, amol, amartya.
        preprocess: Sent as ``enable_preprocessing``.
        pitch: Sent as ``pitch`` (valid range not confirmed here; presumably
            -1 to 1 -- check the API reference).
        pace: Sent as ``pace``.
        loudness: Sent as ``loudness``.
        sample_rate: Sent as ``speech_sample_rate``. Available options: 8000,
            16000, 22050.
        timeout: Seconds to wait for the server before the request is
            abandoned.

    Returns:
        The raw response body as a string. Callers in this notebook parse it
        with ``json.loads`` and read the ``audios`` field.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status.
    """
    tts_url = "https://api.sarvam.ai/text-to-speech"

    # The payload field is a list of strings; tolerate a bare string as input.
    inputs = [text] if isinstance(text, str) else list(text)

    payload = {
        "inputs": inputs,
        "target_language_code": tr_lang,
        "speaker": voice,                 # previously hard-coded, so `voice` had no effect
        "pitch": pitch,
        "pace": pace,
        "loudness": loudness,
        "speech_sample_rate": sample_rate,
        "enable_preprocessing": preprocess,
        "model": "bulbul:v1"
    }

    headers = {
        'Content-Type': "application/json",
        'API-Subscription-Key': sarvam_sub_key
    }

    response = requests.request("POST", tts_url, json=payload, headers=headers, timeout=timeout)
    # Surface API errors here rather than as a KeyError when the caller
    # looks up 'audios' in an error body.
    response.raise_for_status()
    return response.text

In [None]:
# Hindi TTS test with different voices
# Observation from the last recorded run: all voices sounded the same.

text = ["दुर्भाग्य से, गूगल कोलैब वातावरण से माइक्रोफोन तक सीधा अभिगम संभव नहीं है।",
        "हरियाणा विधानसभा चुनाव में वोटिंग के दौरान कई जगह हिंसा, फर्जी मतदान को लेकर झड़प: कुल 61.19% मतदान, मेवात में सबसे ज्यादा पड़े वोट"
        ]
# Speaker names have to match the API's option list exactly
# ("pavithra", not "pavitra").
voices = ["meera", "pavithra", "maitreyi", "arvind", "amol", "amartya"]

for voice in voices:
    audios = json.loads(sarvam_tts(text, tr_lang='hi-IN', voice=voice, preprocess=True))['audios']
    # Each entry of 'audios' is base64-encoded audio (presumably one clip per
    # input string); the Hindi clips are numbered from 1.
    for num, audio in enumerate(audios, start=1):
        wav_path = f"{voice}_{num}.wav"
        with open(wav_path, "wb") as f:
            f.write(base64.b64decode(audio))
        # Play back exactly the file that was just written, rather than
        # assuming the working directory is /content.
        display(Audio(filename=wav_path))

In [None]:
# Marathi TTS test with different voices
# Observation from the last recorded run: all voices sounded the same.

text = ["विधानसभा निवडणुकीची घोषणा कधीही होण्याची शक्यता असताना महिन्याभरात तिसऱ्यांदा पंतप्रधान नरेंद्र मोदी राज्यात आले आहेत",
        "ठाण्यात येताच पंतप्रधान नरेंद्र मोदींचा जनतेशी मराठीतून संवाद",
        ]
# Speaker names have to match the API's option list exactly
# ("pavithra", not "pavitra").
voices = ["meera", "pavithra", "maitreyi", "arvind", "amol", "amartya"]

for voice in voices:
    audios = json.loads(sarvam_tts(text, tr_lang='mr-IN', voice=voice, preprocess=True))['audios']
    # Each entry of 'audios' is base64-encoded audio (presumably one clip per
    # input string). Numbering starts at 11, presumably so these files do not
    # overwrite other "<voice>_<n>.wav" files that start at 1.
    for num, audio in enumerate(audios, start=11):
        wav_path = f"{voice}_{num}.wav"
        with open(wav_path, "wb") as f:
            f.write(base64.b64decode(audio))
        # Play back exactly the file that was just written, rather than
        # assuming the working directory is /content.
        display(Audio(filename=wav_path))