<a href="https://colab.research.google.com/github/namrata935/SandalTalk/blob/main/sandal_mainpgm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**SandalTalk**
this is the main program of sandaltalk.

##**installation of libraries**
isntall the following libraries

In [2]:
!pip install SpeechRecognition
!pip install pydub
!pip install pandas
!apt-get install ffmpeg
!pip install langdetect
!pip install fuzzywuzzy
!pip install transformers
!pip install deep-translator
!pip install ipywidgets

Collecting SpeechRecognition
  Downloading SpeechRecognition-3.11.0-py2.py3-none-any.whl.metadata (28 kB)
Downloading SpeechRecognition-3.11.0-py2.py3-none-any.whl (32.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.11.0
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m12.7 MB/s[0

##**main code**
note:executing for the first time might take some time upto 1min. kindly stop and run again if issue persists.
trying to uplaod same file twice results in an error so please change file name if uplaoding again.

In [13]:
import speech_recognition as sr
import json
import pandas as pd
from google.colab import files, drive
from transformers import T5ForConditionalGeneration, T5Tokenizer
import os
import string
from langdetect import detect
from fuzzywuzzy import fuzz
import re
import ipywidgets as widgets
from IPython.display import display
from deep_translator import GoogleTranslator

dataset_path = 'https://raw.githubusercontent.com/namrata935/SandalTalk/refs/heads/main/cleaned_transcriptions_dataset%20-%20cleaned_transcriptions_dataset.csv'
df = pd.read_csv(dataset_path)


recognizer = sr.Recognizer()

# loading the t5 model
model = T5ForConditionalGeneration.from_pretrained('t5-small')
tokenizer = T5Tokenizer.from_pretrained('t5-small')

# recognizes audio and transcribes it to text
def recognize_audio_from_file(audio_file):
    with sr.AudioFile(audio_file) as source:
        print("Transcribing audio...")
        audio = recognizer.record(source)
        kannada_text = None
        english_text = None

        try:
            kannada_text = recognizer.recognize_google(audio, language="kn-IN")

            print(f"Kannada Transcription: {kannada_text}")
        except sr.UnknownValueError:
            print("Could not understand the audio in Kannada.")
        except sr.RequestError:
            print("API error during Kannada recognition.")

        try:
            english_text = recognizer.recognize_google(audio, language="en-IN")


            print(f"English Transcription: {english_text} ")
        except sr.UnknownValueError:
            print("Could not understand the audio in English.")
        except sr.RequestError:
            print("API error during English recognition.")

        return kannada_text, english_text

# refining the answer using t5 model
def refine_answer_with_t5(answer_text, question_text=""):
    prompt = f"Answer the following question: {question_text} based on this context: {answer_text} ."

    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    output = model.generate(input_ids, max_length=100, num_beams=4, early_stopping=True)

    refined_text = tokenizer.decode(output[0], skip_special_tokens=True)


    if not refined_text.strip():
        print("No refined answer generated. Returning the original answer.")
        return answer_text

    refined_sentences = refined_text.split('.')
    filtered_sentences = [sentence.strip() for sentence in refined_sentences if not sentence.strip().endswith('?')]

    return '. '.join(filtered_sentences).strip()

# cleaning text
def clean_text(text):
    # Check if the input is a string and not NaN or a float
    if isinstance(text, str):
        text = text.lower()  # Convert to lowercase
        text = text.translate(str.maketrans("", "", string.punctuation))  # Remove punctuation
        text = text.strip()  # Remove leading and trailing spaces
    else:
        return ""  # Return empty string if input is not a valid string
    return text


# cout matches of word in dataset
def count_word_matches(transcription_text, dataset_text):
    transcription_words = set(clean_text(transcription_text).split())
    dataset_words = set(clean_text(dataset_text).split())
    return len(transcription_words.intersection(dataset_words))

# translating
def translate_to_kannada(text):
    translator = GoogleTranslator(source='en', target='kn')
    return translator.translate(text)

# match transcriptions
def match_and_refine_answer(transcription_text, df, language="english"):
    best_match_row = None
    max_matches = 0

    for _, row in df.iterrows():
        if language == "kannada":
            matches = count_word_matches(transcription_text, row['Transcription'])
        else:
            matches = count_word_matches(transcription_text, row['English_Translation'])

        if matches > max_matches:
            max_matches = matches
            best_match_row = row

    if best_match_row is not None and max_matches > 0:
        print(f"Original Kannada Answer: {best_match_row['Transcription']}")
        print(f"Original English Answer: {best_match_row['English_Translation']}\n")

        if language == "english":
            refined_english = refine_answer_with_t5(best_match_row['English_Translation'], transcription_text)
            print(f"Refined English Answer: {refined_english}\n")


            original_kannada_translation = translate_to_kannada(best_match_row['English_Translation'])
            refined_kannada_translation = translate_to_kannada(refined_english)

            print(f"Original Answer Translated to Kannada: {original_kannada_translation}")
            print(f"Refined Answer Translated to Kannada: {refined_kannada_translation}")

    else:
      #put default case
        print("No matching rows found for transcription.")

# button for lang selection of audio file
def get_user_language_choice():
    print("Choose the audio language:")
    print("ಭಾಷೆಯನ್ನು ಆಯ್ಕೆ ಮಾಡಿ:\n ")
    button_kannada = widgets.Button(description="Kannada")
    button_english = widgets.Button(description="English")

    output = widgets.Output()

    def on_kannada_button_click(b):
        with output:
            print("You selected Kannada!")
        global language_choice
        language_choice = "kannada"

    def on_english_button_click(b):
        with output:
            print("You selected English!")
        global language_choice
        language_choice = "english"

    button_kannada.on_click(on_kannada_button_click)
    button_english.on_click(on_english_button_click)

    display(button_kannada, button_english, output)

get_user_language_choice()

while 'language_choice' not in globals():
    pass

uploaded_audio = files.upload()
uploaded_file = list(uploaded_audio.keys())[0]
wav_file = os.path.splitext(uploaded_file)[0] + ".wav"

os.system(f"ffmpeg -i {uploaded_file} {wav_file}")
print(f"Conversion complete: {wav_file}")

if language_choice == "kannada":
    kannada_text, english_text = recognize_audio_from_file(wav_file)
    print(f"Kannada Transcription: {kannada_text}")
    match_and_refine_answer(kannada_text, df, language="kannada")
elif language_choice == "english":
    kannada_text, english_text = recognize_audio_from_file(wav_file)

    refined_english = refine_answer_with_t5(english_text)
    print(f"Refined English Answer: {refined_english} ")
    match_and_refine_answer(english_text, df, language="english")

if kannada_text:
    kannada_data = {
        "audio_filename": wav_file,
        "transcription": kannada_text
    }
    with open("/content/kannada_transcription_output.json", "w") as kannada_json_file:
        json.dump(kannada_data, kannada_json_file, ensure_ascii=False, indent=4)
else:
    print("Kannada transcription failed.")

if english_text:
    english_data = {
        "audio_filename": wav_file,
        "transcription": english_text
    }
    with open("/content/english_transcription_output.json", "w") as english_json_file:
        json.dump(english_data, english_json_file, ensure_ascii=False, indent=4)
else:
    print("English transcription failed.")


Choose the audio language:
ಭಾಷೆಯನ್ನು ಆಯ್ಕೆ ಮಾಡಿ:
 


Button(description='Kannada', style=ButtonStyle())

Button(description='English', style=ButtonStyle())

Output()

Saving newaudio05.ogg to newaudio05.ogg
Conversion complete: newaudio05.wav
Transcribing audio...
Kannada Transcription: ವಿಚ್ ಇಸ್ ದಿ ಐಡಿಯಲ್ ಕೆಮಿಕಲ್ ಕಾಂಪಿಟೇಶನ್ ಆಫ್ ಸ್ಯಾಂಡಲ್ ವರ್ಡ್
English Transcription: which is the ideal chemical composition of Sandalwood 
Refined English Answer: Answer the following question: 
Original Kannada Answer: ವಚ ಇಸ ದ ಐಡಯಲ ಕಮಕಲ ಕಬನಷನ ಆಫ ಸಯಡಲವಡ ಅದರ ಜಗತತನಲಲ ಉತಕಷಟ ಮಟಟದ ಶರಗಧ ಎಲಲಯದರ ಸಗತತ ಅದರ ಅದ ನಮಮ ಕರನಟಕದಲಲ ಸಗತತ ಅದರ ಇಲಲ ಇರತಕಕತ ಭತಕ ಅಶಗಳ ಫಯಕಟರಸ ಲಕ ವಟರ ಅಡ ಕಲಮಟ ಭತಕ ಅಶಗಳನನ ಮಣಣ ನರ ಮತತ ಅವಗಳ ಜಗತತನಲಲ ಉತಕಷಟ ಮತತ ಶರಗಧ ಬಳಯಲಕಕ ಅದಭತವದ ಪರಕ ವತವರಣ ಈ ನಮಮ ಕನನಡ ನಡನಲಲದ ಹಗಗ ನಮಮ ಕನನಡ ನಡಗ ಕನನಡ ನಡ ಗಧದ ನಡ ಚದನದ ಬಡ ಅತ ನನನ ಮನ ಕರದಲಲ ಶತರ ಶತಮನದದ ಆದಕವ ರನನ ಪಪ ಹಳಗನನಡ
Original English Answer: which is the ideal chemical combination of sandalwood the best level of sandalwood can be found anywhere in the world it can be found in our karnataka physical factors like water and climate for kannada land kannada land is the land of sandalwood the land of sandalwood the enemy did not come home yesterday

Refine