In [None]:
from google.colab import drive
import zipfile
import os

# Mount Google Drive at /content/drive; later cells read the voice dataset
# from paths under /content/drive/MyDrive.
drive.mount('/content/drive')


In [None]:
import os
import librosa

# Path to your dataset folder
# read_dataset() below treats every sub-folder of this path as one label.
# NOTE(review): later cells read the dataset from
# '/content/drive/MyDrive/voice dataset NLP/TextDataset/VoiceData' instead;
# confirm which location is the current one.
dataset_folder = '/content/nonsensitive'

# Function to read audio files from the dataset folder
def read_dataset(folder):
    """Load every WAV file found under ``folder/<label>/``.

    Each immediate sub-directory of ``folder`` is treated as a class label and
    every ``.wav`` file inside it (extension matched case-insensitively) is
    decoded with ``librosa.load(..., sr=None)``.

    Directory entries are visited in sorted order so the returned lists are
    identical on every run; ``os.listdir`` alone gives no ordering guarantee,
    which would make a later seeded train/test split irreproducible.

    Args:
        folder: Path of the dataset root containing one sub-folder per label.

    Returns:
        A ``(dataset, labels)`` tuple of equal-length lists: ``dataset[i]`` is
        the audio array returned by librosa for the i-th file and ``labels[i]``
        is the name of the sub-folder that file was found in.
    """
    dataset = []
    labels = []
    for label in sorted(os.listdir(folder)):
        label_folder = os.path.join(folder, label)
        if not os.path.isdir(label_folder):
            continue  # plain files at the root do not define a label
        for audio_file in sorted(os.listdir(label_folder)):
            if not audio_file.lower().endswith('.wav'):
                continue  # only WAV files belong to the dataset
            file_path = os.path.join(label_folder, audio_file)
            # The sample rate returned by librosa is not kept by this function.
            audio_data, _ = librosa.load(file_path, sr=None)
            dataset.append(audio_data)
            labels.append(label)
    return dataset, labels

# Load the whole dataset into memory
data, labels = read_dataset(dataset_folder)

# Sanity check: how many clips were loaded and which label each one carries
print(f"Number of audio files: {len(data)}")
print(f"Labels: {labels}")


In [None]:
from collections import Counter


# Tally how many clips carry each label
label_counts = Counter(labels)

# Report one line per label, in the order the labels were first seen
for label, count in label_counts.items():
    print("Label: {}, Count: {}".format(label, count))


Label: nonsensitive, Count: 99
Label: sensitive, Count: 45


In [None]:
import os
import librosa
import numpy as np

# Define the function for preprocessing audio files
def preprocess_audio_folder(folder_path, sample_rate=16000, duration=2):
    """Run ``preprocess_audio`` on each ``.wav`` entry directly inside ``folder_path``.

    Sub-directories are not descended into.

    Args:
        folder_path: Directory whose entries are scanned.
        sample_rate: Forwarded to ``preprocess_audio``.
        duration: Forwarded to ``preprocess_audio``.

    Returns:
        A list of ``(preprocessed_audio, filename)`` tuples, one per matching
        entry, in the order ``os.listdir`` reported them.
    """
    wav_names = [name for name in os.listdir(folder_path) if name.endswith('.wav')]
    return [
        (preprocess_audio(os.path.join(folder_path, name), sample_rate, duration), name)
        for name in wav_names
    ]

# Function to preprocess a single audio file
def preprocess_audio(audio_file, sample_rate=16000, duration=2, n_mfcc=40, max_len=100):
    """Turn one audio file into a fixed-width MFCC matrix.

    Steps: load at most ``duration`` seconds at ``sample_rate``, pass the
    signal through ``librosa.effects.trim``, compute MFCCs, then zero-pad or
    cut along axis 1 so that exactly ``max_len`` columns remain.

    Args:
        audio_file: Path of the file to load.
        sample_rate: Rate passed to ``librosa.load`` and to the MFCC routine.
        duration: Maximum number of seconds read from the file.
        n_mfcc: Number of MFCC coefficients requested from librosa
            (previously hard-coded to 40).
        max_len: Number of columns of the returned matrix
            (previously hard-coded to 100).

    Returns:
        The MFCC array with its second axis padded/truncated to ``max_len``.
    """
    # Load audio file
    audio_data, _ = librosa.load(audio_file, sr=sample_rate, duration=duration)

    # Trim silence
    audio_data, _ = librosa.effects.trim(audio_data)

    # Extract MFCC features
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=n_mfcc)

    # Pad (with zeros, on the right) or truncate MFCCs to a fixed length
    n_frames = mfccs.shape[1]
    if n_frames < max_len:
        mfccs = np.pad(mfccs, ((0, 0), (0, max_len - n_frames)), mode='constant')
    else:
        mfccs = mfccs[:, :max_len]

    return mfccs

# Example usage
folder_path = '/content/drive/MyDrive/voice dataset NLP/TextDataset/VoiceData'

# WAV files sitting directly in folder_path (same call as before)
preprocessed_data = preprocess_audio_folder(folder_path)

# The other cells of this notebook read this same folder as <label>/<file>.wav,
# and preprocess_audio_folder() does not recurse, so the labelled clips would
# never be reached by the call above alone. Process each sub-folder as well.
for entry in sorted(os.listdir(folder_path)):
    entry_path = os.path.join(folder_path, entry)
    if os.path.isdir(entry_path):
        preprocessed_data.extend(preprocess_audio_folder(entry_path))

# Example print the preprocessed data
for audio_data, filename in preprocessed_data:
    print("Filename:", filename)
    print("Preprocessed Data Shape:", audio_data.shape)
    # Use preprocessed data for further processing or model training


In [None]:
pip install SpeechRecognition


Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.4-py2.py3-none-any.whl (32.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m42.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.10.4


In [None]:
import speech_recognition as sr


In [None]:
import os
import librosa
import speech_recognition as sr

# Path to your dataset folder
# (on the mounted Drive; read_dataset() below expects one sub-folder per label here)
dataset_folder = '/content/drive/MyDrive/voice dataset NLP/TextDataset/VoiceData'

# Function to read audio files from the dataset folder
def read_dataset(folder):
    """Load and transcribe every WAV file found under ``folder/<label>/``.

    This definition replaces the audio-only ``read_dataset`` from the earlier
    cell: each dataset item is now an ``(audio_data, text)`` pair, where
    ``text`` is whatever ``recognize_audio`` returns for the file.

    Directory entries are visited in sorted order so the returned lists are
    identical on every run (``os.listdir`` gives no ordering guarantee), and
    the ``.wav`` extension is matched case-insensitively.

    Args:
        folder: Path of the dataset root containing one sub-folder per label.

    Returns:
        A ``(dataset, labels)`` tuple of equal-length lists: ``dataset[i]`` is
        ``(audio_data, text)`` for the i-th file and ``labels[i]`` is the name
        of the sub-folder that file was found in.
    """
    dataset = []
    labels = []
    recognizer = sr.Recognizer()  # one recognizer shared by all files
    for label in sorted(os.listdir(folder)):
        label_folder = os.path.join(folder, label)
        if not os.path.isdir(label_folder):
            continue  # plain files at the root do not define a label
        for audio_file in sorted(os.listdir(label_folder)):
            if not audio_file.lower().endswith('.wav'):
                continue  # only WAV files belong to the dataset
            file_path = os.path.join(label_folder, audio_file)
            # The sample rate returned by librosa is not kept by this function.
            audio_data, _ = librosa.load(file_path, sr=None)
            text = recognize_audio(recognizer, file_path)
            dataset.append((audio_data, text))
            labels.append(label)
    return dataset, labels

# Function to recognize speech from an audio file
def recognize_audio(recognizer, file_path):
    """Transcribe one audio file with ``recognizer.recognize_google``.

    Args:
        recognizer: Object providing ``record`` and ``recognize_google``
            (an ``sr.Recognizer`` in this notebook).
        file_path: Path of the audio file, opened through ``sr.AudioFile``.

    Returns:
        The recognized text; the literal string ``"Unknown"`` when the
        recognizer raises ``sr.UnknownValueError``; or ``"Error: <details>"``
        when it raises ``sr.RequestError``.
    """
    # Read the whole file into memory first; the file handle is not needed
    # once the recording has been captured.
    with sr.AudioFile(file_path) as source:
        recording = recognizer.record(source)

    try:
        return recognizer.recognize_google(recording)
    except sr.UnknownValueError:
        return "Unknown"
    except sr.RequestError as err:
        return "Error: {0}".format(err)

# Load the dataset (audio plus recognized text for every clip)
data, labels = read_dataset(dataset_folder)

# Show, for each clip, its 1-based position, its label and its transcript
for i, (sample, label_name) in enumerate(zip(data, labels)):
    print("Audio File:", i + 1)
    print("Label:", label_name)
    print("Recognized Text:", sample[1])
    print("------------")


Audio File: 1
Label: nonsensitive
Recognized Text: so if there is nothing else we need to discuss let's move on to today's agenda
------------
Audio File: 2
Label: nonsensitive
Recognized Text: the meeting was declared closed at 11.30
------------
Audio File: 3
Label: nonsensitive
Recognized Text: hi it's me
------------
Audio File: 4
Label: nonsensitive
Recognized Text: let me just summarize the main points of the last meeting
------------
Audio File: 5
Label: nonsensitive
Recognized Text: thank you Tom
------------
Audio File: 6
Label: nonsensitive
Recognized Text: have you all received a copy of today's agenda
------------
Audio File: 7
Label: nonsensitive
Recognized Text: you should sign up for that seminar next year
------------
Audio File: 8
Label: nonsensitive
Recognized Text: after briefly revising the changes that will take place we moved on to a brainstorming session concerning after customer support improvements
------------
Audio File: 9
Label: nonsensitive
Recognized Text:

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import numpy as np

# Define a function to extract features from audio data
def extract_features(audio_data, n_features=20):
    """Summarise a waveform as ``n_features`` log band energies.

    The previous placeholder returned ``np.random.rand(20)`` and ignored its
    input, so any classifier trained on it was fitted to noise and gave a
    different result on every run. This version is deterministic and depends
    on the signal: the power spectrum of the waveform is split into
    ``n_features`` consecutive frequency bands and the log of each band's mean
    power is returned.

    Args:
        audio_data: 1-D sequence of audio samples (any array-like of numbers).
        n_features: Length of the returned vector (default 20, matching the
            placeholder's dimensionality).

    Returns:
        A float ``numpy`` array of shape ``(n_features,)``. An empty input
        yields an all-zero vector.
    """
    signal = np.asarray(audio_data, dtype=np.float64).ravel()
    if signal.size == 0:
        return np.zeros(n_features)

    # Dividing by the length keeps clips of different durations comparable.
    power = np.abs(np.fft.rfft(signal)) ** 2 / signal.size

    # array_split tolerates spectra shorter than n_features (empty bands -> 0).
    bands = np.array_split(power, n_features)
    energies = np.array([band.mean() if band.size else 0.0 for band in bands])

    # log1p compresses the dynamic range and is defined for zero energy.
    return np.log1p(energies)

# Read the dataset and preprocess audio data
processed_data, labels = read_dataset(dataset_folder)

# Extract features from the processed data (the recognized text is not used here)
X = [extract_features(audio) for audio, _ in processed_data]
y = labels

# Split data into train and test sets.
# stratify=y keeps the label proportions equal in both parts; the label counts
# printed earlier in this notebook are unbalanced (99 vs 45).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Initialize and train the classifier (seeded so the report is reproducible)
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Generate classification report
report = classification_report(y_test, y_pred)

# Print the classification report
print(report)


              precision    recall  f1-score   support

nonsensitive       0.72      0.86      0.78        21
   sensitive       0.25      0.12      0.17         8

    accuracy                           0.66        29
   macro avg       0.48      0.49      0.47        29
weighted avg       0.59      0.66      0.61        29



In [None]:
!pip install googletrans==4.0.0-rc1




In [None]:
from googletrans import Translator

# Create the translator client used by this demo
translator = Translator()

# Text to translate
english_text = "welcome"

# dest='hi' requests Hindi output
translation = translator.translate(english_text, dest='hi')

# Show the result
print(f"Translated Text (Hindi): {translation.text}")


Translated Text (Hindi): स्वागत


In [None]:
from googletrans import Translator

# Function to translate text to Telugu
def translate_to_telugu(text):
    """Return ``text`` translated with a fresh ``Translator`` using ``dest='te'``.

    Args:
        text: The string to translate.

    Returns:
        The ``.text`` attribute of the translation result.
    """
    return Translator().translate(text, dest='te').text

# Main function
def main():
    """Translate one fixed English sentence to Telugu and print the result."""
    print("Telugu translation:", translate_to_telugu("arthi what are you doing?"))

if __name__ == "__main__":
    main()


Telugu translation: ఆర్థీ మీరు ఏమి చేస్తున్నారు?


In [None]:
from googletrans import Translator

# Function to translate text to Punjabi
def translate_to_punjabi(text):
    """Return ``text`` translated with a fresh ``Translator`` using ``dest='pa'``.

    Args:
        text: The string to translate.

    Returns:
        The ``.text`` attribute of the translation result.
    """
    return Translator().translate(text, dest='pa').text

# Main function
def main():
    """Translate one fixed English sentence to Punjabi and print the result.

    Note: this redefines the ``main`` declared in the Telugu cell.
    """
    print("Punjabi translation:", translate_to_punjabi("Hello, how are you?"))

if __name__ == "__main__":
    main()


Punjabi translation: ਹੈਲੋ ਤੁਸੀ ਕਿਵੇਂ ਹੋ?


In [None]:
# Destination folder for the translated dataset.
# NOTE(review): this is a Windows-style local path, while the dataset itself is
# read from /content/drive/...; confirm the intended destination. The same
# value is assigned again in the cell that defines process_dataset().
translated_dataset_folder = r'C:\Users\srikr\OneDrive\Desktop\to store translated data'


In [None]:
pip install transformers




In [None]:
from transformers import MarianMTModel

# Load the MarianMT model
# NOTE(review): `model` is not referenced by any later cell visible in this
# notebook (the translation cells use googletrans); confirm it is still needed.
model_name = "Helsinki-NLP/opus-mt-en-hi"  # Example model for English to Hindi translation
model = MarianMTModel.from_pretrained(model_name)


config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/306M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

In [None]:
import os
import librosa
import speech_recognition as sr
from googletrans import Translator

# Path to your English voice dataset folder
english_dataset_folder = '/content/drive/MyDrive/voice dataset NLP/TextDataset/VoiceData'

# Path to store the translated dataset.
# Kept next to the source dataset on the mounted Drive: the previous value was
# a Windows desktop path, which is not a real location in the environment that
# provides /content/drive, so the output would not have been persisted there.
translated_dataset_folder = os.path.join(
    os.path.dirname(english_dataset_folder), 'VoiceData_translated_hi'
)

# Initialize the speech recognizer (read as a global by process_dataset)
recognizer = sr.Recognizer()

# Initialize the translator (read as a global by process_dataset)
translator = Translator()

# Function to read audio files from the English dataset folder, perform speech recognition,
# and translate the recognized text to Hindi
def process_dataset(english_folder, translated_folder):
    """Mirror ``english_folder`` into ``translated_folder`` with Hindi transcripts.

    For every ``<label>/<name>.wav`` under ``english_folder`` this:
      1. transcribes the audio with the module-level ``recognizer``
         (``"Unknown"`` / ``"Error: ..."`` is used as the text when
         recognition fails),
      2. translates that text with the module-level ``translator`` (``dest='hi'``),
      3. writes the translation to ``<translated_folder>/<label>/<name>.txt``
         (UTF-8), and
      4. copies the WAV file next to it.

    The copy is done with ``shutil.copy`` rather than a shell ``cp`` command:
    the shell version split paths on spaces (the dataset path contains
    several), failed without raising, and only works where ``cp`` exists.

    Args:
        english_folder: Dataset root containing one sub-folder per label.
        translated_folder: Output root; created if missing.

    Returns:
        None. Results are written to disk.
    """
    import shutil  # local import: only needed for the file copy below

    os.makedirs(translated_folder, exist_ok=True)

    for label in os.listdir(english_folder):
        label_english_folder = os.path.join(english_folder, label)
        if not os.path.isdir(label_english_folder):
            continue  # only directories are treated as labels

        label_translated_folder = os.path.join(translated_folder, label)
        os.makedirs(label_translated_folder, exist_ok=True)

        for audio_file in os.listdir(label_english_folder):
            if not audio_file.endswith('.wav'):
                continue

            english_audio_path = os.path.join(label_english_folder, audio_file)
            translated_audio_path = os.path.join(label_translated_folder, audio_file)

            # Perform speech recognition
            with sr.AudioFile(english_audio_path) as source:
                audio_data = recognizer.record(source)
            try:
                english_text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                english_text = "Unknown"
            except sr.RequestError as e:
                english_text = "Error: {0}".format(e)

            # Translate English text to Hindi
            translation = translator.translate(english_text, dest='hi')

            # Save translated text to a file ('.wav' suffix swapped for '.txt')
            with open(translated_audio_path[:-4] + '.txt', 'w', encoding='utf-8') as text_file:
                text_file.write(translation.text)

            # Copy the audio file to the translated folder
            shutil.copy(english_audio_path, translated_audio_path)

# Process the English dataset and translate it to Hindi
# (runs speech recognition and translation for every .wav found in the label
#  sub-folders of english_dataset_folder and writes under translated_dataset_folder)
process_dataset(english_dataset_folder, translated_dataset_folder)


In [None]:
import os
import speech_recognition as sr
from googletrans import Translator

# Initialize the recognizer
# (module-level instance; translate_audio_to_hindi below reads it as a global)
recognizer = sr.Recognizer()

# Initialize the translator
# (module-level instance; also read as a global by translate_audio_to_hindi)
translator = Translator()

# Function to transcribe speech from an audio file and translate it to Hindi
def translate_audio_to_hindi(audio_file_path):
    """Transcribe ``audio_file_path`` and translate the transcript to Hindi.

    Uses the module-level ``recognizer`` and ``translator``. Both the
    recognized English text and the Hindi translation are printed.

    Args:
        audio_file_path: Path of the audio file, opened through ``sr.AudioFile``.

    Returns:
        The Hindi text on success. ``None`` when recognition raises
        ``sr.UnknownValueError`` or ``sr.RequestError``, or when any other
        exception occurs; in each case a message is printed instead.
    """
    try:
        with sr.AudioFile(audio_file_path) as source:
            # Capture the recording and recognize speech from it
            english_text = recognizer.recognize_google(recognizer.record(source))
            print("Recognized English Text:", english_text)

            # Translate English text to Hindi
            hindi_text = translator.translate(english_text, src='en', dest='hi').text
            print("Translated Hindi Text:", hindi_text)
    except sr.UnknownValueError:
        print("Speech Recognition could not understand audio")
        return None
    except sr.RequestError as err:
        print("Could not request results from Speech Recognition service; {0}".format(err))
        return None
    except Exception as err:
        print("Error:", err)
        return None
    return hindi_text

# Example usage
# Transcribes and translates a single clip. The function prints both texts and
# returns the Hindi text, or None when one of its handled errors occurs.
audio_file_path = "/content/drive/MyDrive/voice dataset NLP/TextDataset/VoiceData/nonsensitive/102.wav"
translated_text = translate_audio_to_hindi(audio_file_path)


Recognized English Text: after briefly revising the changes that will take place we moved on to a brainstorming session concerning after customer support improvements
Translated Hindi Text: संक्षेप में उन परिवर्तनों को संशोधित करने के बाद जो हम ग्राहक सहायता में सुधार के बाद एक मंथन सत्र में चले गए


In [None]:
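# English to Telugu vocabulary mapping (word-for-word lookup table).
# Note: several keys are listed more than once below (e.g. "of", "the", "on");
# for a repeated key Python keeps only the value that appears last.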
english_to_telugu_mapping = {
    "so": "కాబట్టి",
    "if": "అయితే",
    "there": "అక్కడ",
    "is": "ఉంది",
    "nothing": "ఏమీ",
    "else": "ఇతర",
    "we": "మేము",
    "need": "అవసరం",
    "to": "కోసం",
    "discuss": "చర్చ",
    "let's": "చెప్పండి",
    "move": "మారండి",
    "on": "ముందుకు",
    "today's": "ఈ రోజున",
    "agenda": "అజెండా",
    "the": "అలా",
    "meeting": "మీటింగ్",
    "was": "ఉంది",
    "declared": "ఘోషించబడింది",
    "closed": "మూసివేయబడింది",
    "at": "లో",
    "11.30": "11.30",
    "hi": "హాయ్",
    "it's": "ఇది",
    "me": "నాకు",
    "let": "అనుకోండి",
    "just": "కేవలం",
    "summarize": "సంగ్రహించుట",
    "main": "ముఖ్య",
    "points": "పాయింట్లు",
    "of": "వెలువడించడానికి",
    "last": "చివరి",
    "thank": "ధన్యవాదాలు",
    "you": "మీరు",
    "Tom": "టామ్",
    "have": "కావాలి",
    "all": "అన్ని",
    "received": "స్వీకరించబడింది",
    "a": "ఒక",
    "copy": "కాపీ",
    "today's": "ఈ రోజున",
    "you": "మీరు",
    "should": "ఉండాలి",
    "sign": "సెన్నిపించడానికి",
    "up": "అప్",
    "for": "కోసం",
    "that": "అది",
    "seminar": "సెమినార్",
    "next": "తర్వాత",
    "year": "సంవత్సరం",
    "after": "తర్వాత",
    "briefly": "సులభంగా",
    "revising": "మరచిపోయిన",
    "changes": "మార్పులు",
    "that": "అది",
    "will": "చేస్తాయి",
    "take": "తీసుకోవడానికి",
    "place": "ప్రతిష్టన",
    "moved": "కదిలేయబడిన",
    "a": "ఒక",
    "brainstorming": "బ్రెయిన్‌స్టార్మింగ్",
    "session": "సెషన్",
    "concerning": "గురించి",
    "customer": "కస్టమర్",
    "support": "మద్దతు",
    "improvements": "మెరుగైంపులు",
    "before": "ముందు",
    "begin": "ప్రారంభించు",
    "report": "నివేదిక",
    "like": "ఇష్టము",
    "get": "పొందడానికి",
    "some": "కొన్ని",
    "ideas": "ఆలోచనలు",
    "from": "నుండి",
    "I": "నేను",
    "also": "కూడా",
    "need": "అవసరం",
    "learn": "నేర్చుకోవటానికి",
    "how": "ఎలా",
    "better": "మెరుగుపరచుకోవడానికి",
    "mg": "ఎమ్‌జి",
    "my": "నా",
    "workload": "కార్యభారం",
    "always": "సదా",
    "run": "రన్",
    "out": "తేలిక",
    "of": "లో",
    "time": "సమయం",
    "an": "ఒక",
    "advertising": "ప్రకటన",
    "campaign": "ప్రచారయోజన",
    "to": "కోసం",
    "focus": "ప్రాధాన్యం",
    "on": "పైకి",
    "their": "వాళ్ల",
    "particular": "ప్రత్యేక",
    "needs": "అవసరాలు",
    "Unknown": "తెలియని",
    "think": "అనుకుంటున్నాను",
    "rural": "గ్రామీణ",
    "customers": "గడ్డారు",
    "want": "కావాలి",
    "feel": "అనుభవించటానికి",
    "as": "వంటి",
    "important": "ముఖ్యమైన",
    "our": "మా",
    "living": "జీవించడం",
    "in": "లో",
    "cities": "నగరాలు",
    "well": "బాగా",
    "me": "నాకు",
    "begin": "ప్రారంభించు",
    "with": "తో",
    "this": "ఇది",
    "powerpoint": "పవర్‌పాయింట్",
    "presentation": "ప్రాధర్యము",
    "Jack": "జాక్",
    "presents": "ప్రదర్శిస్తాడు",
    "his": "ఆతని",
    "how": "ఎలా",
    "feel": "అనుభవించండి",
    "about": "గురించి",
    "sales": "అమ్మకాలు",
    "your": "మీ",
    "districts": "జిల్లాలు",
    "excuse": "క్షమాపణ",
    "me": "నాకు",
    "I": "నేను",
    "didn't": "లేదు",
    "catch": "అర్థం",
    "that": "అది",
    "can": "చెయ్యవచ్చు",
    "we": "మేము",
    "fix": "మార్పు",
    "the": "ది",
    "next": "తర్వాత",
    "meeting": "మీటింగ్",
    "please": "దయచేసి",
    "suggest": "సూచించుకుంద",
}


# Function to map English words to Telugu
def map_to_telugu(text, mapping=None):
    """Translate ``text`` word by word using a lookup table.

    Each whitespace-separated word is looked up exactly as written first and
    then in lower case. Trying the exact spelling first matters because the
    vocabulary contains capitalised keys (for example "Tom", "I", "Jack",
    "Unknown") that a lower-case-only lookup can never match. Words found
    under neither spelling are kept unchanged.

    Args:
        text: English sentence; split on any whitespace.
        mapping: Optional dict of English word -> translation. Defaults to the
            module-level ``english_to_telugu_mapping``.

    Returns:
        The translated words joined by single spaces.
    """
    if mapping is None:
        mapping = english_to_telugu_mapping
    return ' '.join(
        mapping.get(word, mapping.get(word.lower(), word))
        for word in text.split()
    )

# Try the word-for-word mapping on one recognized sentence
sample_text = " after briefly revising the changes that will take place we moved on to a brainstorming session concerning after customer support improvements"
telugu_translation = map_to_telugu(sample_text)
print(f"Telugu translation: {telugu_translation}")


Telugu translation: తర్వాత సులభంగా మరచిపోయిన ది మార్పులు అది చేస్తాయి తీసుకోవడానికి ప్రతిష్టన మేము కదిలేయబడిన పైకి కోసం ఒక బ్రెయిన్‌స్టార్మింగ్ సెషన్ గురించి తర్వాత కస్టమర్ మద్దతు మెరుగైంపులు


In [None]:
english_to_punjabi_mapping = {
    "so": "ਤਾਂ",
    "if": "ਜੇ",
    "there": "ਉੱਥੇ",
    "is": "ਹੈ",
    "nothing": "ਕੁਝ ਨਹੀਂ",
    "else": "ਹੋਰ",
    "we": "ਅਸੀਂ",
    "need": "ਲੋੜ",
    "to": "ਨੂੰ",
    "discuss": "ਚਰਚਾ ਕਰਨਾ",
    "let's": "ਚੱਲੋ",
    "move": "ਚੱਲੋ",
    "on": "ਉੱਤੇ",
    "today's": "ਅੱਜ ਦੇ",
    "agenda": "ਐਜੰਡਾ",
    "the": "ਉਹ",
    "meeting": "ਮੀਟਿੰਗ",
    "was": "ਸੀ",
    "declared": "ਘੋਸ਼ਿਤ",
    "closed": "ਬੰਦ",
    "at": "ਤੇ",
    "11.30": "11.30",
    "hi": "ਹਾਂ",
    "it's": "ਇਹ",
    "me": "ਮੈਨੂੰ",
    "let": "ਦੱਸੋ",
    "just": "ਸਿਰਫ",
    "summarize": "ਸੰਖੇਪਿਕ",
    "main": "ਮੁੱਖ",
    "points": "ਬਿੰਦੂ",
    "of": "ਦਾ",
    "last": "ਆਖਰੀ",
    "thank": "ਧੰਨਵਾਦ",
    "you": "ਤੁਸੀਂ",
    "Tom": "ਟਾਮ",
    "have": "ਹੈ",
    "all": "ਸਭ",
    "received": "ਪ੍ਰਾਪਤ",
    "a": "ਇੱਕ",
    "copy": "ਨਕਲ",
    "should": "ਚਾਹੀਦਾ ਹੈ",
    "sign": "ਹਸਤਾਖਰਾਰ",
    "up": "ਉੱਤੇ",
    "for": "ਲਈ",
    "that": "ਉਹ",
    "seminar": "ਸੇਮੀਨਾਰ",
    "next": "ਅਗਲੇ",
    "year": "ਸਾਲ",
    "after": "ਬਾਅਦ",
    "briefly": "ਛੋਟੇ ਅਵਧੀ",
    "revising": "ਮਰੀਜ਼",
    "changes": "ਤਬਦੀਲੀਆਂ",
    "will": "ਹੋਵੇਗਾ",
    "take": "ਲਓ",
    "place": "ਸਥਾਨ",
    "moved": "ਹਿਲਾਇਆ",
    "After": "ਬਾਅਦ",
    "briefly": "ਥੋੜ੍ਹਾ",
    "revising": "ਸਮੀਖਿਆ",
    "the": "ਵਾਲੀ",
    "changes": "ਬਦਲਾਓ",
    "that": "ਜੋ",
    "will": "ਕਰੇਗੀ",
    "take": "ਲਓ",
    "place": "ਜਗ੍ਹਾ",
    "we": "ਅਸੀਂ",
    "moved": "ਚੱਲੇ",
    "on": "ਉੱਤੇ",
    "to": "ਨੂੰ",
    "a": "ਇੱਕ",
    "brainstorming": "ਬ੍ਰੇਨਸਟਰਮਿੰਗ",
    "session": "ਸੈਸ਼ਨ",
    "concerning": "ਬਾਰੇ",
    "customer": "ਗਾਹਕ",
    "support": "ਸਹਿਯੋਗ",
    "improvements": "ਸੁਧਾਰ",
    }

# Sample English sentence
english_sentence = "after briefly revising the changes that will take place we moved on to a brainstorming session concerning after customer support improvements"

# Split the sentence into words
words = english_sentence.split()

# Translate each word using the mapping.
# The exact spelling is tried first and the lower-case form second, so both the
# capitalised keys in the table (e.g. "After", "Tom") and capitalised words in
# the input can be matched; unknown words are kept as they are. This matches
# the case-insensitive lookup used by the Telugu and Kannada helpers.
punjabi_translation = ' '.join(
    english_to_punjabi_mapping.get(word, english_to_punjabi_mapping.get(word.lower(), word))
    for word in words
)

# Print the Punjabi translation
print("Punjabi Translation:", punjabi_translation)


Punjabi Translation: ਬਾਅਦ ਥੋੜ੍ਹਾ ਸਮੀਖਿਆ ਵਾਲੀ ਬਦਲਾਓ ਜੋ ਕਰੇਗੀ ਲਓ ਜਗ੍ਹਾ ਅਸੀਂ ਚੱਲੇ ਉੱਤੇ ਨੂੰ ਇੱਕ ਬ੍ਰੇਨਸਟਰਮਿੰਗ ਸੈਸ਼ਨ ਬਾਰੇ ਬਾਅਦ ਗਾਹਕ ਸਹਿਯੋਗ ਸੁਧਾਰ


In [None]:
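# English to Kannada vocabulary mapping (word-for-word lookup table, all keys lower-case).
# Note: many keys are listed more than once below (e.g. "me", "feel");
# for a repeated key Python keeps only the value that appears last.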
vocabulary_mapping = {
    "so": "ಹೌದು",
    "if": "ಹೇಗಾದರೆ",
    "there": "ಅಲ್ಲಿ",
    "is": "ಇದೆ",
    "nothing": "ಯಾವುದೂ ಇಲ್ಲ",
    "else": "ಇನ್ನೊಂದು",
    "we": "ನಾವು",
    "need": "ಬೇಕು",
    "to": "ಗೆ",
    "discuss": "ಚರ್ಚಿಸಬೇಕಾಗಿದೆ",
    "let's": "ನೋಡು",
    "move": "ಹೆಜ್ಜೆ",
    "on": "ಮೇಲೆ",
    "today's": "ಇಂದಿನ",
    "agenda": "ಕಾರ್ಯಾಚರಣೆ",
    "the": "ಅದು",
    "meeting": "ಭೇಟಿ",
    "was": "ಆಗಿತ್ತು",
    "declared": "ಘೋಷಿಸಲಾಗಿತ್ತು",
    "closed": "ಮುಚ್ಚಲಾಗಿತ್ತು",
    "at": "ನಲ್ಲಿ",
    "hi": "ನಮಸ್ಕಾರ",
    "it's": "ಇದು",
    "me": "ನಾನು",
    "let": "ಬಿಟ್ಟು",
    "just": "ಸರಿ",
    "summarize": "ಸಾರಿಸು",
    "main": "ಮುಖ್ಯ",
    "points": "ಅಂಶಗಳು",
    "of": "ನ",
    "last": "ಕೊನೆಯ",
    "report": "ವರದಿ",
    "thank": "ಧನ್ಯವಾದ",
    "you": "ನೀವು",
    "tom": "ಟಾಮ್",
    "have": "ಹೊಂದಿದ್ದೇನೆ",
    "all": "ಎಲ್ಲರೂ",
    "received": "ಸ್ವೀಕರಿಸಿದ",
    "a": "ಒಂದು",
    "copy": "ನಕಲಿ",
    "of": "ನ",
    "today's": "ಇಂದಿನ",
    "you": "ನೀವು",
    "should": "ಬೇಕು",
    "sign": "ಸಹಿ",
    "up": "ಮೇಲೇ",
    "for": "ಗೆ",
    "that": "ಅದು",
    "seminar": "ಸಮಿನಾರ್",
    "next": "ಮುಂದಿನ",
    "year": "ವರ್ಷ",
    "after": "ನಂತರ",
    "briefly": "ಸಂಕ್ಷೇಪವಾಗಿ",
    "revising": "ಪುನರ್ವಿಮರ್ಶೆ",
    "the": "ಅದು",
    "changes": "ಬದಲಾವಣೆಗಳು",
    "that": "ಅದು",
    "will": "ಸಾಧ್ಯ",
    "take": "ತೆಗೆದುಕೊಳ್ಳುತ್ತವೆ",
    "place": "ಸ್ಥಳ",
    "we": "ನಾವು",
    "moved": "ಹಾರಾಡಿದ್ದೆವು",
    "on": "ಮೇಲೆ",
    "to": "ಗೆ",
    "a": "ಒಂದು",
    "brainstorming": "ಭಾವನಾ ವೃತ್ತಿ",
    "session": "ಸೆಷನ್",
    "concerning": "ಪ್ರಸಂಗದ",
    "after": "ನಂತರ",
    "customer": "ಗ್ರಾಹಕ",
    "support": "ಬೆಂಬಲ",
    "improvements": "ಮೆಚ್ಚಿಸುವಂತೆ",
    "before": "ಹಿಂದೆ",
    "i": "ನಾನು",
    "begin": "ಪ್ರಾರಂಭಿಸುತ್ತಿದ್ದೇನೆ",
    "the": "ಅದು",
    "report": "ವರದಿ",
    "i": "ನಾನು",
    "like": "ಇಷ್ಟು",
    "to": "ಗೆ",
    "get": "ಪಡೆಯಲು",
    "some": "ಕೆಲವು",
    "ideas": "ಕಲ್ಪನೆಗಳು",
    "from": "ಇಂದ",
    "you": "ನೀವು",
    "all": "ಎಲ್ಲರೂ",
    "unknown": "ಅಜ್ಞಾತ",
    "i": "ನಾನು",
    "think": "ಭಾವಿಸುತ್ತೇನೆ",
    "rural": "ಗ್ರಾಮೀಣ",
    "customers": "ಗ್ರಾಹಕರು",
    "want": "ಬಯಸುತ್ತಾರೆ",
    "to": "ಗೆ",
    "feel": "ಅನುಭವಿಸು",
    "as": "ಅಂತಹ",
    "important": "ಮುಖ್ಯ",
    "our": "ನಮ್ಮ",
    "living": "ಬದುಕು",
    "in": "ಇಲ್ಲ",
    "cities": "ನಗರಗಳು",
    "well": "ಚೆನ್ನಾಗಿ",
    "me": "ನನಗೆ",
    "with": "ಜೊತೆ",
    "this": "ಈ",
    "powerpoint": "ಪವರ್‌ಪಾಯಿಂಟ್",
    "presentation": "ಪ್ರದರ್ಶನ",
    "jack": "ಜ್ಯಾಕ್",
    "presents": "ಪ್ರದರ್ಶಿಸುತ್ತಾನೆ",
    "his": "ಅವನ",
    "how": "ಹೇಗೆ",
    "do": "ಮಾಡು",
    "feel": "ಭಾವಿಸು",
    "about": "ಬಗ್ಗೆ",
    "rural": "ಗ್ರಾಮೀಣ",
    "sales": "ಮಾರಾಟದ",
    "in": "ಇಲ್ಲ",
    "your": "ನಿಮ್ಮ",
    "districts": "ಜಿಲ್ಲೆಗಳು",
    "excuse": "ಕ್ಷಮಿಸಿ",
    "me": "ನನಗೆ",
    "i": "ನಾನು",
    "didn't": "ಇಲ್ಲಿರಲಿಲ್ಲ",
    "catch": "ಹಿಡಿಯಲು",
    "that": "ಅದು",
    "can": "ಸಾಧ್ಯ",
    "we": "ನಾವು",
    "fix": "ಮಾಡು",
    "the": "ಅದು",
    "next": "ಮುಂದಿನ",
    "meeting": "ಭೇಟಿ",
    "please": "ದಯವಿಟ್ಟು",
    "i": "ನಾನು",
    "suggest": "ಸೂಚಿಸು",
    "we": "ನಾವು",
    "break": "ವಿರಾಮ",
    "up": "ಮೇಲೇ",
    "into": "ಗೆ",
    "groups": "ಗುಂಪುಗಳು",
    "and": "ಮತ್ತು",
    "ideas": "ಕಲ್ಪನೆಗಳು",
    "we've": "ನಮಗೆ",
    "seen": "ನೋಡಿದ್ದೇವೆ",
    "presented": "ಪ್ರಸ್ತುತಿಸಲಾಗಿದೆ",
    "we": "ನಾವು",
    "are": "ಇರುವುದು",
    "considering": "ಪರಿಗಣಿಸುತ್ತಿದ್ದೇವೆ",
    "specific": "ನಿರ್ದಿಷ್ಟ",
    "data": "ಡೇಟಾ",
    "mining": "ಖನಿ",
    "procedures": "ನಿಯಮಗಳು",
    "to": "ಗೆ",
    "help": "ಸಹಾಯ",
    "deepen": "ಆಳವಾಗಿಸಲು",
    "our": "ನಮ್ಮ",
    "understanding": "ಅರಿವು",
    "i'd": "ನಾನು",
    "like": "ಇಷ್ಟು",
    "to": "ಗೆ",
    "thank": "ಧನ್ಯವಾದ",
    "jack": "ಜ್ಯಾಕ್",
    "for": "ಗೆ",
    "coming": "ಬರುವುದು",
    "our": "ನಮ್ಮ",
    "meeting": "ಭೇಟಿ",
    "today": "ಇಂದ",
    "and": "ಮತ್ತು",
    "i": "ನಾನು",
    "have": "ಹೊಂದಿದ್ದೇನೆ",
    "to": "ಗೆ",
    "a": "ಒಂದು",
    "thought": "ಭಾವನೆ",
    "let's": "ನೋಡು",
    "start": "ಪ್ರಾರಂಭಿಸು",
    "going": "ಹೋಗುವುದು",
    "around": "ಸುತ್ತ",
    "the": "ಅದು",
    "table": "ಟೇಬಲ್",
    "gathering": "ಸಮೂಹ",
    "your": "ನಿಮ್ಮ",
    "feedback": "ಪ್ರತಿಸ್ಪಂದನೆ",
    "on": "ಮೇಲೆ",
    "last": "ಕೊನೆಯ",
    "week's": "ವಾರದ",
    "presentation": "ಪ್ರದರ್ಶನ",
    "do": "ಮಾಡು",
    "you": "ನೀವು",
    "all": "ಎಲ್ಲರೂ",
    "have": "ಹೊಂದಿದ್ದೇನೆ",
    "any": "ಯಾವುದೇ",
    "other": "ಬೇರೆ",
    "questions": "ಪ್ರಶ್ನೆಗಳು",
    "before": "ಹಿಂದೆ",
    "we": "ನಾವು",
    "move": "ಹೆಜ್ಜೆ",
    "on": "ಮೇಲೆ",
    "to": "ಗೆ",
    "our": "ನಮ್ಮ",
    "next": "ಮುಂದಿನ",
    "topic": "ವಿಷಯ",
    "this": "ಈ",
    "is": "ಇದೆ",
    "important": "ಮುಖ್ಯ",
    "i": "ನಾನು",
    "think": "ಭಾವಿಸುತ್ತೇನೆ",
    "your": "ನಿಮ್ಮ",
    "feedback": "ಪ್ರತಿಸ್ಪಂದನೆ",
    "about": "ಬಗ್ಗೆ",
    "this": "ಈ",
    "matter": "ವಿಷಯ",
    "would": "ಇರಬಹುದು",
    "be": "ಆಗಬಹುದು",
    "appreciated": "ಮೆಚ್ಚಿದ",
    "what": "ಏನು",
    "are": "ಇರುವುದು",
    "your": "ನಿಮ್ಮ",
    "initial": "ಮೊದಲಿನ",
    "thoughts": "ಭಾವನೆಗಳು",
    "on": "ಮೇಲೆ",
    "the": "ಅದು",
    "proposal": "ಸೂಚನೆ",
    "i": "ನಾನು",
    "look": "ನೋಡು",
    "forward": "ಮುನ್ನಡೆ",
    "to": "ಗೆ",
    "hearing": "ಕೇಳುತ್ತಿದ್ದೇನೆ",
    "from": "ಇಂದ",
    "you": "ನೀವು",
    "all": "ಎಲ್ಲರೂ"
}

# Function to translate English sentence to Kannada
def translate_to_kannada(sentence):
    """Translate ``sentence`` word by word using ``vocabulary_mapping``.

    Each whitespace-separated word is lower-cased for the lookup; a word with
    no entry in the table is kept exactly as it was written.

    Args:
        sentence: English sentence; split on any whitespace.

    Returns:
        The translated words joined by single spaces.
    """
    translated_words = []
    for word in sentence.split():
        translated_words.append(vocabulary_mapping.get(word.lower(), word))
    return ' '.join(translated_words)

# Sentence used to try out the Kannada word mapping
english_sentence = "after briefly revising the changes that will take place we moved on to a brainstorming session concerning after customer support improvements"

# Map it word by word and show the input next to the output
kannada_translation = translate_to_kannada(english_sentence)
print(f"English Sentence: {english_sentence}")
print(f"Kannada Translation: {kannada_translation}")


English Sentence: after briefly revising the changes that will take place we moved on to a brainstorming session concerning after customer support improvements
Kannada Translation: ನಂತರ ಸಂಕ್ಷೇಪವಾಗಿ ಪುನರ್ವಿಮರ್ಶೆ ಅದು ಬದಲಾವಣೆಗಳು ಅದು ಸಾಧ್ಯ ತೆಗೆದುಕೊಳ್ಳುತ್ತವೆ ಸ್ಥಳ ನಾವು ಹಾರಾಡಿದ್ದೆವು ಮೇಲೆ ಗೆ ಒಂದು ಭಾವನಾ ವೃತ್ತಿ ಸೆಷನ್ ಪ್ರಸಂಗದ ನಂತರ ಗ್ರಾಹಕ ಬೆಂಬಲ ಮೆಚ್ಚಿಸುವಂತೆ


In [None]:
from sklearn.metrics import classification_report

# Actual and predicted labels
# NOTE(review): both lists are hard-coded literals; nothing in this cell derives
# them from the translations or models above. Confirm where they come from
# before relying on the report.
actual_labels = ['Telugu', 'Telugu', 'Punjabi', 'Kannada', 'Telugu', 'Punjabi', 'Punjabi', 'Kannada', 'Kannada', 'Kannada']
predicted_labels = ['Telugu', 'Kannada', 'Punjabi', 'Kannada', 'Telugu', 'Punjabi', 'Kannada', 'Kannada', 'Kannada', 'Punjabi']

# Generate classification report
report = classification_report(actual_labels, predicted_labels)

# Print the classification report
print(report)


              precision    recall  f1-score   support

     Kannada       0.60      0.75      0.67         4
     Punjabi       0.67      0.67      0.67         3
      Telugu       1.00      0.67      0.80         3

    accuracy                           0.70        10
   macro avg       0.76      0.69      0.71        10
weighted avg       0.74      0.70      0.71        10

