# 🇮🇳🌍 Cultural-Aware AI Language Tutor
Author: [Rhythm Bhatia](https://rhythmbhatia.com)

In [1]:
# Install dependencies
!pip install -q transformers sentencepiece torch googletrans==4.0.0-rc1 gtts
!apt-get install -y ffmpeg

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 43 not upgraded.


In [2]:
!pip install googletrans==4.0.0-rc1 --quiet


In [3]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from googletrans import Translator
from gtts import gTTS
from IPython.display import Audio, display
import random


In [4]:
# Load the Flan-T5 base checkpoint. It backs the explanation generator defined
# later in the notebook; phrase translation itself goes through the googletrans
# client created at the bottom of this cell.
FLAN_T5_CHECKPOINT = "google/flan-t5-base"  # single source of truth for both loads
tokenizer = AutoTokenizer.from_pretrained(FLAN_T5_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(FLAN_T5_CHECKPOINT)

# Shared googletrans client used for phrase translation.
translator = Translator()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [5]:
# --- USER CHOICE ---
# Language codes; the demo loop hands them to both the text-to-speech helper
# and the translation helper.
user_native_lang = "hi"  # Hindi (code), could be "ar" for Arabic, etc.
target_lang = "de"       # Target language: German

# --- PHRASES ---
# Starter phrases in the learner's native language. The literals are stored as
# real Devanagari text (previously they were mis-encoded byte soup).
basic_phrases = [
    "नमस्ते",            # Namaste - Hello
    "आप कैसे हैं?",       # How are you?
    "मेरा नाम ... है।",   # My name is...
    "शुक्रिया",           # Thank you
    "मुझे माफ़ करें"      # Excuse me / Sorry
]


In [9]:
from gtts import gTTS
from IPython.display import Audio, display
import random

# For translation: using googletrans.
# NOTE: this repeats the import and the Translator() construction already done
# in the earlier import/model cells; rebinding `translator` here replaces that
# earlier instance. translate_text() below reads this module-level name.
from googletrans import Translator
translator = Translator()

def tts_play(text, lang_code):
    """
    Synthesize `text` with gTTS and play it inline in the notebook.

    Args:
        text: The sentence to speak.
        lang_code: Language code handed to gTTS, e.g. 'hi' for Hindi or
            'de' for German.

    Returns:
        None. The audio player is rendered via IPython's display().

    The MP3 is written to a uniquely named temporary file instead of a
    `tts_<random 4 digits>.mp3` file in the working directory: random
    four-digit names can collide between calls and the files were never
    cleaned up.
    """
    import os
    import tempfile

    tts = gTTS(text=text, lang=lang_code)

    # mkstemp guarantees a fresh path; close the descriptor right away so gTTS
    # can reopen the file by name on every platform.
    fd, filename = tempfile.mkstemp(prefix="tts_", suffix=".mp3")
    os.close(fd)
    try:
        tts.save(filename)
        # With filename= the Audio object embeds the file's bytes when it is
        # constructed, so the file is no longer needed once display() returns.
        display(Audio(filename=filename, autoplay=True))
    finally:
        os.remove(filename)

def translate_text(text, src_lang, tgt_lang):
    """
    Return `text` translated by the module-level googletrans client.

    src_lang is the language code of the input and tgt_lang the language code
    of the desired output (for example 'hi' and 'de').
    """
    return translator.translate(text, dest=tgt_lang, src=src_lang).text


In [11]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Load Flan-T5 base model and tokenizer from one named checkpoint so the two
# from_pretrained() calls cannot drift apart.
FLAN_T5_CHECKPOINT = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(FLAN_T5_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(FLAN_T5_CHECKPOINT)

def generate_german_explanation(german_phrase):
    """
    Ask the loaded Flan-T5 model for a simple English explanation of a German phrase.

    The phrase is placed in an instruction prompt, tokenized into PyTorch
    tensors, run through model.generate() with at most 100 new tokens, and the
    first generated sequence is decoded back to text without special tokens.
    """
    encoded = tokenizer(
        f"Explain this German phrase in simple English:\n{german_phrase}",
        return_tensors="pt",
    )
    generated = model.generate(**encoded, max_new_tokens=100)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)


In [14]:
# Walk through the starter phrases: speak each one in the learner's language,
# then print and speak its translation into the target language.
for phrase in basic_phrases:
    print(f"\nNative phrase ({user_native_lang}): {phrase}")
    tts_play(text=phrase, lang_code=user_native_lang)

    german_phrase = translate_text(phrase, user_native_lang, target_lang)
    print(f"German translation: {german_phrase}")
    tts_play(text=german_phrase, lang_code=target_lang)


Native phrase (hi): नमस्ते


German translation: Hallo



Native phrase (hi): आप कैसे हैं?


German translation: Wie geht es dir?



Native phrase (hi): मेरा नाम ... है।


German translation: ich heiße.



Native phrase (hi): शुक्रिया


German translation: Danke



Native phrase (hi): मुझे माफ़ करें


German translation: Es tut mir Leid


# Generate Culturally Sensitive Quiz

In [56]:
import random

# Hindi-German dialogue pairs: each key is a Hindi line and its value the
# German rendering used as the correct quiz answer. The literals are stored as
# real Unicode text (previously the Devanagari and the German umlauts were
# mis-encoded).
dialogues = {
    "मुझे एक मौका दो।": "Gib mir eine Chance.",
    "बाबा साहेब, मैं तुम्हारे लिए लड़ूंगा।": "Baba Saheb, ich werde für dich kämpfen.",
    "ज़िंदगी में दो बार मौका आता है।": "Im Leben kommt die Gelegenheit zweimal."
}

# Function to generate distractors (basic version for now)
def generate_distractors(correct_answer, num_distractors=3):
    """
    Build wrong answer options by shuffling the words of the correct answer.

    Args:
        correct_answer: The correct sentence; it is split on whitespace.
        num_distractors: Maximum number of distractors to return (default 3,
            matching the four-option quiz layout).

    Returns:
        A list of distinct word-order permutations, none of which equals the
        original word order. The list is shorter than `num_distractors` (and
        possibly empty) when the sentence does not have enough distinct
        reorderings, e.g. a one- or two-word answer or repeated words.

    The previous version looped until it had collected three distractors, so
    it never terminated for such short answers.
    """
    from collections import Counter
    from math import factorial

    words = correct_answer.split()
    if not words or num_distractors <= 0:
        return []

    # Compare against the whitespace-normalised sentence so that stray or
    # doubled spaces in the input cannot let the correct order slip through.
    canonical = " ".join(words)

    # Number of distinct orderings of a multiset: n! / (c1! * c2! * ...).
    distinct_orderings = factorial(len(words))
    for count in Counter(words).values():
        distinct_orderings //= factorial(count)

    # One ordering is the correct answer itself; never ask for more than exist.
    target = min(num_distractors, distinct_orderings - 1)

    distractors = []
    seen = {canonical}
    while len(distractors) < target:
        candidate = " ".join(random.sample(words, len(words)))
        if candidate not in seen:
            seen.add(candidate)
            distractors.append(candidate)
    return distractors

# Generate the cultural quiz: one multiple-choice question per dialogue pair.
print("\n--- Generating Cultural Quiz ---\n")
for idx, (hi, de) in enumerate(dialogues.items(), 1):
    correct = de
    distractors = generate_distractors(correct)
    options = distractors + [correct]
    # Shuffle so the correct answer does not always land in the last slot.
    random.shuffle(options)

    # Print question (the header arrow is a real "→"; it used to be mis-encoded)
    print(f"Question {idx} for: \"{hi}\" → \"{de}\"\n")
    print(f"{idx}. What does '{hi}' mean in German?\n")
    # zip() stops at the shorter input, so at most four options get a letter.
    for opt_idx, opt in zip("ABCD", options):
        print(f"{opt_idx}) {opt}")
    # Look the answer up after shuffling so the letter matches the printed list.
    correct_letter = "ABCD"[options.index(correct)]
    print(f"\nCorrect answer: {correct_letter}\n")
    print("-" * 60)



--- Generating Cultural Quiz ---

Question 1 for: "मुझे एक मौका दो।" → "Gib mir eine Chance."

1. What does 'मुझे एक मौका दो।' mean in German?

A) Gib Chance. mir eine
B) Gib mir eine Chance.
C) eine Gib Chance. mir
D) Chance. eine mir Gib

Correct answer: B

------------------------------------------------------------
Question 2 for: "बाबा साहेब, मैं तुम्हारे लिए लड़ूंगा।" → "Baba Saheb, ich werde für dich kämpfen."

2. What does 'बाबा साहेब, मैं तुम्हारे लिए लड़ूंगा।' mean in German?

A) ich Baba dich für Saheb, werde kämpfen.
B) Baba dich ich für kämpfen. Saheb, werde
C) Baba Saheb, ich werde für dich kämpfen.
D) für ich Saheb, dich kämpfen. Baba werde

Correct answer: C

------------------------------------------------------------
Question 3 for: "‡§ú‡§º‡§ø‡§Ç‡§¶‡§ó‡•Ä ‡§Æ‡•á‡§Ç ‡§¶‡•ã ‡§¨‡§æ‡§