In [None]:
import pandas as pd
import random

# --- Item pool: read the item-name column from the source CSV ---
ITEM_COL = "Item_Name"  # column that holds the item names
file_path = "/mnt/data/Filtered_Unique_Menu_Items (1).csv"
df = pd.read_csv(file_path)
# Missing names are dropped and every remaining value is coerced to str.
items = df[ITEM_COL].dropna().astype(str).tolist()

# --- Quantity words keyed by number (list position + 1 is the quantity) ---
tamil_qty_map = dict(enumerate(
    ["oru", "rendu", "moonu", "naalu", "anju",
     "aaru", "ezhu", "ettu", "onpathu", "pathu"],
    start=1,
))
english_qty_map = dict(enumerate(
    ["one", "two", "three", "four", "five",
     "six", "seven", "eight", "nine", "ten"],
    start=1,
))
hindi_qty_map = dict(enumerate(
    ["ek", "do", "teen", "char", "paanch",
     "chhe", "saat", "aath", "nau", "das"],
    start=1,
))

def sentence_with_random_quantities(items_pool, qty_map, k=None):
    """
    Build one " , "-separated sentence of "<quantity word> <item>" parts.

    Items are sampled without replacement from ``items_pool`` (no item repeats
    inside a sentence, order is random). Each item gets a quantity drawn
    independently, with replacement, from the keys of ``qty_map``.

    Args:
        items_pool: Sequence of item names to sample from.
        qty_map: Mapping of quantity -> quantity word. Quantities are drawn
            uniformly from its keys, so a map that does not cover exactly
            1..10 still works instead of raising ``KeyError``. Must be
            non-empty whenever at least one item is selected.
        k: Number of items in the sentence. ``None`` picks a random count in
            6..10. The count is capped at ``len(items_pool)``.

    Returns:
        The sentence as a single string; ``""`` when no item is selected
        (empty pool or ``k == 0``).

    Raises:
        ValueError: If ``k`` is negative (raised by ``random.sample``).
    """
    if k is None:
        k = random.randint(6, 10)
    # Cap at the pool size: random.sample refuses to draw more than it has.
    chosen_items = random.sample(items_pool, min(k, len(items_pool)))
    # Draw from the map's own keys rather than a hard-coded 1..10 range, so
    # the lookup below can never miss.
    available_quantities = list(qty_map)
    return " , ".join(
        f"{qty_map[random.choice(available_quantities)]} {item}"
        for item in chosen_items
    )

def make_lang_df(lang_prefix, lang_name, qty_map, n_sentences=20):
    """
    Generate ``n_sentences`` random sentences for one language.

    Each sentence comes from ``sentence_with_random_quantities`` applied to
    the module-level ``items`` list with a random item count (``k=None``).

    Returns:
        A DataFrame with columns ``sentence_id`` (``<lang_prefix>_NN``,
        1-based and zero-padded to two digits), ``sentence`` and ``language``
        (always ``lang_name``).
    """
    records = [
        (
            f"{lang_prefix}_{seq:02d}",
            sentence_with_random_quantities(items, qty_map, k=None),
            lang_name,
        )
        for seq in range(1, n_sentences + 1)
    ]
    return pd.DataFrame(records, columns=["sentence_id", "sentence", "language"])

# One frame of 20 sentences per language
tamil_df = make_lang_df("tam", "tamil", tamil_qty_map, n_sentences=20)
english_df = make_lang_df("eng", "english", english_qty_map, n_sentences=20)
hindi_df = make_lang_df("hin", "hindi", hindi_qty_map, n_sentences=20)

# Write a single workbook with one sheet per language
output_excel_path = "/mnt/data/multilingual_sentences_random_qty.xlsx"
with pd.ExcelWriter(output_excel_path, engine="xlsxwriter") as writer:
    for sheet_name, frame in (
        ("Tamil", tamil_df),
        ("English", english_df),
        ("Hindi", hindi_df),
    ):
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

# Display the first 15 rows of each frame
import ace_tools as tools
for title, frame in (
    ("Tamil (random quantities) - preview", tamil_df),
    ("English (random quantities) - preview", english_df),
    ("Hindi (random quantities) - preview", hindi_df),
):
    tools.display_dataframe_to_user(title, frame.head(15))

output_excel_path


In [None]:
import pandas as pd
import random

# --- Settings ---
INPUT_CSV = "/mnt/data/Filtered_Unique_Menu_Items (1).csv"
OUTPUT_XLSX = "/mnt/data/multilingual_sentences_random_qty.xlsx"
ITEM_NAME_COL = "Item_Name"  # column that holds the item names
NUM_SENTENCES = 20  # default number of sentences per language
# Inclusive bounds for the number of items in one sentence
MIN_ITEMS_PER_SENT = 6
MAX_ITEMS_PER_SENT = 10

# --- Item pool: drop missing names, coerce the rest to str ---
df_src = pd.read_csv(INPUT_CSV)
items = df_src[ITEM_NAME_COL].dropna().astype(str).tolist()

# --- Quantity words keyed by number (list position + 1 is the quantity) ---
tamil_qty = dict(enumerate(
    ["oru", "rendu", "moonu", "naalu", "anju", "aaru", "ezhu", "ettu", "onpathu", "pathu"],
    start=1,
))
english_qty = dict(enumerate(
    ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"],
    start=1,
))
hindi_qty = dict(enumerate(
    ["ek", "do", "teen", "char", "paanch", "chhe", "saat", "aath", "nau", "das"],
    start=1,
))

def make_sentences(lang_prefix, lang_name, qty_map, count=NUM_SENTENCES):
    """
    Generate ``count`` random sentences for one language.

    Each sentence holds between ``MIN_ITEMS_PER_SENT`` and
    ``MAX_ITEMS_PER_SENT`` items (inclusive, capped at ``len(items)``) sampled
    without replacement from the module-level ``items`` list. Every item is
    prefixed with a quantity word whose quantity is drawn independently, with
    replacement, from the keys of ``qty_map``.

    Args:
        lang_prefix: Prefix for the sentence ids (``<lang_prefix>_NN``).
        lang_name: Value stored in the ``language`` column of every row.
        qty_map: Mapping of quantity -> quantity word. Must be non-empty
            whenever at least one item is selected.
        count: Number of sentences to generate.

    Returns:
        A DataFrame with columns ``sentence_id``, ``sentence``, ``language``.
    """
    # Draw from the map's own keys rather than a hard-coded 1..10 range, so a
    # map with a different key set cannot cause a KeyError in the lookup.
    available_quantities = list(qty_map)
    rows = []
    for i in range(1, count + 1):
        # How many items go into this sentence
        k = random.randint(MIN_ITEMS_PER_SENT, MAX_ITEMS_PER_SENT)
        # Unique items per sentence; never ask for more than the pool holds
        selected_items = random.sample(items, min(k, len(items)))
        parts = [
            f"{qty_map[random.choice(available_quantities)]} {item}"
            for item in selected_items
        ]
        rows.append((f"{lang_prefix}_{i:02d}", " , ".join(parts), lang_name))
    return pd.DataFrame(rows, columns=["sentence_id", "sentence", "language"])

# --- One frame per language ---
tamil_df = make_sentences(lang_prefix="tam", lang_name="tamil", qty_map=tamil_qty)
english_df = make_sentences(lang_prefix="eng", lang_name="english", qty_map=english_qty)
hindi_df = make_sentences(lang_prefix="hin", lang_name="hindi", qty_map=hindi_qty)

# --- Write a single workbook with one sheet per language ---
with pd.ExcelWriter(OUTPUT_XLSX, engine="xlsxwriter") as writer:
    for sheet_name, frame in (
        ("Tamil", tamil_df),
        ("English", english_df),
        ("Hindi", hindi_df),
    ):
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

# Display the first 10 rows of each frame
import ace_tools as tools
for title, frame in (
    ("Tamil (preview)", tamil_df),
    ("English (preview)", english_df),
    ("Hindi (preview)", hindi_df),
):
    tools.display_dataframe_to_user(title, frame.head(10))

OUTPUT_XLSX


In [None]:
import pandas as pd
import random

# --- Settings ---
INPUT_CSV = "/mnt/data/Filtered_Unique_Menu_Items (1).csv"
OUTPUT_XLSX = "/mnt/data/multilingual_sentences_random_qty.xlsx"
ITEM_NAME_COL = "Item_Name"  # column that holds the item names
NUM_SENTENCES = 20  # default number of sentences per language
# Inclusive bounds for the number of items in one sentence
MIN_ITEMS_PER_SENT = 6
MAX_ITEMS_PER_SENT = 10

# --- Item pool: drop missing names, coerce the rest to str ---
df_src = pd.read_csv(INPUT_CSV)
items = df_src[ITEM_NAME_COL].dropna().astype(str).tolist()

# --- Quantity words keyed by number (list position + 1 is the quantity) ---
tamil_qty = dict(enumerate(
    ["oru", "rendu", "moonu", "naalu", "anju", "aaru", "ezhu", "ettu", "onpathu", "pathu"],
    start=1,
))
english_qty = dict(enumerate(
    ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"],
    start=1,
))
hindi_qty = dict(enumerate(
    ["ek", "do", "teen", "char", "paanch", "chhe", "saat", "aath", "nau", "das"],
    start=1,
))

def make_sentences(lang_prefix, lang_name, qty_map, count=NUM_SENTENCES):
    """
    Generate ``count`` random sentences for one language.

    Each sentence holds between ``MIN_ITEMS_PER_SENT`` and
    ``MAX_ITEMS_PER_SENT`` items (inclusive, capped at ``len(items)``) sampled
    without replacement from the module-level ``items`` list. Every item is
    prefixed with a quantity word whose quantity is drawn independently, with
    replacement, from the keys of ``qty_map``.

    Args:
        lang_prefix: Prefix for the sentence ids (``<lang_prefix>_NN``).
        lang_name: Value stored in the ``language`` column of every row.
        qty_map: Mapping of quantity -> quantity word. Must be non-empty
            whenever at least one item is selected.
        count: Number of sentences to generate.

    Returns:
        A DataFrame with columns ``sentence_id``, ``sentence``, ``language``.
    """
    # Draw from the map's own keys rather than a hard-coded 1..10 range, so a
    # map with a different key set cannot cause a KeyError in the lookup.
    available_quantities = list(qty_map)
    rows = []
    for i in range(1, count + 1):
        # How many items go into this sentence
        k = random.randint(MIN_ITEMS_PER_SENT, MAX_ITEMS_PER_SENT)
        # Unique items per sentence; never ask for more than the pool holds
        selected_items = random.sample(items, min(k, len(items)))
        parts = [
            f"{qty_map[random.choice(available_quantities)]} {item}"
            for item in selected_items
        ]
        rows.append((f"{lang_prefix}_{i:02d}", " , ".join(parts), lang_name))
    return pd.DataFrame(rows, columns=["sentence_id", "sentence", "language"])

# --- One frame per language ---
tamil_df = make_sentences(lang_prefix="tam", lang_name="tamil", qty_map=tamil_qty)
english_df = make_sentences(lang_prefix="eng", lang_name="english", qty_map=english_qty)
hindi_df = make_sentences(lang_prefix="hin", lang_name="hindi", qty_map=hindi_qty)

# --- Write a single workbook with one sheet per language ---
with pd.ExcelWriter(OUTPUT_XLSX, engine="xlsxwriter") as writer:
    for sheet_name, frame in (
        ("Tamil", tamil_df),
        ("English", english_df),
        ("Hindi", hindi_df),
    ):
        frame.to_excel(writer, sheet_name=sheet_name, index=False)
