In [30]:
# !pip install --upgrade --force-reinstall ctranslate2==3.24.0 
# !pip install tensorflow pandas
# !pip install transformers

In [15]:
import pandas as pd
import tensorflow as tf
import os
import numpy as np

from inference.engine import Model

In [6]:
# Load the CTranslate2 Indic->English checkpoint on CPU.
# The checkpoint path is assembled with os.path.join instead of a hard-coded
# backslash separator: a backslash inside a normal string literal is an escape
# character (and an invalid one here), and it only resolves as a path on Windows.
model = Model(
    os.path.join('indic-en-preprint', 'ct2_fp16_model'),
    model_type="ctranslate2",
    device='cpu',
)
# model = Model('indic-en-preprint/fairseq_model', model_type="fairseq")

Initializing sentencepiece model for SRC and TGT
Initializing model for translation


In [3]:
# Language codes handed to model.batch_translate for the smoke test below.
src_lang = 'hin_Deva'
tgt_lang = 'eng_Latn'

In [4]:
# A handful of short Hindi sentences used as a quick check of the translator.
sents = [
    "वह बहुत समझदार है।",
    "उसकी मुस्कान से सारा कमरा रोशन हो जाता है।",
    "उसका दिल सोने का है।",
    "वह हमेशा सबकी मदद करने को तैयार रहती है।",
    "उसकी हंसी सुनकर दिल खुश हो जाता है।",
    "वह बहुत प्रेरणादायक है।",
    "उसकी दयालुता हर किसी को छू जाती है।",
    "वह सबसे अलग और अनोखी है।",
]

# Translate the whole list in one call and display the result.
ops = model.batch_translate(sents, src_lang, tgt_lang)
ops

["He's very understanding.",
 'Her smile lights up the entire room.',
 'He has a heart of gold.',
 'She is always ready to help everyone.',
 'Hearing his laughter fills the heart with joy.',
 'He is very inspiring.',
 'His kindness touches everyone.',
 'He is the most different and unique.']

### Using Claude-generated text for translation and comparison

In [9]:
# Maps the upper-case language labels found in the spreadsheet's 'Language'
# column to the language codes passed to model.batch_translate.
lang_key_dict = dict(
    HINDI='hin_Deva',
    MALAYALAM='mal_Mlym',
    ASSAMESE='asm_Beng',
    ENGLISH='eng_Latn',
    BENGALI='ben_Beng',
    KANNADA='kan_Knda',
    TELUGU='tel_Telu',
    ORIYA='ory_Orya',
    GUJARATI='guj_Gujr',
)

In [7]:
%%time
# Target language code for the translations in this section; translate_content
# reads this module-level name each time it is called.
tgt_lang = 'eng_Latn'

# Function to translate content based on language column
def translate_content(row):
    """Translate one row's ``Content`` into the target language.

    The source language code is looked up in ``lang_key_dict`` using the row's
    ``Language`` value; the target language is the module-level ``tgt_lang``.

    Args:
        row: Mapping-like row (for example the Series that
            ``DataFrame.apply(..., axis=1)`` passes in) with ``'Language'``
            and ``'Content'`` entries.

    Returns:
        The translated string, or ``None`` when ``Content`` is not a string
        or the row's language has no entry in ``lang_key_dict``.
    """
    text = row['Content']
    # pandas represents an empty cell as NaN (a float); such values cannot be
    # translated, so skip them instead of handing them to the model.
    if not isinstance(text, str):
        return None

    src_lang = lang_key_dict.get(row['Language'])
    if not src_lang:
        return None

    # batch_translate takes and returns a list; wrap the single text and
    # unwrap the single result.
    return model.batch_translate([text], src_lang, tgt_lang)[0]

# Read the offers spreadsheet, give the two space-padded headers clean names,
# and trim surrounding whitespace from the language labels.
df2 = (
    pd.read_excel('./calude_translations/claude_generations.xlsx')
    .rename(columns={' language ': 'Language', ' offer_text ': 'Content'})
    .assign(Language=lambda frame: frame['Language'].str.strip())
)
df2.head()

CPU times: total: 125 ms
Wall time: 264 ms


Unnamed: 0,offer_id,Language,Content,english_translation
0,1,HINDI,डिजिटल बैंकिंग अपनाएं! पहले 3 महीने में 10 UP...,Adopt digital banking! Make 10 UPI transactio...
1,2,KANNADA,ನಿಮ್ಮ ಮೊದಲ ಫಿಕ್ಸ್ಡ್ ಡಿಪಾಸಿಟ್ ಮೇಲೆ 0.5% ಹೆಚ್ಚು...,Get 0.5% extra interest on your first fixed d...
2,3,TELUGU,"కొత్త సేవింగ్స్ ఖాతాపై ఉచిత ఆరోగ్య బీమా! ₹50,...",Free health insurance on new savings account!...
3,4,MALAYALAM,"വിദ്യാർത്ഥികൾക്കായി പ്രത്യേക ഓഫർ! ₹1,000 മിനി...",Special offer for students! Savings account w...
4,5,BENGALI,হোম লোনে স্পেশাল অফার! প্রসেসিং ফি-তে 50% ছাড...,Special offer on home loans! 50% off on proce...


In [10]:
%%time
# Translate every row (one translate_content call per row), then attach the
# results as a new column.
indic_translations = df2.apply(translate_content, axis=1)
df2['indic_translation'] = indic_translations
df2.head()

CPU times: total: 6min 42s
Wall time: 1min 59s


Unnamed: 0,offer_id,Language,Content,english_translation,indic_translation
0,1,HINDI,डिजिटल बैंकिंग अपनाएं! पहले 3 महीने में 10 UP...,Adopt digital banking! Make 10 UPI transactio...,Adopt digital banking! Make 10 UPI transaction...
1,2,KANNADA,ನಿಮ್ಮ ಮೊದಲ ಫಿಕ್ಸ್ಡ್ ಡಿಪಾಸಿಟ್ ಮೇಲೆ 0.5% ಹೆಚ್ಚು...,Get 0.5% extra interest on your first fixed d...,Get 0. 5% extra interest on your first fixed d...
2,3,TELUGU,"కొత్త సేవింగ్స్ ఖాతాపై ఉచిత ఆరోగ్య బీమా! ₹50,...",Free health insurance on new savings account!...,Free health insurance on new savings account! ...
3,4,MALAYALAM,"വിദ്യാർത്ഥികൾക്കായി പ്രത്യേക ഓഫർ! ₹1,000 മിനി...",Special offer for students! Savings account w...,"Special offer for students! ₹1,000 minimum bal..."
4,5,BENGALI,হোম লোনে স্পেশাল অফার! প্রসেসিং ফি-তে 50% ছাড...,Special offer on home loans! 50% off on proce...,Special offer on home loans! 50 percent off on...


In [14]:
# Remove pandas' row-count and column-width display limits so the full
# strings are shown.
for display_option in ('display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, None)

# Show the spreadsheet's English translation next to the model's translation.
comparison_columns = [' english_translation', 'indic_translation']
df2[comparison_columns].head(9)

Unnamed: 0,english_translation,indic_translation
0,Adopt digital banking! Make 10 UPI transactions in first 3 months and get ₹500 cashback. {url} Terms and conditions apply,Adopt digital banking! Make 10 UPI transactions in the first 3 months and get ₹500 cashback. {url} Terms and conditions apply
1,"Get 0.5% extra interest on your first fixed deposit. Minimum deposit ₹25,000. {url} Terms apply","Get 0. 5% extra interest on your first fixed deposit. Minimum deposit is $25,000. {url} rules apply"
2,"Free health insurance on new savings account! Open account with ₹50,000 balance and get ₹2 lakh health insurance. {url} Terms apply","Free health insurance on new savings account! Start with ₹50,000 in savings and get ₹2 lakh in health insurance. {url} Terms apply"
3,"Special offer for students! Savings account with ₹1,000 minimum balance, free debit card. {url} Terms apply","Special offer for students! ₹1,000 minimum balance savings account, free debit card. {url} Terms apply"
4,"Special offer on home loans! 50% off on processing fee and ₹2,000 cashback on first 3 EMIs. {url} Terms apply","Special offer on home loans! 50 percent off on processing fee and ₹2,000 cashback on first 3 EMIs. {url} Terms apply"
5,Family savings offer! Open three accounts and get annual locker rent waiver. {url} Terms apply,"Family Savings Offer! Open three accounts, get annual locker rent forgiveness. {url} Terms apply"
6,New Year deposit scheme! Get additional 0.3% interest on ₹1 lakh FD and free VISA debit card. {url} Terms apply,New Year Deposit Scheme! ₹1 lakh FD with additional 0. 3% interest and free VISA debit card. {url} Terms apply
7,Merchant special! Open business account and get free POS machine with first year fee waiver. {url} Terms apply,"Merchant Special! Open a business account and get a free POS machine, first-year fees waived. {url} Terms and conditions apply"
8,Women's Day special! Free accident insurance with women's savings account. {url} Terms apply,Women's Day Special! Free Accident Insurance for Women's Savings Accounts. {url} Terms apply


In [None]:
# model_dir = './encoder_models/universal_sentence_encoder/'
# # Create the directory if it doesn't exist
# if not os.path.exists(model_dir):
#     os.makedirs(model_dir)
# # Download and save the model locally
# model_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
# embed_model = hub.load(model_url)

# # Save the model in TensorFlow SavedModel format
# tf.saved_model.save(embed_model, model_dir)

In [16]:
# Prompt reused by the sanity checks that follow.
prompt_ = 'where are the easter eggs?'

# Restore the sentence encoder from its local SavedModel directory.
model_dir = './encoder_models/universal_sentence_encoder/'
embed_model = tf.saved_model.load(model_dir)

def generate_semantic_embeddings(text_):
    """Embed a single text with the loaded ``embed_model``.

    Args:
        text_: The string to embed.

    Returns:
        A NumPy array built from the first (and only) row of the model's
        output for ``[text_]``, or ``None`` when ``text_`` is not a string.
    """
    # translate_content returns None for rows whose language is not mapped,
    # so the translated column can contain non-string entries; skip those
    # instead of passing them to the encoder.
    if not isinstance(text_, str):
        return None

    # The model takes a batch, so wrap the text and take row 0 of the output.
    return np.array(embed_model([text_])[0])

# Sanity check: embed the prompt once (rather than once per print) and report
# the result's type and shape.
sample_embedding = generate_semantic_embeddings(prompt_)
print(type(sample_embedding))
print(sample_embedding.shape)

<class 'numpy.ndarray'>
(512,)


In [19]:
# Embed each translated text and store the vectors in a new column.
semantic_vectors = df2['indic_translation'].apply(generate_semantic_embeddings)
df2['semantic_embeddings'] = semantic_vectors
df2.head(2)

Unnamed: 0,offer_id,Language,Content,english_translation,indic_translation,semantic_embeddings
0,1,HINDI,डिजिटल बैंकिंग अपनाएं! पहले 3 महीने में 10 UPI लेनदेन करें और ₹500 का कैशबैक पाएं। {url} नियम और शर्तें लागू,Adopt digital banking! Make 10 UPI transactions in first 3 months and get ₹500 cashback. {url} Terms and conditions apply,Adopt digital banking! Make 10 UPI transactions in the first 3 months and get ₹500 cashback. {url} Terms and conditions apply,"[-0.06279335, -0.08234897, 0.06570612, -0.011835739, -0.06148587, 0.05064923, -0.07641385, -0.0019887919, -0.010464231, -0.08569036, -0.03522917, -0.06622817, 0.017096644, -0.08115493, 0.07069805, -0.013183683, -0.03958686, -0.061098382, 0.075691566, 0.073157735, -0.07518699, -0.036868516, 0.0586222, -0.016687354, 0.05909014, -0.07354271, 0.024848731, 0.026767397, -0.011433944, 0.04973659, 0.0726621, -0.010590959, 0.011511381, 0.04342427, 0.07720539, 0.019545635, 0.0580026, 0.015614431, -0.014812285, 0.000810189, 0.047893643, 0.024627812, 0.0055711027, -0.033402957, 0.0675991, -0.028233688, -0.022589915, -0.042362247, -0.019771857, -0.054816652, -0.0612316, 0.054464556, -0.044991624, 0.03203054, -0.027729345, 0.06965488, 0.06434224, -0.08157508, -0.023690771, -0.0033513543, -0.05811212, 0.045949325, 0.068553396, 0.06503879, 0.0321423, 0.04643266, 0.03214546, -0.012230064, -0.06845953, 0.045918804, 0.052757148, 0.001100141, -0.01979643, 0.019600622, 0.017714785, 0.0054894118, 0.065792546, 0.030697117, -0.059203953, -0.07481173, -0.05881866, 0.0022732632, -0.00085161155, -0.026839077, 0.01105469, 0.0025519014, 0.005559877, 0.039351217, -0.043023802, 0.0831232, -0.050083965, 0.01441955, -0.009394929, -0.030740375, 0.079788566, -0.05591113, 0.034341697, 0.052515127, -0.044559132, -0.05086242, ...]"
1,2,KANNADA,"ನಿಮ್ಮ ಮೊದಲ ಫಿಕ್ಸ್ಡ್ ಡಿಪಾಸಿಟ್ ಮೇಲೆ 0.5% ಹೆಚ್ಚುವರಿ ಬಡ್ಡಿ ಪಡೆಯಿರಿ. ಕನಿಷ್ಠ ಠೇವಣಿ ₹25,000. {url} ನಿಯಮಗಳು ಅನ್ವಯಿಸುತ್ತವೆ","Get 0.5% extra interest on your first fixed deposit. Minimum deposit ₹25,000. {url} Terms apply","Get 0. 5% extra interest on your first fixed deposit. Minimum deposit is $25,000. {url} rules apply","[-0.07064705, -0.08348525, 0.05635025, -0.043083206, 0.03605768, 0.046074204, 0.03801304, 0.023214363, 0.066929266, -0.08409765, -0.022780603, 0.05785051, 0.036134895, -0.02368411, -0.007276454, 0.07925014, 0.05387832, -0.06755335, -0.019760335, -0.00013715161, -0.05994656, 0.05848932, 0.0671636, 0.052389883, 0.036239788, -0.0010711898, 0.04806694, -0.071624935, -0.06058858, -0.0008897188, -0.028298473, -0.032224834, 0.03849544, -0.06483062, -0.062436838, -0.062138576, 0.025131552, 0.057956807, 0.07123448, 0.07391777, -0.023643726, -0.0030577767, 0.02862718, 0.032232318, -0.039504003, 0.0747742, -0.03659264, -0.022989137, -0.03434018, -0.04635427, -0.07876397, 0.042836174, -0.054848254, -0.05870053, 0.013067477, 0.06496622, 0.015961332, -0.07805041, 0.030788, -0.033998683, -0.032548033, 0.012712098, 0.054632742, -0.034430828, 0.04591141, 0.009390439, 0.0011482019, -0.050777078, 0.041529477, 0.014811275, -0.011899975, -0.031058481, 0.008544971, 0.06087542, -0.036219902, 0.07638309, 0.03537304, 0.013263376, -0.013128799, -0.085811295, -0.08297776, 0.061076954, 0.061854176, -0.06452391, 0.011256483, -0.017088352, 0.051629227, -0.005141062, 0.0026634918, 0.03778259, -0.01460975, 0.06938353, -0.080668345, 0.048896555, -0.033358574, -0.0086240545, -0.02452125, -0.0094752135, -0.043183375, -0.017354667, ...]"


In [34]:
# import os
# from transformers import TFAutoModel, AutoTokenizer

# # Define the model directory
# model_dir = './encoder_models/paraphrase_mpnet_v2/'

# # Create the directory if it doesn't exist
# if not os.path.exists(model_dir):
#     os.makedirs(model_dir)

# # Load the Paraphrase MPNet V2 model and tokenizer for TensorFlow
# model_name = "sentence-transformers/paraphrase-mpnet-base-v2"
# model = TFAutoModel.from_pretrained(model_name)
# tokenizer = AutoTokenizer.from_pretrained(model_name)

# # Save the model and tokenizer locally
# model.save_pretrained(model_dir)
# tokenizer.save_pretrained(model_dir)

# print(f"Model and tokenizer saved to {model_dir}")


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFMPNetModel: ['embeddings.position_ids']
- This IS expected if you are initializing TFMPNetModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFMPNetModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFMPNetModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFMPNetModel for predictions without further training.


Model and tokenizer saved to ./encoder_models/paraphrase_mpnet_v2/


In [42]:
import numpy as np
from transformers import TFAutoModel, AutoTokenizer

# Define the model directory where the model was saved
model_dir = './encoder_models/paraphrase_mpnet_v2/'

# Load the saved encoder and tokenizer. The encoder is bound to `mpnet_model`
# rather than `model` so that it does not overwrite the translation model that
# translate_content calls through the module-level name `model`.
mpnet_model = TFAutoModel.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)


def generate_contextual_embeddings(text_, pooling='cls'):
    """Embed a single text with the locally saved MPNet encoder.

    Args:
        text_: The string to embed.
        pooling: How to reduce the per-token hidden states to one vector.
            ``'cls'`` (default) returns the hidden state of the first token,
            which is what this function has always returned. ``'mean'``
            averages the hidden states of all tokens, weighted by the
            attention mask.

    Returns:
        A 1-D NumPy array with one value per hidden dimension, or ``None``
        when ``text_`` is not a string.

    Raises:
        ValueError: If ``pooling`` is neither ``'cls'`` nor ``'mean'``.
    """
    if pooling not in ('cls', 'mean'):
        raise ValueError(f"pooling must be 'cls' or 'mean', got {pooling!r}")

    # The translated column can contain None for rows that were not
    # translated; skip those instead of passing them to the tokenizer.
    if not isinstance(text_, str):
        return None

    # truncation=True clips inputs that exceed the tokenizer's maximum length
    # instead of letting an over-long text fail inside the model.
    inputs = tokenizer(text_, return_tensors='tf', truncation=True)
    outputs = mpnet_model(inputs)

    # Drop the batch axis (a single text was encoded): (tokens, hidden).
    hidden = np.array(outputs.last_hidden_state)[0]
    if pooling == 'cls':
        return hidden[0]

    # Mean pooling: weight each token by its attention-mask value.
    mask = np.array(inputs['attention_mask'])[0].astype(hidden.dtype)[:, None]
    return (hidden * mask).sum(axis=0) / max(float(mask.sum()), 1e-9)

# Test with a prompt: embed it once (rather than once per print) and report
# the result's type and shape.
prompt_ = 'where are the easter eggs?'
prompt_embedding = generate_contextual_embeddings(prompt_)
print(type(prompt_embedding))
print(prompt_embedding.shape)

All model checkpoint layers were used when initializing TFMPNetModel.

All the layers of TFMPNetModel were initialized from the model checkpoint at ./encoder_models/paraphrase_mpnet_v2/.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFMPNetModel for predictions without further training.


<class 'numpy.ndarray'>
(768,)


In [45]:
# Embed the test prompt and display the shape of the resulting vector.
op = generate_contextual_embeddings(prompt_)
op.shape

(768,)

In [44]:
# Embed each translated text with the contextual encoder and store the
# vectors in a new column.
contextual_vectors = df2['indic_translation'].apply(generate_contextual_embeddings)
df2['contextual_embedings'] = contextual_vectors
df2.head(2)

Unnamed: 0,offer_id,Language,Content,english_translation,indic_translation,semantic_embeddings,contextual_embedings
0,1,HINDI,डिजिटल बैंकिंग अपनाएं! पहले 3 महीने में 10 UPI लेनदेन करें और ₹500 का कैशबैक पाएं। {url} नियम और शर्तें लागू,Adopt digital banking! Make 10 UPI transactions in first 3 months and get ₹500 cashback. {url} Terms and conditions apply,Adopt digital banking! Make 10 UPI transactions in the first 3 months and get ₹500 cashback. {url} Terms and conditions apply,"[-0.06279335, -0.08234897, 0.06570612, -0.011835739, -0.06148587, 0.05064923, -0.07641385, -0.0019887919, -0.010464231, -0.08569036, -0.03522917, -0.06622817, 0.017096644, -0.08115493, 0.07069805, -0.013183683, -0.03958686, -0.061098382, 0.075691566, 0.073157735, -0.07518699, -0.036868516, 0.0586222, -0.016687354, 0.05909014, -0.07354271, 0.024848731, 0.026767397, -0.011433944, 0.04973659, 0.0726621, -0.010590959, 0.011511381, 0.04342427, 0.07720539, 0.019545635, 0.0580026, 0.015614431, -0.014812285, 0.000810189, 0.047893643, 0.024627812, 0.0055711027, -0.033402957, 0.0675991, -0.028233688, -0.022589915, -0.042362247, -0.019771857, -0.054816652, -0.0612316, 0.054464556, -0.044991624, 0.03203054, -0.027729345, 0.06965488, 0.06434224, -0.08157508, -0.023690771, -0.0033513543, -0.05811212, 0.045949325, 0.068553396, 0.06503879, 0.0321423, 0.04643266, 0.03214546, -0.012230064, -0.06845953, 0.045918804, 0.052757148, 0.001100141, -0.01979643, 0.019600622, 0.017714785, 0.0054894118, 0.065792546, 0.030697117, -0.059203953, -0.07481173, -0.05881866, 0.0022732632, -0.00085161155, -0.026839077, 0.01105469, 0.0025519014, 0.005559877, 0.039351217, -0.043023802, 0.0831232, -0.050083965, 0.01441955, -0.009394929, -0.030740375, 0.079788566, -0.05591113, 0.034341697, 0.052515127, -0.044559132, -0.05086242, ...]","[-0.078879766, 0.13790976, 0.035404902, -0.12721097, 0.14067143, 0.11684385, -0.027707297, 0.11565995, -0.17882341, 0.15461062, 0.11597933, 0.055282936, 0.07095822, 0.38361347, -0.006678503, 0.07475465, 0.04911974, -0.08644661, -0.10793774, 0.04240662, 0.2470848, -0.040077373, -0.017820619, -0.31319278, 
0.021302447, -0.05589515, 0.052829556, 0.00013461802, -0.11594515, 0.024023142, -0.0035667606, 0.07989001, 0.23833632, 0.09155989, 0.08080955, -0.046088718, -0.037198402, 0.005426362, -0.30039665, 0.13055201, 0.20098582, -0.15796815, 0.13933162, -0.06289911, -0.0067791976, 0.31295168, -0.24203002, -0.061804157, 0.19842897, 0.018713586, -0.02053505, 0.07085898, 0.20679872, 0.015574882, 0.019246995, 0.028791891, -0.0924741, 0.25761443, -0.0061402284, -0.14188653, -0.11672087, -0.22644803, 0.07932238, -0.04874191, -0.068602085, 0.0160707, 0.18168598, -0.0009093508, -0.2046684, -0.0029250258, 0.0114475405, 0.09368828, -0.03708132, -0.10970088, -0.19224058, -0.19951624, -0.14377847, -0.07344102, 0.0030593053, 0.09052134, -0.023326308, 0.0062136687, 0.1768382, 0.12831497, -0.051881947, -0.0796406, 0.026988946, -0.0067981835, 0.0826343, -0.11473152, 0.16699007, 0.04907124, -0.03535266, -0.106793344, 0.009197295, 0.09869806, -0.10436119, 0.0032936716, 0.051540438, 0.42076725, ...]"
1,2,KANNADA,"ನಿಮ್ಮ ಮೊದಲ ಫಿಕ್ಸ್ಡ್ ಡಿಪಾಸಿಟ್ ಮೇಲೆ 0.5% ಹೆಚ್ಚುವರಿ ಬಡ್ಡಿ ಪಡೆಯಿರಿ. ಕನಿಷ್ಠ ಠೇವಣಿ ₹25,000. {url} ನಿಯಮಗಳು ಅನ್ವಯಿಸುತ್ತವೆ","Get 0.5% extra interest on your first fixed deposit. Minimum deposit ₹25,000. {url} Terms apply","Get 0. 5% extra interest on your first fixed deposit. Minimum deposit is $25,000. {url} rules apply","[-0.07064705, -0.08348525, 0.05635025, -0.043083206, 0.03605768, 0.046074204, 0.03801304, 0.023214363, 0.066929266, -0.08409765, -0.022780603, 0.05785051, 0.036134895, -0.02368411, -0.007276454, 0.07925014, 0.05387832, -0.06755335, -0.019760335, -0.00013715161, -0.05994656, 0.05848932, 0.0671636, 0.052389883, 0.036239788, -0.0010711898, 0.04806694, -0.071624935, -0.06058858, -0.0008897188, -0.028298473, -0.032224834, 0.03849544, -0.06483062, -0.062436838, -0.062138576, 0.025131552, 0.057956807, 0.07123448, 0.07391777, -0.023643726, -0.0030577767, 0.02862718, 0.032232318, -0.039504003, 0.0747742, -0.03659264, -0.022989137, -0.03434018, -0.04635427, -0.07876397, 0.042836174, -0.054848254, -0.05870053, 0.013067477, 0.06496622, 0.015961332, -0.07805041, 0.030788, -0.033998683, -0.032548033, 0.012712098, 0.054632742, -0.034430828, 0.04591141, 0.009390439, 0.0011482019, -0.050777078, 0.041529477, 0.014811275, -0.011899975, -0.031058481, 0.008544971, 0.06087542, -0.036219902, 0.07638309, 0.03537304, 0.013263376, -0.013128799, -0.085811295, -0.08297776, 0.061076954, 0.061854176, -0.06452391, 0.011256483, -0.017088352, 0.051629227, -0.005141062, 0.0026634918, 0.03778259, -0.01460975, 0.06938353, -0.080668345, 0.048896555, -0.033358574, -0.0086240545, -0.02452125, -0.0094752135, -0.043183375, -0.017354667, ...]","[0.010779031, 0.067730516, 0.050242662, -0.08396285, 0.1708701, -0.018776001, -0.0335462, -0.021874532, -0.108794734, 0.076467544, 0.037348613, -0.0566044, 0.04355195, 0.37843072, 0.02764845, 0.10002274, -0.09928024, 0.100630194, 0.0874863, -0.03894165, 0.22240853, -0.19257864, -0.16065583, -0.20025131, -0.23310591, 0.04196509, 0.08271134, 
0.0131022055, -0.12569228, -0.02046759, 0.08739575, 0.12318015, 0.161152, 0.35287917, 0.06359607, -0.14548263, 0.0026291772, -0.0065356046, -0.012284001, -0.08673214, 0.10788003, -0.1893535, -0.07710458, -0.11528161, -0.085536584, 0.11280583, 0.23509982, -0.072567806, 0.050739106, 0.20924601, 0.060905132, -0.17525594, -0.05641004, 0.020510228, -0.059152175, -0.12492334, -0.06491361, 0.04193167, -0.2457914, 0.051164556, -0.041437015, -0.04781973, 0.032999374, 0.07695628, 0.078741446, 0.07336651, -0.16770351, -0.15001108, -0.08413944, -0.1765466, 0.15082502, 0.318275, 0.026942205, -0.00871712, -0.042452462, 0.07661405, -0.033205353, 0.19261795, 0.024253517, 0.014951391, -0.06321065, 0.012642536, -0.07853774, 0.21278477, 0.16799223, -0.096445344, -0.040077295, -0.043696366, 0.1543918, 0.11510406, 0.38187438, 0.015355142, -0.08683703, 0.027139962, 0.22979751, 0.08152429, 0.10150401, -0.0468046, 0.025585152, 0.17896171, ...]"
