In [1]:
import pandas as pd
import torch
import requests

In [2]:
from transformers import TapasTokenizer, TapasForQuestionAnswering

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from transformers import pipeline, set_seed


In [5]:
# Checkpoint identifier shared by the model and the tokenizer so both are
# loaded from the same pretrained weights/vocabulary.
tapas_model_name = "google/tapas-base-finetuned-sqa"
tapas_model = TapasForQuestionAnswering.from_pretrained(
    pretrained_model_name_or_path=tapas_model_name
)

In [6]:
# Destination paths for the serialised model and the tokenizer vocabulary.
output_model_file = '../models/tapas.bin'
output_vocab_file = '../models/tapas_vocab.bin'

# NOTE(review): this stores the whole model object (not just a state_dict),
# so whoever loads the file needs the same class definitions importable.
torch.save(obj=tapas_model, f=output_model_file)

In [7]:
# Tokenizer matching the checkpoint loaded into `tapas_model`.
tapas_tokenizer = TapasTokenizer.from_pretrained(
    pretrained_model_name_or_path=tapas_model_name
)

In [8]:
# Persist the tokenizer vocabulary next to the saved model. The call's return
# value (a tuple with the written path) is what this cell displays.
tapas_tokenizer.save_vocabulary(output_vocab_file)

('../models/tapas_vocab.bin',)

In [9]:
# Alternative dataset kept for reference:
# table = pd.read_csv("../data/TestEnglishData.csv")

# Load the table and cast every cell to text in one step.
# NOTE(review): the cast is presumably required by the TAPAS tokenizer,
# which is fed this frame later -- confirm against the tokenizer docs.
table = pd.read_csv("../data/Luxury watch.csv").astype(str)
table.head(2)

Unnamed: 0,Brand,Model,Case Material,Strap Material,Movement Type,Water Resistance,Case Diameter (mm),Case Thickness (mm),Band Width (mm),Dial Color,Crystal Material,Complications,Power Reserve,Price (USD)
0,Rolex,Submariner,Stainless Steel,Stainless Steel,Automatic,300 meters,40.0,13.0,20,Black,Sapphire,Date,48 hours,9500
1,Omega,Seamaster,Titanium,Rubber,Automatic,600 meters,43.5,14.47,21,Blue,Sapphire,Date,60 hours,5800


In [10]:
# Translation helper (defaults translate English -> Nepali; swap the language
# codes for the Nepali -> English direction).
def translate_text(text, source_lang="en", target_lang="ne", timeout=10):
    """Translate ``text`` using the MyMemory HTTP API.

    Args:
        text: The text to translate.
        source_lang: Language code of ``text`` (default ``"en"``).
        target_lang: Language code to translate into (default ``"ne"``).
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the notebook kernel.

    Returns:
        The value of ``responseData.translatedText`` from the JSON response.

    Raises:
        requests.RequestException: On connection problems, a timeout, or a
            non-success HTTP status.
        KeyError: If the response JSON lacks the expected fields.
    """
    url = "https://api.mymemory.translated.net/get"
    params = {
        "q": text,
        "langpair": f"{source_lang}|{target_lang}",
    }
    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    data = response.json()
    return data["responseData"]["translatedText"]

# Smoke test of the Nepali -> English direction; the last expression is
# displayed as the cell output.
query = "सेवकहरू"
translated = translate_text(query, source_lang="ne", target_lang="en")
translated

'Servants'

In [11]:
# Smoke test of the English -> Nepali direction. (A previous `query`
# assignment that was immediately overwritten has been removed.)
query = "Question Answer"
translated = translate_text(query, "en", "ne")
translated

'प्रश्न / जवाफ'

In [12]:
def get_answer(question, table, tokenizer, model, nepali_query=None):
    """Answer English question(s) about ``table`` and print the answers in Nepali.

    Args:
        question: A single English question, or a list of questions.
        table: The pandas DataFrame to query. It is passed to the tokenizer
            as-is and answer cells are read back from it with ``.iloc``.
        tokenizer: Tokenizer used to encode the table/questions and to turn
            the model logits into predictions.
        model: Model called on the tokenizer output; its ``logits`` are used.
        nepali_query: The original Nepali wording, printed as the question
            label. A single value when ``question`` is a single question, or
            a list parallel to ``question``. Where no Nepali wording is
            given, the English question is printed instead.

    Returns:
        None. The question labels and translated answers are printed.
    """
    # Normalise both arguments to parallel lists.
    if not isinstance(question, list):
        question = [question]
        nepali_query = [nepali_query]
    elif nepali_query is None:
        # Previously this case crashed with "NoneType is not subscriptable".
        nepali_query = [None] * len(question)

    # Fall back to the English question when no Nepali label was supplied,
    # rather than printing "None".
    labels = [
        label if label is not None else asked
        for asked, label in zip(question, nepali_query)
    ]

    inputs = tokenizer(table=table, queries=question, padding="max_length", return_tensors="pt")
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    ans = tokenizer.convert_logits_to_predictions(
        inputs, outputs.logits.detach()
    )
    # ans[0] holds, per question, the predicted answer positions.
    # NOTE(review): each position is presumably a (row, column) pair that
    # ``table.iloc`` resolves to one cell -- confirm against the tokenizer docs.
    for i, a in enumerate(ans[0]):
        print("प्रश्न: ", labels[i])
        print("जवाफ:")
        for x in a:
            print(translate_text(str(table.iloc[x]),"en","ne"))
        print("-------------------------------------------")

In [13]:
def get_answer_in_nepali(nepali_query, table, tokenizer, model):
    """Answer a Nepali question about ``table``.

    The question is translated to English with ``translate_text`` and handed
    to ``get_answer``, which prints the result; the original Nepali wording is
    forwarded so it can be shown as the question label.
    """
    question_en = translate_text(nepali_query, source_lang="ne", target_lang="en")
    get_answer(
        question_en,
        table,
        tokenizer,
        model,
        nepali_query,
    )

In [14]:
# End-to-end run: Nepali question in, Nepali answer printed.
get_answer_in_nepali(
    nepali_query='भिलाको निकटतम गन्तव्य जुन इन्दिरा गान्धी एयरपोर्टको नजिक छ',
    table=table,
    tokenizer=tapas_tokenizer,
    model=tapas_model,
)

प्रश्न:  भिलाको निकटतम गन्तव्य जुन इन्दिरा गान्धी एयरपोर्टको नजिक छ
जवाफ:
Hamilton
-------------------------------------------


In [None]:

# Text-to-text generation pipeline used further down to rephrase the
# "Question: ... Answer: ..." string; the seed is fixed for repeatable output.
generator = pipeline(
    task='text2text-generation',
    model='facebook/mbart-large-50',
)
set_seed(seed=42)

In [112]:
# Ask TAPAS one question directly and build the prompt string that is fed to
# the generation pipeline in the next cell.
question = ["Strap color of the watch made by rolex"]
inputs = tapas_tokenizer(
    table=table,
    queries=question,
    padding="max_length",
    return_tensors="pt",
)
outputs = tapas_model(**inputs)

# Keep only the answer positions predicted for the first (and only) question.
ans = tapas_tokenizer.convert_logits_to_predictions(
    inputs, outputs.logits.detach()
)[0][0]

# Look up each predicted position in the table and join the cell texts.
answer_cells = [str(table.iloc[position]) for position in ans]
text = ", ".join(answer_cells)
new_text = f"Question: {question[0]}? Answer: {text}"

print(new_text)

Question: Strap color of the watch made by rolex? Answer: Stainless Steel


In [113]:

# Request five candidate generations (capped at max_length=150) for the
# "Question: ... Answer: ..." prompt built above; the returned list is the
# cell output.
generator(new_text, max_length=150, num_return_sequences=5)


[{'generated_text': 'Question: Strap color of the watch made by rolex? Answer: Stainless Steel'},
 {'generated_text': 'Question: Strap color of the watch made by rolx? Answer: Stainless Steel'},
 {'generated_text': 'Question: Strap color of the Watch made by rolex? Answer: Stainless Steel'},
 {'generated_text': 'Question: Strap color of the watch made by rolex?'},
 {'generated_text': 'Question: What is the color of the watch made by rolex?'}]