In [1]:
from transformers import TapasTokenizer, TapasForQuestionAnswering
import pandas as pd
import torch
import requests

  from .autonotebook import tqdm as notebook_tqdm


In [92]:

# Hugging Face checkpoint identifier passed to from_pretrained below.
model_name = "google/tapas-base-finetuned-sqa"
# Load the TAPAS question-answering model for that checkpoint
# (the recorded output below shows the files being downloaded).
model = TapasForQuestionAnswering.from_pretrained(model_name)


Downloading: 100%|██████████| 1.55k/1.55k [00:00<00:00, 1.86MB/s]
Downloading: 100%|██████████| 443M/443M [00:33<00:00, 13.3MB/s]   


In [2]:
# Local paths used by the torch.save / save_vocabulary / torch.load cells below.
output_model_file = '../models/tapas.bin'
output_vocab_file = '../models/tapas_vocab.bin'

In [4]:
# Checkpoint name for the tokenizer cell below (same value as assigned in the model-loading cell above).
model_name = "google/tapas-base-finetuned-sqa"

In [5]:
# Tokenizer for the same checkpoint; later cells use it to encode table + queries
# and to turn model logits into predicted cell positions.
tokenizer = TapasTokenizer.from_pretrained(model_name)

In [95]:
# Serialize the whole model object (not just its state_dict) to output_model_file.
torch.save(model, output_model_file)

In [96]:
# Write the tokenizer vocabulary to output_vocab_file; the recorded output is the
# returned tuple containing the written path.
tokenizer.save_vocabulary(output_vocab_file)

('../models/tapas_vocab.bin',)

In [3]:
# Reload the model object written by the torch.save cell above.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code —
# only load files you trust.
# NOTE(review): recent PyTorch releases reportedly default to weights_only=True, which
# would reject a whole-model pickle like this one — confirm against the installed version.
model = torch.load(output_model_file)

In [6]:
# Load the villa table that the questions below are asked against.
table = pd.read_csv("../data/TestEnglishData.csv")


In [7]:
# Inspect the column dtypes as loaded from the CSV.
table.dtypes

Villa Name             object
Location               object
Maximum Occupancy       int64
Country                object
Nearest Destination    object
Nearest Airport        object
Swimming Pool          object
Laundry                object
Kitchen                object
Servants               object
dtype: object

In [8]:
# Cast every column to string: the TAPAS tokenizer documentation requires all
# table cell values to be text.
table = table.astype(str)

In [9]:
# Confirm the cast: every column should now report dtype object.
table.dtypes

Villa Name             object
Location               object
Maximum Occupancy      object
Country                object
Nearest Destination    object
Nearest Airport        object
Swimming Pool          object
Laundry                object
Kitchen                object
Servants               object
dtype: object

In [10]:
# Display the string-typed table.
table

Unnamed: 0,Villa Name,Location,Maximum Occupancy,Country,Nearest Destination,Nearest Airport,Swimming Pool,Laundry,Kitchen,Servants
0,Giri Villa,Kathmandu,10,Nepal,Boudha,Tribhuvan International airport,Yes,Yes,Yes,Yes
1,Vellore Villa,Vellore,3,India,Vellore Fort,Bangalore International Airport,No,No,Yes,Yes
2,Bangalore Walas,Bangalore,70,India,Garden,Bangalore International Airport,No,No,No,No
3,Coffee Villa,Delhi,7,Nepal,Pashupatinanth,Indira Gandhi airport,No,Yes,Yes,Yes


In [11]:
### Translation helper (defaults: English -> Nepali); the example call below goes Nepali -> English
def translate_text(text, source_lang="en", target_lang="ne", timeout=10):
    """Translate ``text`` with the MyMemory REST API.

    Args:
        text: The string to translate.
        source_lang: Language code of ``text`` (default ``"en"``).
        target_lang: Language code to translate into (default ``"ne"``).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``translatedText`` field of the ``responseData`` object in the
        API's JSON reply.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or an HTTP error status.
    """
    url = "https://api.mymemory.translated.net/get"
    params = {
        "q": text,
        "langpair": f"{source_lang}|{target_lang}",
    }
    # Without a timeout, requests waits indefinitely and can hang the kernel.
    response = requests.get(url, params=params, timeout=timeout)
    # Surface HTTP failures here instead of as an opaque JSON/KeyError below.
    response.raise_for_status()
    data = response.json()
    return data["responseData"]["translatedText"]

# Quick check of the helper: translate a Nepali word to English and display the result.
query="सेवकहरू"
translated=translate_text(query,"ne","en")
translated

'Servants'

In [12]:
# Check the opposite direction: translate an English phrase to Nepali and display it.
# (An earlier assignment to `query` in this cell was overwritten before use and has been dropped.)
query="Question Answer"
translated=translate_text(query,"en","ne")
translated

'प्रश्न / जवाफ'

In [13]:
def get_answer(question, table, tokenizer, model, nepali_query=None):
    """Run table question answering and print each predicted cell in Nepali.

    Args:
        question: One English question, or a list of English questions.
        table: Table to query; predicted positions are looked up via
            ``table.iloc``.
        tokenizer: Called to encode the table and queries; must also provide
            ``convert_logits_to_predictions``.
        model: Called with the encoded inputs as keyword arguments; its result
            must expose ``.logits``.
        nepali_query: Text printed after "प्रश्न:" for each question — a single
            value or a list aligned with ``question``. When ``None``, the
            question text itself is printed.

    Returns:
        None. Results are written to stdout.

    Raises:
        ValueError: If fewer labels than questions are supplied.
    """
    questions = question if isinstance(question, list) else [question]

    # Normalise the display labels independently of the questions, so that a
    # list of questions without Nepali text no longer fails on ``None[i]``.
    if nepali_query is None:
        labels = questions
    elif isinstance(nepali_query, list):
        labels = nepali_query
    else:
        labels = [nepali_query]
    # Fail before the model call; surplus labels are ignored, as before.
    if len(labels) < len(questions):
        raise ValueError(
            f"got {len(labels)} nepali_query entries for {len(questions)} questions"
        )

    inputs = tokenizer(table=table, queries=questions, padding="max_length", return_tensors="pt")
    # Inference only: skip autograd bookkeeping during the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    predictions = tokenizer.convert_logits_to_predictions(
        inputs, outputs.logits.detach()
    )
    # predictions[0] holds one collection of table positions per question.
    for label, coordinates in zip(labels, predictions[0]):
        print("प्रश्न: ", label)
        print("जवाफ:")
        for coordinate in coordinates:
            # Each answer cell is translated to Nepali before printing.
            print(translate_text(str(table.iloc[coordinate]), "en", "ne"))
        print("-------------------------------------------")

In [14]:
# Show the table again for reference before asking questions.
table

Unnamed: 0,Villa Name,Location,Maximum Occupancy,Country,Nearest Destination,Nearest Airport,Swimming Pool,Laundry,Kitchen,Servants
0,Giri Villa,Kathmandu,10,Nepal,Boudha,Tribhuvan International airport,Yes,Yes,Yes,Yes
1,Vellore Villa,Vellore,3,India,Vellore Fort,Bangalore International Airport,No,No,Yes,Yes
2,Bangalore Walas,Bangalore,70,India,Garden,Bangalore International Airport,No,No,No,No
3,Coffee Villa,Delhi,7,Nepal,Pashupatinanth,Indira Gandhi airport,No,Yes,Yes,Yes


In [15]:
def get_answer_in_nepali(nepali_query, table, tokenizer, model):
    """Answer a Nepali question about ``table`` and print the result.

    The question is translated to English with ``translate_text`` and then
    handed to ``get_answer`` together with the original Nepali text, which
    ``get_answer`` prints alongside the answer.

    Args:
        nepali_query: The question, written in Nepali.
        table: Table to query; forwarded to ``get_answer``.
        tokenizer: Tokenizer forwarded to ``get_answer``.
        model: Model forwarded to ``get_answer``.

    Returns:
        None.
    """
    question_en = translate_text(nepali_query, "ne", "en")
    get_answer(question_en, table, tokenizer, model, nepali_query=nepali_query)

In [16]:
# End-to-end run: ask a question in Nepali and print the answer in Nepali.
get_answer_in_nepali('भिलाको निकटतम गन्तव्य जुन इन्दिरा गान्धी एयरपोर्टको नजिक छ', table, tokenizer, model)

प्रश्न:  भिलाको निकटतम गन्तव्य जुन इन्दिरा गान्धी एयरपोर्टको नजिक छ
जवाफ:
पशुपतिनाथ
-------------------------------------------


In [21]:
# Same pipeline as get_answer, run inline and without translation so the
# English answer is printed directly.
question = "Nearest destination to the villa which is close to Indira Gandhi Airport"
# The tokenizer call below is given a list of queries.
question = [question]
inputs = tokenizer(table=table, queries=question, padding="max_length", return_tensors="pt")
outputs = model(**inputs)

# ans[0] is iterated once per query; each entry holds the positions used to index table.iloc.
ans = tokenizer.convert_logits_to_predictions(
    inputs, outputs.logits.detach()
)
for i, a in enumerate(ans[0]):
    print("query:\n", question[i])
    print("answer")
    for x in a:
        print(str(table.iloc[x]))
    print("-------------------------------------------")

query:
 Nearest destination to the villa which is close to Indira Gandhi Airport
answer
Pashupatinanth
-------------------------------------------


In [22]:
# Show the table once more to check the printed answer against its row.
table

Unnamed: 0,Villa Name,Location,Maximum Occupancy,Country,Nearest Destination,Nearest Airport,Swimming Pool,Laundry,Kitchen,Servants
0,Giri Villa,Kathmandu,10,Nepal,Boudha,Tribhuvan International airport,Yes,Yes,Yes,Yes
1,Vellore Villa,Vellore,3,India,Vellore Fort,Bangalore International Airport,No,No,Yes,Yes
2,Bangalore Walas,Bangalore,70,India,Garden,Bangalore International Airport,No,No,No,No
3,Coffee Villa,Delhi,7,Nepal,Pashupatinanth,Indira Gandhi airport,No,Yes,Yes,Yes
