In [None]:
# Run-time switches read by the cells below.
# DISABLE_INTERNET: when True the base model/tokenizer are loaded from a local
#   input directory; when False they are requested by model name.
DISABLE_INTERNET = True
# INFERENCE: not referenced by any of the visible cells.
INFERENCE = True
# USE_PIPELINE: when True predictions go through a transformers
#   "question-answering" pipeline; when False the model is called directly and
#   the answer span is decoded by hand.
USE_PIPELINE = False

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input
# directory, in the order os.walk() visits them.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from tqdm import tqdm

In [None]:
from transformers import pipeline, BertForQuestionAnswering, BertTokenizerFast
import torch

# Source of the base BERT architecture and tokenizer: a local export when the
# kernel has no internet access, otherwise the model name.
if DISABLE_INTERNET:
    model_path = "../input/localnb001-export-transformers"
else:
    model_path = 'bert-base-multilingual-cased'

model = BertForQuestionAnswering.from_pretrained(model_path)
tokenizer = BertTokenizerFast.from_pretrained(model_path)

# Replace the base weights with the fine-tuned ones. Only the model weights
# are restored here; anything else stored in the checkpoint is ignored.
# This happens on every path (including online + pipeline) so that all
# configurations predict with the same fine-tuned model.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
output_model = "../input/localnb002-fine-tune/model.pth"
checkpoint = torch.load(output_model, map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # inference only: make sure dropout is disabled

# Use the GPU when one is present, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

if USE_PIPELINE:
    # pipeline() takes a device ordinal: 0 is the first GPU, -1 means CPU.
    model = pipeline("question-answering",
                     model=model,
                     tokenizer=tokenizer,
                     device=0 if device.type == "cuda" else -1)
else:
    model.to(device)

# Inference

In [None]:
# Read the test split into a DataFrame; the trailing expression previews its
# first rows as the cell output.
test_csv_path = "../input/chaii-hindi-and-tamil-question-answering/test.csv"
test = pd.read_csv(test_csv_path)
test.head()

In [None]:
def _answer_from_model(question, context):
    """Return the answer span predicted by the bare QA model for one example.

    The question/context pair is encoded as a single sequence of at most 512
    tokens. Longer inputs are truncated, so an answer located past the
    cut-off cannot be returned.
    """
    inputs = tokenizer(question,
                       context,
                       add_special_tokens=True,
                       max_length=512,
                       truncation=True,
                       return_offsets_mapping=True,
                       return_tensors="pt")
    # The model does not accept the offsets; keep them aside so the answer can
    # be cut out of the original context string afterwards.
    offsets = inputs.pop("offset_mapping")[0].tolist()
    # Per token: None for special tokens, 0 for the question, 1 for the context.
    sequence_ids = inputs.sequence_ids(0)
    input_ids = inputs["input_ids"][0].tolist()
    inputs = inputs.to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # Most likely first and last token of the answer (both inclusive).
    start = int(torch.argmax(outputs.start_logits))
    end = int(torch.argmax(outputs.end_logits))

    if end < start:
        # Inconsistent span: there is nothing sensible to extract.
        return ""

    if sequence_ids[start] == 1 and sequence_ids[end] == 1:
        # Slice the original text so the prediction keeps the exact characters
        # and spacing of the context instead of re-joined word pieces.
        return context[offsets[start][0]:offsets[end][1]]

    # The span touches question or special tokens: fall back to detokenising
    # the selected token ids.
    span_tokens = tokenizer.convert_ids_to_tokens(input_ids[start:end + 1])
    return tokenizer.convert_tokens_to_string(span_tokens)


def test_fn(use_pipeline=False):
    """Fill ``test["PredictionString"]`` with one predicted answer per row.

    Reads the module-level ``test`` DataFrame, ``model`` and (when
    ``use_pipeline`` is False) ``tokenizer`` and ``device``.

    Args:
        use_pipeline: If True, ``model`` is called as a question-answering
            pipeline and its ``"answer"`` field is used. If False, ``model``
            is called directly and the answer span is extracted by hand.

    Returns:
        The same ``test`` DataFrame, with the ``PredictionString`` column set.
    """
    test["PredictionString"] = ""
    predictions = []
    for row in tqdm(test.itertuples(), total=len(test)):
        # itertuples() yields (index, col_1, col_2, ...): positions 2 and 3
        # are the second and third columns of ``test``.
        context = row[2]
        question = row[3]

        if use_pipeline:
            pred = model(question=question, context=context)["answer"]
        else:
            pred = _answer_from_model(question, context)
        predictions.append(pred)

    # One column assignment instead of a .loc write per row.
    test["PredictionString"] = predictions
    return test

In [None]:
# Run prediction over every test row, using the prediction path selected by
# USE_PIPELINE.
test = test_fn(use_pipeline=USE_PIPELINE)

In [None]:
# Show the test frame, including the PredictionString column filled in by test_fn.
display(test)

In [None]:
# Write only the id and prediction columns to the submission file, without
# the DataFrame index.
submission_columns = ["id", "PredictionString"]
test[submission_columns].to_csv("submission.csv", index=False)