    Importing the necessary libraries

In [160]:
import pandas as pd
from rake_nltk import Rake
from nltk.stem import WordNetLemmatizer

    Reading the file

In [161]:
# Load the rights dataset and discard every row that has no 'Basic Rights'
# title, since later cells match and index on that column.
data = pd.read_csv("data.csv", encoding='iso-8859-1').dropna(subset=['Basic Rights'])

    Function to extract keywords out of a given sentence

In [162]:
def keyword_extractor(query):
    """Extract key phrases from ``query`` with RAKE.

    Args:
        query: The text to analyse.

    Returns:
        The phrases produced by ``Rake.get_ranked_phrases()`` for ``query``.
    """
    extractor = Rake()
    extractor.extract_keywords_from_text(query)
    return extractor.get_ranked_phrases()

    Using Google Translate (googletrans) to support different languages

In [163]:
from googletrans import Translator

# Shared googletrans client; later cells call translator.translate(...) to turn
# the query into English and the answers into Hindi.
translator = Translator()

    Taking the user query

In [164]:
# Read the question from the user and echo it back unchanged.
raw_query = input()
print(raw_query)
# The translator is told the text is Hindi ('hi') and asked for English ('en');
# the English text is what the rest of the notebook works with.
user_query = translator.translate(raw_query, src='hi', dest='en').text
print(user_query)

tell what to do if police arrest me without reason
Tell what to do if police arrest me without reason


    Processing the user query and extracting keywords from it

In [165]:
# Key phrases for the (translated) query.
user_keywords = keyword_extractor(user_query)

# Flatten the phrases into one list of individual words.
user_que = [word for phrase in user_keywords for word in phrase.split()]

# Reduce every word to its WordNet lemma.
lemmatizer = WordNetLemmatizer()
user_word = list(map(lemmatizer.lemmatize, user_que))

# Filler words that carry no information about which law is being asked for.
common_words = ["give", "information", "law", "provide", "know", "let", "regarding", "info"]

# Drop the filler words (compared before lowercasing) and lowercase the rest.
user_words = [word.lower() for word in user_word if word not in common_words]
user_words

['without', 'reason', 'police', 'arrest', 'tell']

    Finding the best matching law from the dataset

In [166]:
# Score every law title by the number of distinct query keywords it contains,
# then keep the title(s) that reach the highest non-zero score.
lemmatizer = WordNetLemmatizer()  # built once instead of once per law
user_word_set = set(user_words)

best_matching = []
for law in data['Basic Rights']:
    # Lowercase *before* lemmatizing: the WordNet lemmatizer leaves capitalised
    # inflected words (e.g. "Rights") unchanged, so lemmatizing first would make
    # them miss the lowercase, lemmatized keywords from the user query.
    right = {lemmatizer.lemmatize(word.lower()) for word in law.split()}
    matching_keywords = len(user_word_set & right)
    best_matching.append([matching_keywords, law])

# Highest score first; ties are ordered by title, descending.
best_match = sorted(best_matching, reverse=True)

# Best score across all laws; 0 when the dataset is empty or nothing matched.
thr = best_match[0][0] if best_match else 0

# A score of 0 means no keyword matched at all. Return no law in that case,
# rather than every law, so the "no answer" fallback further down can trigger.
best_matching_law = [law for count, law in best_match if count == thr] if thr > 0 else []
best_matching_law

['Protection against arrest and detention in certain cases',
 'Investigation and Arrest']

    Converting the dataframe to a dictionary (law title -> description) to look up the best matching law

In [167]:
# Map each 'Basic Rights' title to its 'Description' text.
result_dict = data.set_index('Basic Rights')['Description'].to_dict()

In [168]:
# from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
# import nltk
# model_name = "bert-base-uncased"

# # a) Get predictions
# nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)

# def get_answer(question, context):
#     QA_input = {
#         'question': question,
#         'context': context
#     }
#     res = nlp(QA_input)
#     return res['answer']

# answers_list = []    

# for i in range(len(best_matching_law)):
#     result = get_answer( user_query,result_dict[best_matching_law[i]])
#     answers_list.append(result)
# answers_list

    Using a pretrained BERT (Bidirectional Encoder Representations from Transformers) model, which can generate a response according to the query and the context of the law
    Context: The description from the dataset about the best matching law

In [169]:
import torch
from transformers import BertForQuestionAnswering, BertTokenizer

# Checkpoint shared by the model and its tokenizer, so the two stay in sync.
MODEL_NAME = 'bert-large-uncased-whole-word-masking-finetuned-squad'

model = BertForQuestionAnswering.from_pretrained(MODEL_NAME)
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

def answer_question(question, context):
    """Return the span of the encoded input that the QA model picks as the answer.

    Uses the notebook-level ``model`` and ``tokenizer``.

    Args:
        question: The question text.
        context: The passage the answer should be taken from.

    Returns:
        The tokens between the highest-scoring start and end positions
        (inclusive), joined back into a string. The result is empty when the
        predicted end position precedes the predicted start position.
    """
    # Question and context are encoded as one sequence, truncated to 512 tokens.
    inputs = tokenizer.encode_plus(question, context, return_tensors="pt", max_length=512, truncation=True)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # Read the logits by name; unpacking ``outputs.values()`` would break as
    # soon as the model returns any additional field.
    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits))

    # Batch size is 1, so row 0 holds the whole encoded sequence.
    all_tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0].tolist())
    answer = tokenizer.convert_tokens_to_string(all_tokens[answer_start:answer_end + 1])

    return answer

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


    Handling the query and generating the response in English and in the user's regional language

In [170]:
# Fallback message shown when no law matched the query.
handle_query = "I'm sorry, but I don't have access to your query as of my knowledge.To find out answer to your query, I recommend checking the latest news."

# One extracted answer per best-matching law, using that law's description as context.
answers_list = [
    answer_question(user_query, result_dict[law_title])
    for law_title in best_matching_law
]

if answers_list:
    # Print each answer twice: first translated to Hindi, then the English original.
    for position, answer in enumerate(answers_list, start=1):
        translated = translator.translate(answer, src='en', dest='hi').text
        print("Answer:", position, translated)
        print("Answer:", position, answer)
else:
    print(handle_query)
    print(translator.translate(handle_query, src='en', dest='hi').text)

Answer: 1 कोई भी व्यक्ति जिसे गिरफ्तार नहीं किया जाता है उसे हिरासत में नहीं लिया जाएगा, बिना सूचित किए, जैसे ही हो सकता है, इस तरह की गिरफ्तारी के लिए आधार का
Answer: 1 no person who is arrested shall be detained in custody without being informed , as soon as may be , of the grounds for such arrest
Answer: 2 यदि किसी अपराध में किसी व्यक्ति की भागीदारी का समर्थन करने के लिए पर्याप्त सबूत हैं, तो उन्हें गिरफ्तार किया जा सकता है, कुछ कानूनी प्रक्रियाओं और सुरक्षा उपायों के अधीन
Answer: 2 if there is sufficient evidence to support the involvement of an individual in a crime , they may be arrested , subject to certain legal procedures and safeguards
