# Data Retrieval through Speech Query

### Import all the dependencies needed for execution

In [16]:
from tkinter import *
import tkinter
import speech_recognition as sr
import pyaudio
import torch_scatter
import transformers
from transformers import TapasConfig,TapasTokenizer,TapasModel,TapasForQuestionAnswering
import torch
import numpy as np
import pandas as pd
import time
from IPython.display import HTML, display

### Read the test data into a table

In [19]:
# Load the test data and convert every cell to a string; the table is later
# passed to the TAPAS tokenizer, which works on textual cell values.
# `astype(str)` replaces `applymap(str)`, which is deprecated since pandas 2.1.
# NOTE(review): the displayed output shows an `Unnamed: 0` column, presumably a
# saved index - confirm whether it should be dropped (e.g. `index_col=0`).
table = pd.read_csv('data1.csv').astype(str)
table.tail()

Unnamed: 0,Season,Driver,Team,Engine,Poles,Wins,Podiums,Points,Marginofdefeat
57,2007,LewisHamilton,McLaren,Mercedes,6,4,12,109.0,1.0
58,2008,FelipeMassa,Ferrari,Ferrari,6,6,10,97.0,1.0
59,2009,SebastianVettel,RedBull,Renault,4,4,8,84.0,11.0
60,2010,FernandoAlonso,Ferrari,Ferrari,2,5,10,252.0,4.0
61,2011,JensonButton,McLaren,Mercedes,0,3,12,270.0,122.0


## Weakly Supervised Table Parsing via pre-training
<i>Answering natural language questions over tables is usually seen as a semantic parsing task</i>. To alleviate the collection cost of full logical forms, one popular approach focuses on weak supervision consisting of denotations instead of logical forms. However, training semantic parsers from weak supervision poses difficulties, and in addition, the generated logical forms are only used as an intermediate step prior to retrieving the denotation. In this paper, we present TAPAS, an approach to question answering over tables without generating logical forms. TAPAS trains from weak supervision, and predicts the denotation by selecting table cells and optionally applying a corresponding aggregation operator to such selection. TAPAS extends BERT’s architecture to encode tables as input, initializes from an effective joint pre-training of text segments and tables crawled from Wikipedia, and is trained end-to-end.<br><br>
TAPAS is similar to BERT and therefore relies on the masked language modeling (MLM) objective. It is therefore efficient at predicting masked tokens and at NLU in general, but is not optimal for text generation. Models trained with a causal language modeling (CLM) objective are better in that regard.
TAPAS has checkpoints fine-tuned on SQA, which are capable of answering questions related to a table in a conversational set-up. This means that you can ask follow-up questions such as “what is his age?” related to the previous question. Note that the forward pass of TAPAS is a bit different in case of a conversational set-up: in that case, you have to feed every table-question pair one by one to the model, such that the prev_labels token type ids can be overwritten by the predicted labels of the model to the previous question.

In [20]:
# Checkpoint identifier shared by the tokenizer and the question-answering model.
modelName = 'google/tapas-base-finetuned-wtq'

# Both objects are loaded from the same checkpoint so they stay in sync.
tokenizer = TapasTokenizer.from_pretrained(modelName)
model = TapasForQuestionAnswering.from_pretrained(modelName)

Only three types of aggregation are classified by this model:
<ul><li>SUM</li><li>AVERAGE</li><li>COUNT</li></ul>

In [22]:
def _aggregate_cells(cell_values, aggregation):
    """Apply a predicted aggregation operator to the selected cell values.

    Args:
        cell_values: List of cell strings selected by the model (may be empty).
        aggregation: One of "NONE", "SUM", "AVERAGE" or "COUNT".

    Returns:
        The comma-joined cell text for "NONE"; a float for "SUM"/"AVERAGE";
        an int (number of distinct selected values) for "COUNT". If "SUM" or
        "AVERAGE" is predicted over an empty or non-numeric selection, the
        comma-joined cell text is returned instead of raising.
    """
    selected = ", ".join(cell_values)
    if aggregation == "NONE":
        return selected
    if aggregation in ("SUM", "AVERAGE"):
        try:
            numbers = [float(value) for value in cell_values]
        except ValueError:
            # The model picked an arithmetic operator over non-numeric cells;
            # showing the raw cells is more useful than crashing the GUI callback.
            return selected
        if not numbers:
            return selected
        total = sum(numbers)
        # True division: floor division would silently truncate the mean.
        return total if aggregation == "SUM" else total / len(numbers)
    # COUNT: number of distinct selected values. Working on the cell list
    # (not a re-split string) keeps "x" and " x" from being counted twice.
    return len({value.strip() for value in cell_values})


def qu(queries):
    """Answer natural-language queries against the global `table` with TAPAS.

    Each query and its selected cells are printed; the aggregated answer of
    the LAST query is shown in a label placed on the global `frame`.

    Args:
        queries: List of question strings. An empty list is a no-op.
    """
    if not queries:
        return

    inputs = tokenizer(table=table, queries=queries, padding="max_length", return_tensors="pt")
    outputs = model(**inputs)
    predicted_answer_coordinates, predicted_answer_operators = tokenizer.convert_logits_to_predictions(
        inputs,
        outputs.logits.detach(),
        outputs.logits_aggregation.detach(),
    )
    id2aggregation = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3: "COUNT"}

    answer = None
    for query, coordinates, operator_id in zip(
        queries, predicted_answer_coordinates, predicted_answer_operators
    ):
        # Each coordinate is a (row, column) pair into the table.
        cell_values = [table.iat[coordinate] for coordinate in coordinates]
        print(query)
        print(", ".join(cell_values))
        answer = _aggregate_cells(cell_values, id2aggregation[operator_id])

    co = f'Predicted Answer :- {answer}'
    label3 = Label(frame, text=co, font=('ARIAL', 16))
    label3.place(x=20, y=400)
def listenfunc():
    """Record one spoken phrase, transcribe it, and query the table with it.

    The transcription (or a failure notice) is shown in a label on the global
    `frame`. The table is queried via `qu` only when recognition succeeded, so
    the failure notice is never sent to the model as if it were a question.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        recognizer.adjust_for_ambient_noise(source)
        print('Listening...')
        audio = recognizer.listen(source)

    # Recognition runs after the microphone is released; it only needs the
    # captured audio, not the open device.
    recognized = True
    try:
        print('Recognizing...')
        command = recognizer.recognize_google(audio)
    except (sr.UnknownValueError, sr.RequestError) as err:
        # UnknownValueError: speech was unintelligible.
        # RequestError: the recognition service could not be reached.
        print(f'Speech recognition failed: {err!r}')
        command = "couldn't recognize you"
        recognized = False

    ss = f'Initial Input:{command}'
    label2 = Label(frame, text=ss, font=('ARIAL', 16))
    label2.place(x=20, y=250)

    if recognized:
        qu([command])
def hellfunc():
    """Place the 'start listening' button on the frame; clicking it runs listenfunc."""
    listen_button = Button(
        frame,
        text='start listening',
        font=('ARIAL', 12),
        width=20,
        height=1,
        bg='green',
        command=listenfunc,
    )
    listen_button.place(x=400, y=170)

### Run all the cells above once; when you are ready to speak to the database, run the following cell.
When you run the cell below, an interface will open on your screen.<br>
Click the <b>ASK ME</b> button.<br>
When you are ready to speak to the model (formulate a question about the test data table),<br>
click the <b>Start Listening</b> button.<br>
Wait for 2 seconds (the time needed to calibrate for ambient noise).<br>
Start speaking to the model (make sure you have granted access to the microphone and that it is working properly).<br>
Within the 2-second phrase timeout, the model will stop listening and start recognizing and converting the voice input.<br>
If everything went well, you will see the input and the predicted output on the interface (hopefully the correct output). If you want to test again, close the interface and run the cell below again; otherwise, simply try again.
<img src='k.png'>

In [30]:
# Root window of the demo.
win = tkinter.Tk()
win.title('FinalDemo')

# Fixed-size frame; the callbacks place their buttons and result labels on it.
frame = Frame(win, width=1000, height=600)
frame.pack()

# NOTE(review): `var` is not referenced by any code visible here; kept as-is.
var = StringVar()

# Heading shown at the top of the window.
lab1 = Label(
    win,
    text='Data Retrieval through Speech Query',
    font=('ARIAL', 25),
)
lab1.place(x=200, y=30)

# Entry button: clicking it calls hellfunc.
but1 = Button(
    win,
    text='ASK ME',
    font=('ARIAL', 15),
    width=40,
    height=2,
    bg='orange',
    command=hellfunc,
)
but1.place(x=250, y=100)

# Blocks until the window is closed.
win.mainloop()

Listening...
Recognizing...
driver in 2007
LewisHamilton


## That's it for this sample demo ---- Thank you ----