In [None]:
!pip install whoosh
!pip install allennlp
!pip install allennlp_models

In [None]:
from whoosh.fields import Schema, TEXT, ID
from whoosh import index
import os, os.path
from whoosh import index
from whoosh import qparser
from whoosh.qparser import QueryParser
import time
from allennlp.predictors.predictor import Predictor
import allennlp_models.rc

In [None]:
# Archive of the pretrained BiDAF-ELMo machine reading comprehension model from Allen AI.
MRC_MODEL_ARCHIVE = "https://storage.googleapis.com/allennlp-public-models/bidaf-elmo-model-2020.03.19.tar.gz"

# Build the predictor from that archive.
mrc = Predictor.from_path(MRC_MODEL_ARCHIVE)

In [None]:
# Read the regulations file: one sentence per line (trailing newlines are kept).
# The with-block closes the file handle as soon as the lines have been read.
with open('ug-sentences.txt', mode='r') as file:
    data = file.readlines()

In [None]:
# Index schema: a single stored text field named `content` (as of now).
schema = Schema(content = TEXT(stored = True))

# Create the index directory when it is missing; exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs("index_dir", exist_ok = True)

In [None]:
# Create the whoosh index inside index_dir using the schema defined above,
# then open a writer for adding documents to it.
ix = index.create_in(dirname = "index_dir", schema = schema)
writer = ix.writer()

In [None]:
# Add every sentence to the index as its own document.
for sentence in data:
    writer.add_document(content = sentence)

# Commit the writer so the added documents are saved to the index.
writer.commit()

In [None]:
def index_search(dir_name, search_fields, search_query, limit = 10):
    """Search the whoosh index stored in ``dir_name`` and return the text of the top hits.

    Args:
        dir_name: Directory that contains the whoosh index.
        search_fields: Names of the schema fields to search, e.g. ``['content']``.
        search_query: Free-text query string.
        limit: Maximum number of hits to return (default 10). ``None`` returns
            every matching document.

    Returns:
        A list with the stored ``content`` value of each hit, in the order the
        searcher returned them. The list is empty when nothing matches.
    """
    idx = index.open_dir(dir_name) # get index from the directory

    og = qparser.OrGroup.factory(0.9) # OR group with scaling factor 0.9
    mp = qparser.MultifieldParser(search_fields, idx.schema, group = og) # multifield parser

    q = mp.parse(search_query) # parse the query

    # Search the index that was just opened from dir_name; relying on a
    # module-level index object would ignore the dir_name argument.
    with idx.searcher() as searcher:
        results = searcher.search(q, terms = True, limit = limit)

        # Extract the plain text while the searcher is still open; iterating
        # the results yields only the hits that were actually found.
        return [hit['content'] for hit in results]

In [None]:
def process_query(query, mrc = False):
    """Answer ``query`` from the sentence index and print the answer with timing.

    Args:
        query: The question to answer.
        mrc: ``False`` (default) prints the top search hit as the answer.
            ``True`` refines the top hits with the module-level ``mrc``
            predictor. A predictor object (anything with a ``predict``
            method) may also be passed directly.

    Raises:
        TypeError: If machine reading comprehension is requested but no
            predictor with a ``predict`` method is available.
    """
    start = time.time()
    print('QUESTION:', query)

    answers = index_search("index_dir", ['content'], query) # search for the query

    print('ANSWER:')

    if not answers:
        # Nothing matched, so there is neither a top hit nor a passage to read.
        print('No matching answer found.')
    elif mrc:
        # The parameter shares its name with the module-level predictor and
        # shadows it here, so fetch the predictor from the module namespace
        # when the caller only passed the flag.
        predictor = globals().get('mrc') if mrc is True else mrc
        if not hasattr(predictor, 'predict'):
            raise TypeError('mrc=True requires a module-level `mrc` predictor with a predict() method')

        passage = '\n'.join(answers) # join the top results as a passage
        answer = predictor.predict(query, passage)["best_span_str"].split('.') # get best prediction/answer
        print(answer[0]) # answer with MRC, approx 10 sec
    else:
        print(answers[0]) # answer without MRC, approx 0.01 sec

    end = time.time()
    print('Time taken:', round(100 * (end - start))/100, 'secs')
    print('-' * 50)
    print()

In [None]:
# Sample questions, answered one after another using the plain index search.
sample_questions = [
    'What is the normal load for UG students',
    'If I fail a course and take it again in the later semester, will my earlier course with F grade be removed from the transcript',
    'what is the process of registration?',
    'how many seats are there in cse for admission?',
    'what is the admission criteria for btech',
    'I am in 1st year. Can I take overload?',
    'I am in 2nd year. Can I take overload?',
    'what happens if I miss the endsem because of a medical reason?',
    'what happens if I fail a course?',
    'what happens if I get an F grade in a course?',
    'Can i take 8 credits of online courses in a semester',
    'how many credits do i need to graduate',
    'how is my semester graded',
    'what if I do more than 156 credits in my btech course',
    'How is sgpa calculated',
    'formula for sgpa',
]

for question in sample_questions:
    process_query(question)

QUESTION: What is the normal load for UG students
ANSWER:
Normal load for the first and second year students is 16-20 credits and for the third and fourth year students is 16-22 credits.

Time taken: 0.01 secs
--------------------------------------------------

QUESTION: If I fail a course and take it again in the later semester, will my earlier course with F grade be removed from the transcript
ANSWER:
The pass grade for the respective course will count towards the SGPA of the semester in which the course is passed and not in the semester where F’ grade was awarded.

Time taken: 0.01 secs
--------------------------------------------------

QUESTION: what is the process of registration?
ANSWER:
The number of seats and the process for application and admission are described in each year’s prospectus.

Time taken: 0.0 secs
--------------------------------------------------

QUESTION: how many seats are there in cse for admission?
ANSWER:
Some seats may be reserved for foreign students.



### Generating txt files from json

In [None]:
import json

In [None]:
# Parse ugr.json into `js`; the with-block closes the file afterwards.
with open('ugr.json') as json_file:
    js = json.load(json_file)

In [None]:
# Rebuild `js` as a plain dict.
# NOTE(review): presumably ugr.json has a JSON object at its top level, in which
# case json.load already returned a dict and this is only a shallow copy — confirm.
js = dict(js)

In [None]:
# Write the text of every sentence to ug-sentences.txt, one sentence per line.
# Iterating over the list itself (rather than a fixed index range) keeps the
# export complete if the number of sentences in ugr.json changes, and the
# with-block closes the file even when a write fails.
with open('ug-sentences.txt','w') as f:
    for sentence in js['vertices']['sentences']:
        f.write(str(sentence['text'])+'\n')

In [None]:
# Write the text of every paragraph to ug-paragraphs.txt, one paragraph per line.
# Iterating over the list itself (rather than a fixed index range) keeps the
# export complete if the number of paragraphs in ugr.json changes, and the
# with-block closes the file even when a write fails.
with open('ug-paragraphs.txt','w') as f:
    for paragraph in js['vertices']['paragraphs']:
        f.write(str(paragraph['text'])+'\n')