## Imports

In [1]:
import pandas as pd
from IPython.display import Markdown, display

In [2]:
import _pickle as cPickle
from pathlib import Path

def dumpPickle(fileName, content):
    """Serialize ``content`` into the file ``fileName``.

    Args:
        fileName: Path of the file to (over)write, opened in binary mode.
        content: Any picklable object.

    The protocol argument ``-1`` selects the highest pickle protocol
    available. The file is opened in a ``with`` block so the handle is closed
    even when pickling raises (e.g. for an unpicklable object).
    """
    with open(fileName, 'wb') as pickleFile:
        cPickle.dump(content, pickleFile, -1)

def loadPickle(fileName):
    """Load and return the object stored in the pickle file ``fileName``.

    Args:
        fileName: Path of an existing pickle file.

    Returns:
        The unpickled object.

    The file is opened in a ``with`` block so the handle is released even
    when unpickling fails.
    """
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(fileName, 'rb') as pickleFile:
        return cPickle.load(pickleFile)
    
def pickleExists(fileName):
    """Return True when ``fileName`` points at an existing regular file, else False."""
    return Path(fileName).is_file()

In [3]:
import spacy
from spacy import displacy
nlp = spacy.load('en_core_web_sm')

#Extract answers and the sentence they are in
def extractAnswers(qas, doc):
    """Pair every answer with the index of the sentence that contains it.

    Args:
        qas: Iterable of dicts; each has an ``'answers'`` list whose first
            entry carries ``'answer_start'`` (compared against character
            offsets in the document text) and ``'text'``.
        doc: Parsed document exposing ``.sents``; every sentence must provide
            ``start_char`` and ``end_char`` (as spaCy spans do).

    Returns:
        A list of ``{'sentenceId': int, 'text': str}`` dicts, ordered by
        sentence and, within a sentence, by the order of ``qas``.
    """
    answers = []

    for senId, sentence in enumerate(doc.sents):
        # Use the sentence's real character span. Summing len(sentence.text)
        # ignores the whitespace between sentences, so the running offset
        # drifts and later answers are matched to the wrong sentence or lost.
        senStart = sentence.start_char
        senEnd = sentence.end_char

        for answer in qas:
            # Entries without any answer have nothing to locate.
            if not answer['answers']:
                continue

            firstAnswer = answer['answers'][0]

            if senStart <= firstAnswer['answer_start'] < senEnd:
                answers.append({'sentenceId': senId, 'text': firstAnswer['text']})

    return answers

#TODO - Clean answers from stopwords?
def tokenIsAnswer(token, sentenceId, answers):
    """Tell whether ``token`` equals the text of an answer located in sentence ``sentenceId``.

    ``answers`` is a sequence of dicts with ``'sentenceId'`` and ``'text'``
    keys; ``token`` is compared to ``'text'`` with ``==``.
    """
    return any(
        candidate['sentenceId'] == sentenceId and candidate['text'] == token
        for candidate in answers
    )

#Save named entities start points

def getNEStartIndexs(doc):
    """Map the start index (``.start``) of every entity in ``doc.ents`` to the entity itself."""
    return {entity.start: entity for entity in doc.ents}

def getSentenceStartIndexes(doc):
    """Return, for each sentence in ``doc.sents``, the ``.i`` index of its first token."""
    return [sentence[0].i for sentence in doc.sents]
    
def getSentenceForWordPosition(wordPos, senStarts):
    """Return the index of the sentence that contains token position ``wordPos``.

    Args:
        wordPos: Token index inside the document.
        senStarts: Ascending list holding the first token index of each sentence.

    Returns:
        The 0-based sentence index, or ``None`` when ``senStarts`` is empty.
    """
    for i in range(1, len(senStarts)):
        if wordPos < senStarts[i]:
            return i - 1

    # No later sentence starts after wordPos, so the word belongs to the last
    # sentence. Without this fallback the function returned None for every
    # word of the final sentence (and for single-sentence documents).
    if senStarts:
        return len(senStarts) - 1

    return None
        
def addWordsForParagrapgh(newWords, text):
    """Append one feature row per candidate word of ``text`` to ``newWords``.

    ``text`` is parsed with the module-level ``nlp`` pipeline. Named entities
    are added as a single (possibly multi-token) row; any other token is added
    only when it is alphabetic and not a stop word.

    Each row is a list of ten values:
    ``[text, 0, 0, sentence index, token count, entity label, pos, tag, dep, shape]``.
    Entity rows leave pos/tag/dep as ``None``; plain-token rows leave the
    entity label as ``None``. ``newWords`` is mutated in place; nothing is
    returned.
    """
    doc = nlp(text)

    entityByStart = getNEStartIndexs(doc)
    sentenceStarts = getSentenceStartIndexes(doc)

    # Position of the current token inside the spaCy doc.
    tokenIndex = 0

    while tokenIndex < len(doc):
        if tokenIndex in entityByStart:
            # A named entity starts here: emit it as one row.
            entity = entityByStart[tokenIndex]

            # Shapes of the entity's tokens, each preceded by a space.
            entityShape = ''.join(
                ' ' + doc[position].shape_
                for position in range(entity.start, entity.end)
            )

            newWords.append([
                entity.text,
                0,
                0,
                getSentenceForWordPosition(entity.start, sentenceStarts),
                entity.end - entity.start,
                entity.label_,
                None,
                None,
                None,
                entityShape,
            ])

            # Continue with the first token after the entity.
            tokenIndex = entity.end
            continue

        token = doc[tokenIndex]

        # Plain tokens qualify only if they are alphabetic non-stopwords.
        if not token.is_stop and token.is_alpha:
            newWords.append([
                token.text,
                0,
                0,
                getSentenceForWordPosition(tokenIndex, sentenceStarts),
                1,
                None,
                token.pos_,
                token.tag_,
                token.dep_,
                token.shape_,
            ])

        tokenIndex += 1

def oneHotEncodeColumns(df):
    """Replace the NER, POS, TAG and DEP columns of ``df`` by one-hot indicator columns.

    Every indicator column is named ``<column>_<value>``. The input frame is
    not modified; a new frame is returned with the indicator columns appended
    in the order NER, POS, TAG, DEP.
    """
    for column in ('NER', 'POS', 'TAG', 'DEP'):
        indicators = pd.get_dummies(df[column]).add_prefix(column + '_')
        df = df.drop(columns=[column]).join(indicators)

    return df

In [4]:
def generateDf(text):
    """Build the word-level DataFrame for ``text``.

    Rows come from ``addWordsForParagrapgh``; the ten columns are named in the
    same order as the values of each row.
    """
    rows = []
    addWordsForParagrapgh(rows, text)

    return pd.DataFrame(
        rows,
        columns=['text', 'titleId', 'paragrapghId', 'sentenceId', 'wordCount',
                 'NER', 'POS', 'TAG', 'DEP', 'shape'],
    )

In [5]:
def prepareDf(df):
    """Turn the word DataFrame into the feature matrix passed to the predictor.

    Steps: one-hot encode the categorical columns, add a zero column for every
    feature name stored in ``data/pickles/nb-predictor-features.pkl`` that is
    absent from the frame, then drop the non-feature columns.
    """
    # One-hot encoding
    encodedDf = oneHotEncodeColumns(df)

    # Add missing columns (appended in the order of the stored feature list)
    expectedFeatures = loadPickle('data/pickles/nb-predictor-features.pkl')
    missingFeatures = [name for name in expectedFeatures if name not in encodedDf.columns]
    for name in missingFeatures:
        encodedDf[name] = 0

    # NOTE(review): the resulting column order is "columns present in this
    # text, then missing ones", not the order of the stored feature list, and
    # columns unknown to that list are kept — confirm the predictor tolerates this.

    # Drop unused columns. 'isAnswer' is not produced by generateDf, so this
    # drop only succeeds when the stored feature list contains it.
    return encodedDf.drop(
        columns=['text', 'titleId', 'paragrapghId', 'sentenceId', 'shape', 'isAnswer']
    )

In [6]:
def predictWords(wordsDf, df):
    """Score every candidate word with the pickled predictor.

    Args:
        wordsDf: Feature matrix (see ``prepareDf``); row ``i`` describes the
            word in row ``i`` of ``df``.
        df: Word DataFrame providing the ``'text'`` column.

    Returns:
        One ``{'word': ..., 'prob': ...}`` dict per row, where ``'prob'`` is
        column 0 of ``predict_proba``.
    """
    # NOTE(review): column 0 is presumably the "not an answer" class, which
    # would explain why sortAnswers ranks ascending — verify against training.
    predictor = loadPickle('data/pickles/nb-predictor.pkl')
    probabilities = predictor.predict_proba(wordsDf)

    return [
        {'word': df.iloc[row]['text'], 'prob': probabilities[row][0]}
        for row in range(len(probabilities))
    ]

In [7]:
def blankAnswer(firstTokenIndex, lastTokenIndex, sentStart, sentEnd, doc):
    """Return the sentence with the answer tokens replaced by ``'_____'``.

    Args:
        firstTokenIndex: Index of the first answer token in ``doc``.
        lastTokenIndex: Index of the last answer token in ``doc`` (inclusive).
        sentStart: Index of the first token of the sentence.
        sentEnd: Index one past the last token of the sentence.
        doc: Parsed document; tokens expose ``.idx`` (character offset) and
            ``len(token)`` (character length), and ``doc.text`` is the raw text.
    """
    lastAnswerToken = doc[lastTokenIndex]
    lastSentenceToken = doc[sentEnd - 1]

    # Text from the sentence start up to the answer.
    before = doc.text[doc[sentStart].idx:doc[firstTokenIndex].idx]
    # Text from just after the answer to the end of the sentence's last token.
    after = doc.text[
        lastAnswerToken.idx + len(lastAnswerToken):
        lastSentenceToken.idx + len(lastSentenceToken)
    ]

    return before + '_____' + after


In [8]:
def addQuestions(answers, text):
    """Create a fill-in-the-blank question for each answer found in ``text``.

    Args:
        answers: List of ``{'word': ..., 'prob': ...}`` dicts. They are
            searched sequentially: answer ``k + 1`` is only looked for after
            answer ``k`` was found, so they must be in document order.
        text: The source text.

    Returns:
        A list of ``{'question', 'answer', 'prob'}`` dicts, one per answer
        that was located.
    """
    doc = nlp(text)
    qaPair = []

    if not answers:
        return qaPair

    currAnswerIndex = 0
    # Parse the pending answer once instead of once per document token; the
    # pipeline call is by far the most expensive step of the scan.
    answerDoc = nlp(answers[currAnswerIndex]['word'])

    # Check whether each token starts the next answer
    for sent in doc.sents:
        for token in sent:
            # An answer may span several tokens, so compare all of them.
            answerIsFound = all(
                token.i + j < len(doc) and doc[token.i + j].text == answerDoc[j].text
                for j in range(len(answerDoc))
            )

            if not answerIsFound:
                continue

            question = blankAnswer(token.i, token.i + len(answerDoc) - 1, sent.start, sent.end, doc)

            qaPair.append({'question' : question, 'answer': answers[currAnswerIndex]['word'], 'prob': answers[currAnswerIndex]['prob']})

            currAnswerIndex += 1

            # If all the answers have been found, stop looking
            if currAnswerIndex >= len(answers):
                return qaPair

            answerDoc = nlp(answers[currAnswerIndex]['word'])

    return qaPair

In [9]:
def sortAnswers(qaPairs):
    """Return a new list of the pairs ordered by ascending ``'prob'`` (stable; input untouched)."""
    ranked = list(qaPairs)
    ranked.sort(key=lambda pair: pair['prob'])

    return ranked

In [14]:
import os
import gensim
from gensim.test.utils import datapath, get_tmpfile
from gensim.models import KeyedVectors

# Paths of the raw GloVe vectors and of their word2vec-format conversion.
glove_file = 'data/embeddings/glove.6B.300d.txt'
tmp_file = 'data/embeddings/word2vec-glove.6B.300d.txt'
# Stays None when no embeddings are available; generate_distractors and
# addDistractors read this module-level name.
model = None

if os.path.isfile(tmp_file):
    # Reuse the conversion from a previous run instead of redoing it on every
    # execution. Delete tmp_file to force a fresh conversion.
    model = KeyedVectors.load_word2vec_format(tmp_file)
elif os.path.isfile(glove_file):
    from gensim.scripts.glove2word2vec import glove2word2vec
    glove2word2vec(glove_file, tmp_file)
    model = KeyedVectors.load_word2vec_format(tmp_file)
else:
    print("Glove embeddings not found. Please download and place them in the following path: " + glove_file)

  glove2word2vec(glove_file, tmp_file)


In [15]:
def generate_distractors(answer, count):
    """Return up to ``count`` words closest to ``answer`` in the embedding space.

    Args:
        answer: The correct answer; it is lower-cased before the lookup.
        count: Maximum number of distractors to return.

    Returns:
        A list of words, or ``[]`` when the module-level ``model`` was not
        loaded or the lookup fails (for example because the word is not in
        the vocabulary).
    """
    # Embeddings were never loaded: nothing to look up.
    if model is None:
        return []

    answer = answer.lower()

    # Extracting closest words for the answer.
    try:
        closestWords = model.most_similar(positive=[answer], topn=count)
    except Exception:
        # Best effort, as before — but a bare ``except:`` would also swallow
        # KeyboardInterrupt/SystemExit, which must propagate.
        return []

    # Keep only the words, dropping the similarity scores.
    return [word for word, _similarity in closestWords][:count]

In [16]:
def addDistractors(qaPairs, count):
    """Attach a ``'distractors'`` list (up to ``count`` words) to every pair.

    The dicts in ``qaPairs`` are modified in place and the same list is
    returned. When the module-level ``model`` is falsy a hint is printed and
    processing continues.
    """
    if not model:
        print("Glove embeddings not found. Please download and place them in the following path: " + glove_file)

    for pair in qaPairs:
        pair['distractors'] = generate_distractors(pair['answer'], count)

    return qaPairs

In [17]:
def generateQuestions(text, count):
    """Generate and display up to ``count`` fill-in-the-blank questions for ``text``.

    Pipeline: extract candidate words, score them with the predictor, blank
    them out of their sentences, keep the ``count`` best-ranked pairs, add
    four distractors to each and render everything in the notebook.

    Args:
        text: Source paragraph.
        count: Maximum number of questions to show; fewer are shown when the
            text yields fewer.
    """
    # Extract words
    df = generateDf(text)
    wordsDf = prepareDf(df)
    print(wordsDf)

    # Predict
    labeledAnswers = predictWords(wordsDf, df)

    # Transform questions
    qaPairs = addQuestions(labeledAnswers, text)

    # Pick the best questions
    orderedQaPairs = sortAnswers(qaPairs)

    # Generate distractors
    questions = addDistractors(orderedQaPairs[:count], 4)
    print(questions)

    # Print. Iterate over what was actually produced: indexing with
    # range(count) raised IndexError whenever fewer than ``count`` questions
    # were generated.
    for number, question in enumerate(questions, start=1):
        display(Markdown('### Question ' + str(number) + ':'))
        print(question['question'])

        display(Markdown('#### Answer:'))
        print(question['answer'])

        display(Markdown('#### Incorrect answers:'))
        for distractor in question['distractors']:
            print(distractor)

        print()

In [18]:
# Demo: run the whole pipeline on a sample paragraph and show up to 10 questions.
text = "Oxygen is a chemical element with symbol O and atomic number 8. It is a member of the chalcogen group on the periodic table, a highly reactive nonmetal, and an oxidizing agent that readily forms oxides with most elements as well as with other compounds. By mass, oxygen is the third-most abundant element in the universe, after hydrogen and helium. At standard temperature and pressure, two atoms of the element bind to form dioxygen, a colorless and odorless diatomic gas with the formula O2. Diatomic oxygen gas constitutes 20.8% of the Earth's atmosphere. As compounds including oxides, the element makes up almost half of the Earth's crust."

generateQuestions(text, 10)

    wordCount  NER_CARDINAL  NER_LOC  NER_ORDINAL  NER_PERCENT  POS_ADJ  \
0           1         False    False        False        False    False   
1           1         False    False        False        False    False   
2           1         False    False        False        False    False   
3           1         False    False        False        False    False   
4           1         False    False        False        False    False   
5           1         False    False        False        False     True   
6           1         False    False        False        False    False   
7           1          True    False        False        False    False   
8           1         False    False        False        False    False   
9           1         False    False        False        False    False   
10          1         False    False        False        False    False   
11          1         False    False        False        False     True   
12          1         Fal

  array.dtypes.apply(is_sparse).any()):


[{'question': 'Oxygen is a _____ element with symbol O and atomic number 8.', 'answer': 'chemical', 'prob': 0.0, 'distractors': ['chemicals', 'biological', 'toxic', 'compounds']}, {'question': 'Oxygen is a chemical element with _____ O and atomic number 8.', 'answer': 'symbol', 'prob': 0.0, 'distractors': ['symbols', 'symbolizes', 'symbolized', 'symbolize']}, {'question': 'At standard temperature and pressure, two atoms of the _____ bind to form dioxygen, a colorless and odorless diatomic gas with the formula O2.', 'answer': 'element', 'prob': 0.0, 'distractors': ['elements', 'component', 'aspect', 'dimension']}, {'question': 'At standard temperature and pressure, two atoms of the element bind to form dioxygen, a colorless and odorless diatomic _____ with the formula O2.', 'answer': 'gas', 'prob': 0.0, 'distractors': ['oil', 'gasoline', 'natural', 'fuel']}, {'question': 'At standard temperature and pressure, two atoms of the element bind to form dioxygen, a colorless and odorless diato

### Question 1:

Oxygen is a _____ element with symbol O and atomic number 8.


#### Answer:

chemical


#### Incorrect answers:

chemicals
biological
toxic
compounds



### Question 2:

Oxygen is a chemical element with _____ O and atomic number 8.


#### Answer:

symbol


#### Incorrect answers:

symbols
symbolizes
symbolized
symbolize



### Question 3:

At standard temperature and pressure, two atoms of the _____ bind to form dioxygen, a colorless and odorless diatomic gas with the formula O2.


#### Answer:

element


#### Incorrect answers:

elements
component
aspect
dimension



### Question 4:

At standard temperature and pressure, two atoms of the element bind to form dioxygen, a colorless and odorless diatomic _____ with the formula O2.


#### Answer:

gas


#### Incorrect answers:

oil
gasoline
natural
fuel



### Question 5:

At standard temperature and pressure, two atoms of the element bind to form dioxygen, a colorless and odorless diatomic gas with the _____ O2.


#### Answer:

formula


#### Incorrect answers:

f1
formulas
prix
ferrari



### Question 6:

Diatomic _____ gas constitutes 20.8% of the Earth's atmosphere.


#### Answer:

oxygen


#### Incorrect answers:

hydrogen
nitrogen
helium
nutrients



### Question 7:

As compounds _____ oxides, the element makes up almost half of the Earth's crust.


#### Answer:

including


#### Incorrect answers:

include
included
other
such



### Question 8:

It is a member of the chalcogen group on the periodic table, a _____ reactive nonmetal, and an oxidizing agent that readily forms oxides with most elements as well as with other compounds.


#### Answer:

highly


#### Incorrect answers:

extremely
very
most
less



### Question 9:

It is a member of the chalcogen group on the periodic table, a highly reactive nonmetal, and an oxidizing agent that _____ forms oxides with most elements as well as with other compounds.


#### Answer:

readily


#### Incorrect answers:

easily
accessible
concedes
cannot



### Question 10:

It is a member of the _____ group on the periodic table, a highly reactive nonmetal, and an oxidizing agent that readily forms oxides with most elements as well as with other compounds.


#### Answer:

chalcogen


#### Incorrect answers:




In [37]:
from flask import Flask, render_template, request, redirect, url_for , jsonify
import json
import utils
import os
# from gensim.models import KeyedVectors


# Flask application object; the @app.route decorators below register their views on it.
app = Flask(__name__)

# glove_file = 'data/embeddings/glove.6B.300d.txt'
# tmp_file = 'data/embeddings/word2vec-glove.6B.300d.txt'
# model = None

# if os.path.isfile(glove_file):
#     from gensim.scripts.glove2word2vec import glove2word2vec
#     glove2word2vec(glove_file, tmp_file)
#     model = KeyedVectors.load_word2vec_format(tmp_file)
# else:
#     print("Glove embeddings not found. Please download and place them in the following path: " + glove_file)

# Quiz state shared between the /quiz and /quiz/respnse views.
# A ``global`` statement at module level does nothing, so the names are
# initialised here; otherwise the first GET of /quiz fails with NameError.
# NOTE(review): this state is process-wide, so concurrent users overwrite each
# other's quiz.
answers = []
quiz_data_v2 = []
questionText = ""

# details.json is read for its "login" section here; answer_evaluation also
# reads data["scores"].
with open("details.json", "r") as jf:
    data = json.load(jf)
    users = data["login"]

@app.route('/', methods=['GET', 'POST'])
def login():
    """Show the login page; on POST, check the credentials against ``users``.

    Matching credentials redirect to the ``generate_qna`` view; anything else
    renders ``auth.html`` again.
    """
    if request.method != 'POST':
        return render_template('auth.html')

    # Retrieve form data
    email = request.form.get('login-email')
    password = request.form.get('login-password')

    # NOTE(review): passwords are compared (and stored) in plaintext.
    credentialsMatch = email in users.keys() and users[email]==password
    if credentialsMatch:
        # Redirect to generate_qna.html
        return redirect(url_for('generate_qna'))

    return render_template('auth.html')


@app.route('/signup', methods=['GET', 'POST'])
def signup():
    """Show the sign-up page; on POST, register the account and go to login.

    A submission is rejected (the form is rendered again) when a field is
    missing, the two passwords differ, or the e-mail is already registered.
    """
    if request.method == 'POST':
        # Retrieve form data
        email = request.form.get('signup-email')
        password = request.form.get('signup-password')
        password2 = request.form.get('confirm-signup-password')

        # Validate before touching any state or the file on disk.
        if not email or not password or password != password2:
            return render_template('auth.html')

        # Never let a sign-up replace the password of an existing account.
        if email in users:
            return render_template('auth.html')

        users[email] = password
        data["login"] = users

        # Persist the whole document. Dumping only ``users`` wiped the
        # "scores" section and the "login" wrapper that startup reads.
        # NOTE(review): passwords are stored in plaintext; hash them once the
        # login check is updated to match.
        with open("details.json", "w") as jfk:
            json.dump(data, jfk)

        # Log the registration without the credentials.
        print(f'Email: {email}')

        return redirect(url_for('login'))

    return render_template('auth.html')




@app.route('/create_post' , methods=['GET', 'POST'] )
def create_post():
    """Receive a new forum post and return to the forum page.

    The post is only printed for now; it is not stored anywhere.
    """
    if request.method == 'POST':
        # Retrieve form data
        new_post = request.form.get('post-input')

        # Print form data (for demonstration purposes)
        print(f'new_post: {new_post}')

    # Always answer with a redirect: a GET request used to fall through and
    # return None, which Flask rejects as an invalid response.
    return redirect(url_for('forum'))
    



@app.route('/generate_qna' , methods=['GET', 'POST'])
def generate_qna():
    """Render ``generate_qna.html`` with the fixed list of subjects (10 questions each)."""
    # Add more subjects as needed
    subjects = ("Math", "Science", "Python", "Networks")
    question_data = [{"subject": subject, "num_questions": 10} for subject in subjects]

    return render_template('generate_qna.html' , questions=question_data)


@app.route('/answer_evaluation')
def answer_evaluation():
    """Render ``answer_evaluation.html`` with the first three subject scores.

    Scores come from the ``"scores"`` section of the loaded ``details.json``
    data, in the order they appear there.
    """
    # A missing section yields an empty page instead of a KeyError.
    subj_data = data.get("scores", {})

    # Take at most three entries; indexing [0], [1], [2] directly raised
    # IndexError when fewer than three subjects had a score.
    subjects_data = [
        {"subject": subject, "score": score}
        for subject, score in list(subj_data.items())[:3]
    ]

    return render_template('answer_evaluation.html', subjects_data=subjects_data)
  


@app.route('/forum')
def forum():
    """Render the forum page (``forum.html``)."""
    return render_template('forum.html')


@app.route('/quiz' , methods=['GET', 'POST'])
def quiz():
    """Generate a quiz from the posted text (POST) or show the last one (GET).

    POST: reads ``questionText`` from the form, asks ``utils.generateQuestions``
    for 10 questions, stores the quiz and its correct answers in the
    module-level state and renders ``quiz.html``.
    GET: renders the most recently generated quiz; when none exists yet the
    user is sent to the ``generate_qna`` view.
    """
    global answers
    global quiz_data_v2
    global questionText

    if request.method == 'POST':
        print("post method ************")
        submittedText = request.form.get('questionText')

        # Nothing to build a quiz from: go back to the generation page.
        if not submittedText:
            return redirect(url_for('generate_qna'))

        print(f"Launching quiz for subject: {submittedText}")
        quiz_data = utils.generateQuestions(submittedText, 10, model=model)

        # Build into locals first so a failure halfway does not leave the
        # shared state with mismatched questions and answers.
        newQuizData = []
        newAnswers = []
        for item in quiz_data:
            newQuizData.append({
                "question": item['question'],
                # The first distractor is skipped, as before.
                # NOTE(review): the correct answer is always the first option;
                # consider shuffling before rendering.
                "answers": [item['answer']]+item['distractors'][1:]
            })
            newAnswers.append(item['answer'])

        questionText = submittedText
        quiz_data_v2 = newQuizData
        answers = newAnswers

        print(quiz_data_v2)
        print("Answers:", answers)

        return render_template('quiz.html', quiz_data=quiz_data_v2)

    # GET: the global may not exist yet if no quiz was ever generated, so look
    # it up without triggering NameError.
    generatedQuiz = globals().get('quiz_data_v2')
    if generatedQuiz is None:
        return redirect(url_for('generate_qna'))

    return render_template('quiz.html', quiz_data=generatedQuiz)
            



@app.route('/quiz/respnse', methods=['GET', 'POST'])
def quiz_response():
    """Grade a submitted quiz and render ``quiz.html`` with ``result``.

    ``result`` is ``"pass"`` when at least 5 selected answers equal the stored
    correct answer at the same position, otherwise ``"fail"``. A GET request
    is redirected to the quiz view.
    """
    # If it's a GET request, send the user to the quiz form. Falling through
    # returned None, which Flask rejects as an invalid response.
    if request.method != 'POST':
        return redirect(url_for('quiz'))

    # Form data submitted, process it
    question = request.form.getlist('question')
    selected_answers = request.form.getlist('selected-answer')

    # The correct answers live in module state; tolerate a submission that
    # arrives before any quiz was generated.
    expected_answers = globals().get('answers', [])

    # zip stops at the shorter list, so an incomplete submission no longer
    # raises IndexError.
    # NOTE(review): answers are matched by position; if the form omits
    # unanswered questions, later selections shift — confirm with the template.
    score = sum(
        1
        for expected, chosen in zip(expected_answers, selected_answers)
        if expected == chosen
    )

    # For now, just print the selected answers
    print("Question:", question)
    print("Selected Answers:", selected_answers)
    result = "fail" if score<5 else "pass"

    return render_template('quiz.html', result=result)

        

@app.route('/logout')
def logout():
    """Redirect to the login page; no session state is cleared here."""
    return redirect(url_for('login'))




# Start Flask's built-in server only when this code runs as the main program;
# debug mode is off. The call blocks until the server is stopped.
if __name__ == '__main__':
    app.run(debug=False)


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit


127.0.0.1 - - [01/Nov/2023 15:30:13] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [01/Nov/2023 15:30:13] "GET /static/css/auth.css HTTP/1.1" 304 -
127.0.0.1 - - [01/Nov/2023 15:30:13] "GET /static/js/auth.js HTTP/1.1" 304 -
127.0.0.1 - - [01/Nov/2023 15:30:13] "GET /static/Assets/Container.png HTTP/1.1" 304 -
127.0.0.1 - - [01/Nov/2023 15:30:17] "POST / HTTP/1.1" 302 -
127.0.0.1 - - [01/Nov/2023 15:30:17] "GET /generate_qna HTTP/1.1" 200 -
127.0.0.1 - - [01/Nov/2023 15:30:18] "GET /static/css/generate_qna.css HTTP/1.1" 304 -


post method ************
Launching quiz for subject: Oxygen is a chemical element with symbol O and atomic number 8. It is a member of the chalcogen group on the periodic table, a highly reactive nonmetal, and an oxidizing agent that readily forms oxides with most elements as well as with other compounds. By mass, oxygen is the third-most abundant element in the universe, after hydrogen and helium. At standard temperature and pressure, two atoms of the element bind to form dioxygen, a colorless and odorless diatomic gas with the formula O2. Diatomic oxygen gas constitutes 20.8% of the Earth's atmosphere. As compounds including oxides, the element makes up almost half of the Earth's crust.
    wordCount  NER_CARDINAL  NER_LOC  NER_ORDINAL  NER_PERCENT  POS_ADJ  \
0           1         False    False        False        False    False   
1           1         False    False        False        False    False   
2           1         False    False        False        False    False   
3 

  array.dtypes.apply(is_sparse).any()):
127.0.0.1 - - [01/Nov/2023 15:30:24] "POST /quiz HTTP/1.1" 200 -
127.0.0.1 - - [01/Nov/2023 15:30:24] "GET /static/css/quiz.css HTTP/1.1" 304 -


[{'question': 'Oxygen is a _____ element with symbol O and atomic number 8.', 'answers': ['chemical', 'biological', 'toxic', 'compounds']}, {'question': 'Oxygen is a chemical element with _____ O and atomic number 8.', 'answers': ['symbol', 'symbolizes', 'symbolized', 'symbolize']}, {'question': 'At standard temperature and pressure, two atoms of the _____ bind to form dioxygen, a colorless and odorless diatomic gas with the formula O2.', 'answers': ['element', 'component', 'aspect', 'dimension']}, {'question': 'At standard temperature and pressure, two atoms of the element bind to form dioxygen, a colorless and odorless diatomic _____ with the formula O2.', 'answers': ['gas', 'gasoline', 'natural', 'fuel']}, {'question': 'At standard temperature and pressure, two atoms of the element bind to form dioxygen, a colorless and odorless diatomic gas with the _____ O2.', 'answers': ['formula', 'formulas', 'prix', 'ferrari']}, {'question': "Diatomic _____ gas constitutes 20.8% of the Earth's 

127.0.0.1 - - [01/Nov/2023 15:30:44] "POST /quiz/respnse HTTP/1.1" 200 -
127.0.0.1 - - [01/Nov/2023 15:30:44] "GET /static/css/quiz.css HTTP/1.1" 304 -


Question: ["{'question': 'Oxygen is a _____ element with symbol O and atomic number 8.', 'answers': ['chemical', 'biological', 'toxic', 'compounds']}", "{'question': 'Oxygen is a _____ element with symbol O and atomic number 8.', 'answers': ['chemical', 'biological', 'toxic', 'compounds']}", "{'question': 'Oxygen is a _____ element with symbol O and atomic number 8.', 'answers': ['chemical', 'biological', 'toxic', 'compounds']}", "{'question': 'Oxygen is a _____ element with symbol O and atomic number 8.', 'answers': ['chemical', 'biological', 'toxic', 'compounds']}", "{'question': 'Oxygen is a chemical element with _____ O and atomic number 8.', 'answers': ['symbol', 'symbolizes', 'symbolized', 'symbolize']}", "{'question': 'Oxygen is a chemical element with _____ O and atomic number 8.', 'answers': ['symbol', 'symbolizes', 'symbolized', 'symbolize']}", "{'question': 'Oxygen is a chemical element with _____ O and atomic number 8.', 'answers': ['symbol', 'symbolizes', 'symbolized', 'sy

127.0.0.1 - - [01/Nov/2023 15:30:46] "GET /quiz HTTP/1.1" 200 -
127.0.0.1 - - [01/Nov/2023 15:30:46] "GET /static/css/quiz.css HTTP/1.1" 304 -
127.0.0.1 - - [01/Nov/2023 15:30:51] "GET /logout HTTP/1.1" 302 -
127.0.0.1 - - [01/Nov/2023 15:30:51] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [01/Nov/2023 15:30:51] "GET /static/js/auth.js HTTP/1.1" 304 -
127.0.0.1 - - [01/Nov/2023 15:30:51] "GET /static/css/auth.css HTTP/1.1" 304 -
127.0.0.1 - - [01/Nov/2023 15:30:51] "GET /static/Assets/Container.png HTTP/1.1" 304 -
