In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Sentence-BERT encoder whose `encode` method is used later to embed answers
sbert_model = SentenceTransformer(model_name_or_path='paraphrase-mpnet-base-v2')

# Dataset of questions and reference answers (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer="data/Final_Dataset.csv")

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
def preprocess_text(text):
    """Lowercase ``text``, lemmatize it, and drop punctuation and whitespace tokens.

    The text is lowercased, passed through the module-level ``nlp`` pipeline,
    and the lemmas of the remaining tokens are joined with single spaces.

    NOTE(review): ``nlp`` is not defined in the visible cells; it is presumably
    a spaCy pipeline loaded elsewhere -- confirm it exists before this runs.

    Args:
        text: The raw string to normalize.

    Returns:
        A single-space-separated string of lemmas.
    """
    doc = nlp(text.lower())
    # Whitespace tokens (e.g. from a leading or doubled space) are skipped as
    # well as punctuation; otherwise their lemmas leak stray spaces into the
    # joined output.
    lemmas = [
        token.lemma_
        for token in doc
        if not (token.is_punct or token.is_space)
    ]
    return " ".join(lemmas)

# Demotion: strip from the student's answer every word that the question already contains
def demote_text(student_answer, question):
    """Return the preprocessed student answer without words that occur in the question.

    Both inputs are normalized with ``preprocess_text`` and split on
    whitespace; the surviving answer words keep their original order and are
    joined with single spaces.
    """
    words_in_question = set(preprocess_text(question).split())
    kept_words = []
    for candidate in preprocess_text(student_answer).split():
        if candidate in words_in_question:
            continue
        kept_words.append(candidate)
    return " ".join(kept_words)


In [12]:
def _lookup_reference_answer(question):
    """Return the ``ref_answer`` of the first row in ``df`` matching ``question``."""
    matches = df.loc[df["question"] == question, "ref_answer"]
    if matches.empty:
        # Fall back to a comparison that ignores surrounding whitespace, so a
        # stray leading/trailing space does not make a known question unknown.
        wanted = str(question).strip()
        matches = df.loc[df["question"].astype(str).str.strip() == wanted, "ref_answer"]
    if matches.empty:
        # Same exception type the bare ``.values[0]`` lookup raised, but with
        # a message that names the actual problem.
        raise IndexError(f"No reference answer found for question: {question!r}")
    return matches.iloc[0]


def _assign_grade(cos_sim, correct_threshold, partial_threshold):
    """Map a cosine similarity to a ``(grade_text, numeric_grade)`` pair."""
    if cos_sim > correct_threshold:
        return "Completely Correct", 2
    if cos_sim > partial_threshold:
        return "Partially Incorrect", 1
    return "Incorrect", 0


def evaluate_student_answer(question, student_answer,
                            correct_threshold=0.69,
                            partial_threshold=0.395998348):
    """Grade a student answer against the stored reference answer for a question.

    The reference answer is looked up in the module-level ``df``. Both answers
    are normalized with ``preprocess_text`` and "demoted" with ``demote_text``,
    embedded with ``sbert_model``, and compared by cosine similarity. The grade
    is derived from the similarity of the normalized (non-demoted) texts only.

    Args:
        question: Question text; matched against ``df["question"]`` exactly,
            then ignoring surrounding whitespace.
        student_answer: The raw answer to grade.
        correct_threshold: Similarity strictly above this is graded 2.
        partial_threshold: Similarity strictly above this (and not above
            ``correct_threshold``) is graded 1; anything else is graded 0.

    Returns:
        dict with the keys ``question``, ``student_answer``, ``grades_round``,
        ``grade_text``, ``student_modified``, ``ref_answer``, ``qn_modified``,
        ``ref_modified``, ``student_demoted``, ``ref_demoted``,
        ``length_ratio``, ``cos_similarity_modified`` and
        ``cos_similarity_demo``.

    Raises:
        IndexError: If no row of ``df`` matches ``question``.
    """
    ref_answer = _lookup_reference_answer(question)

    # Normalized texts
    student_modified = preprocess_text(student_answer)
    ref_modified = preprocess_text(ref_answer)

    # Texts with the question's own words removed
    student_demoted = demote_text(student_answer, question)
    ref_demoted = demote_text(ref_answer, question)

    # Word-count ratio; max(1, ...) guards against an empty reference
    length_ratio = len(student_modified.split()) / max(1, len(ref_modified.split()))

    # Embeddings; .tolist() is kept so the similarity inputs are unchanged
    # from the original implementation.
    embed_ref_modified = sbert_model.encode(ref_modified).tolist()
    embed_stud_modified = sbert_model.encode(student_modified).tolist()
    embed_ref_demoted = sbert_model.encode(ref_demoted).tolist()
    embed_stud_demoted = sbert_model.encode(student_demoted).tolist()

    # cosine_similarity returns a 1x1 matrix here; unwrap to a plain float
    cos_similarity_modified = float(
        cosine_similarity([embed_ref_modified], [embed_stud_modified])[0][0]
    )
    cos_similarity_demo = float(
        cosine_similarity([embed_ref_demoted], [embed_stud_demoted])[0][0]
    )

    grade_text, grades_auto = _assign_grade(
        cos_similarity_modified, correct_threshold, partial_threshold
    )

    return {
        "question": question,
        "student_answer": student_answer,
        "grades_round": grades_auto,
        "grade_text": grade_text,
        "student_modified": student_modified,
        "ref_answer": ref_answer,
        "qn_modified": preprocess_text(question),
        "ref_modified": ref_modified,
        "student_demoted": student_demoted,
        "ref_demoted": ref_demoted,
        "length_ratio": length_ratio,
        "cos_similarity_modified": cos_similarity_modified,
        "cos_similarity_demo": cos_similarity_demo,
    }


In [20]:
# Sample question/answer pair; the question string is passed exactly as written (leading space included)
question_input = ' Give a definition for the term "artificial neural network" and mention, how it resembles the human brain!'
student_answer_input = "Artifical Neural Network."

# Grade the sample answer
result = evaluate_student_answer(question_input, student_answer_input)

# Show every field of the result, one "name: value" pair per line
for field_name, field_value in result.items():
    print(f"{field_name}: {field_value}")


question:  Give a definition for the term "artificial neural network" and mention, how it resembles the human brain!
student_answer: Artifical Neural Network.
grades_round: 1
grade_text: Partially Incorrect
student_modified: artifical neural network
ref_answer: A neural network is a massively parallel distributed processor which is made up of simple processing units. It has a natural propensity for storing experiential knowledge. Neural networks resemble the brain in two aspects; knowledge is acquired by the network from its environment through a learning process, interneuron connection strength known as synaptic weights are used to store the acquired knowledge.
qn_modified:   give a definition for the term artificial neural network and mention how it resemble the human brain
ref_modified: a neural network be a massively parallel distribute processor which be make up of simple processing unit it have a natural propensity for store experiential knowledge neural network resemble the brai

In [21]:
# List the column labels of the loaded dataset
print(df.columns)


Index(['\', 'question', 'student_answer', 'grades_round', 'student_modified',
       'ref_answer', 'qn_modified', 'ref_modified', 'student_demoted',
       'ref_demoted', 'length_ratio', 'embed_ref', 'embed_stud',
       'embed_ref_demoted', 'embed_stud_demoted', 'aligned', 'aligned_demoted',
       'cos_similarity', 'cos_similarity_demo', 'aligned_score',
       'aligned_score_demo', 'question_id', 'embed_ref_modified',
       'embed_stud_modified', 'cos_similarity_modified', 'grade_text',
       'grades_auto'],
      dtype='object')
