# Cheat Detector
Similarity-based cheat detector for written exams with multiple questions.

In [None]:
import cheat_detector
from cheat_detector.graph_utils import *
from cheat_detector.model import TextSimilarityModel
from cheat_detector.visualization_utils import plot_edge_list

import numpy as np
from IPython.display import clear_output, display
import ipywidgets
from natsort import natsorted
import zipfile
import io

In [None]:
# Model instance that on_zip_upload uses to compute one similarity matrix
# per question (via predict_sim_matrix).
text_similarity_model = TextSimilarityModel()

In [None]:
# Upload widget for the zip of exams; on_zip_upload reads the uploaded bytes
# from its `data` attribute.
file_upload_button = ipywidgets.FileUpload()

In [None]:
class ThresholdSlider(object):
    """Float slider for the similarity threshold, rendered lazily in an Output.

    Attributes:
        widget: the underlying ``ipywidgets.FloatSlider``.
        output: the ``ipywidgets.Output`` area the slider is rendered into.
        has_been_instantiated: ``True`` once the output area has been
            displayed by the first ``display()`` call.
    """

    def __init__(self, value=0.5, min=0, max=1, step=0.05, **kwargs):
        """Create the slider.

        Args:
            value: initial slider value.
            min: lowest selectable value.
            max: highest selectable value.
            step: increment between selectable values.
            **kwargs: forwarded unchanged to ``ipywidgets.FloatSlider``.
        """
        self.widget = self.create_widget(
            value, min, max, step, **kwargs
        )
        self.output = ipywidgets.Output()
        self.has_been_instantiated = False

    def observe(self, function, names):
        """Register ``function`` as an observer of the slider traits ``names``."""
        self.widget.observe(function, names=names)

    def create_widget(self, value, min, max, step, **kwargs):
        """Build the ``FloatSlider`` using the given range and initial value."""
        # Forward the arguments instead of hard-coding the defaults, so that
        # the values passed to the constructor are actually honoured.
        return ipywidgets.FloatSlider(
            value=value,
            min=min, max=max, step=step,
            **kwargs
        )

    def display(self):
        """Show the output area on the first call, the slider on later calls.

        The first call only displays the (still empty) output area; every
        subsequent call clears that area and renders the slider inside it.
        """
        if self.has_been_instantiated:
            with self.output:
                clear_output(True)
                display(self.widget)
        else:
            display(self.output)
            self.has_been_instantiated = True

# Shared slider instance; plot_graph reads its current value as the
# similarity threshold.
threshold_slider = ThresholdSlider()

In [None]:
class WhichQuestionsWidget(object):
    """Multi-select of question numbers, rebuilt on every ``display()`` call.

    The selector is recreated whenever the number of questions changes, so the
    observer callback is remembered and re-attached to each new widget.

    Attributes:
        widget: the current ``ipywidgets.SelectMultiple``.
        output: the ``ipywidgets.Output`` area the selector is rendered into.
        has_been_instantiated: ``True`` once the output area has been
            displayed by the first ``display()`` call.
        function_to_observe: callback registered through ``observe()``, or
            ``None`` when no callback has been registered yet.
        names_to_observe: trait name(s) the callback listens to.
    """

    def __init__(self):
        # Initialise the observer state up front so display() can be called
        # safely even before observe().
        self.function_to_observe = None
        self.names_to_observe = None
        self.widget = self.create_widget(1, None, None)
        self.output = ipywidgets.Output()
        self.has_been_instantiated = False

    def observe(self, function, names):
        """Register ``function`` as the observer of the traits ``names``.

        The callback is stored so it can be attached to every selector that
        ``display()`` creates, and it is also attached to the current one.
        """
        self.function_to_observe = function
        self.names_to_observe = names
        if function is not None:
            self.widget.observe(function, names)

    def create_widget(self, question_quantity, function_to_observe, names_to_observe):
        """Build a selector for questions ``1..question_quantity``, all selected.

        Args:
            question_quantity: number of questions to list.
            function_to_observe: callback to attach, or ``None`` for none.
            names_to_observe: trait name(s) passed along with the callback.

        Returns:
            The new ``ipywidgets.SelectMultiple``.
        """
        which_questions = list(range(1, question_quantity + 1))
        widget = ipywidgets.SelectMultiple(
            options=which_questions,
            value=which_questions,  # default value
            disabled=False
        )
        # Never register ``None`` as a handler: it would be invoked (and fail)
        # on the first trait change.
        if function_to_observe is not None:
            widget.observe(function_to_observe, names_to_observe)
        return widget

    def display(self, question_quantity):
        """Show the output area on the first call, a fresh selector afterwards.

        The first call only displays the (still empty) output area and ignores
        ``question_quantity``. Every later call builds a new selector for
        ``question_quantity`` questions and renders it inside that area.
        """
        if self.has_been_instantiated:
            self.widget = self.create_widget(
                question_quantity, self.function_to_observe, self.names_to_observe
            )
            with self.output:
                clear_output(True)
                display(self.widget)
        else:
            display(self.output)
            self.has_been_instantiated = True


# Shared selector instance; plot_graph reads the selected question numbers
# from its current widget.
which_questions_widget = WhichQuestionsWidget()

In [None]:
# Output area that plot_graph clears and redraws the graph into.
closest_students_plot = ipywidgets.Output()

## How to use
For the cheat detector to work correctly, upload a zip file containing one text file (.txt) per student exam. Each file must follow the format below: every answer on a single line, with a blank line between consecutive answers.

```
Answer to question 1

Answer to question 2

Answer to question 3
```

Here's an example of an [unzipped folder](https://github.com/projeto-de-algoritmos/Grafos1_CheatDetector/tree/main/mock_data/prova_2).

In [None]:
# Render the upload button; on_zip_upload is registered as the observer of
# its `data` trait in the last cell.
display(file_upload_button)

FileUpload(value={}, description='Upload')

### Similarity Threshold

In [None]:
# This first call only shows the (still empty) output area; on_zip_upload
# calls display() again to render the slider inside it.
threshold_slider.display()

Output()

### Which questions should be considered?

In [None]:
# This first call only shows the (still empty) output area and ignores
# question_quantity; on_zip_upload calls display() again with the real
# number of questions to render the selector.
which_questions_widget.display(question_quantity=1)

Output()

### Closest Students Graph


In [None]:
# Show the output area that plot_graph draws the graph into.
display(closest_students_plot)

Output()

In [None]:
def plot_graph(unuseful_arg):
    """Redraw the closest-students graph from the current widget values.

    Meant to be used as a widget observer callback, so it accepts (and
    ignores) the change notification passed by the widget.

    Args:
        unuseful_arg: ignored.
    """
    picked_questions = which_questions_widget.widget.value

    # Nothing selected: keep whatever is currently drawn.
    if len(picked_questions) == 0:
        return

    # Question numbers are 1-based, list_of_edge_lists is 0-based.
    per_question_edges = []
    for question_number in picked_questions:
        per_question_edges.append(list_of_edge_lists[question_number - 1])

    averaged_edges = calc_mean_edge_list(per_question_edges)
    # NOTE(review): closest_edges presumably keeps the edges that satisfy the
    # threshold -- confirm against cheat_detector.graph_utils.
    similar_pairs = closest_edges(averaged_edges, threshold_slider.widget.value)

    with closest_students_plot:
        clear_output(True)
        plot_edge_list(similar_pairs)

In [None]:
def txt_to_exam(txt):
    """Convert the raw bytes of one exam file into a list of answers.

    Each non-blank line is one question answer. Lines that are empty or
    contain only whitespace are treated as separators and skipped, so a
    separator line with stray spaces does not shift the question numbering.

    Args:
        txt: content of the exam file, as bytes.

    Returns:
        A list of str, one decoded answer per question, in file order.

    Raises:
        UnicodeDecodeError: if a line is not valid UTF-8.
    """
    return [line.decode() for line in txt.splitlines() if line.strip()]


def zip_bytes_to_exam_list(zip_bytes):
    """Extract every .txt exam from an in-memory zip archive.

    Files are processed in natural sort order of their names; entries whose
    name does not end in ``.txt`` are ignored.

    Args:
        zip_bytes: content of the zip archive, as bytes.

    Returns:
        A numpy array built from the list of exams, where each exam is the
        list of answers returned by ``txt_to_exam``.
    """
    exam_list = []
    # The context manager closes the archive even if reading or decoding one
    # of the files raises.
    with zipfile.ZipFile(io.BytesIO(zip_bytes), mode="r") as zip_file:
        # iterate over the sorted filenames in the zip
        for filename in natsorted(zip_file.namelist()):
            # only .txt files are exams
            if not filename.endswith('.txt'):
                continue
            # read the file and convert its text to the exam format
            exam_list.append(txt_to_exam(zip_file.read(filename)))
    return np.asarray(exam_list)


def on_zip_upload(unuseful_arg):
    """Process the most recently uploaded zip and refresh widgets and graph.

    Meant to be used as the observer of the upload button, so it accepts (and
    ignores) the change notification passed by the widget. Rebuilds the
    module-level ``list_of_edge_lists`` (one edge list per question).

    Args:
        unuseful_arg: ignored.
    """
    global list_of_edge_lists

    uploaded_files = file_upload_button.data
    # Nothing to process if the upload list is empty.
    if not uploaded_files:
        return

    zip_bytes = uploaded_files[-1]
    exam_list = zip_bytes_to_exam_list(zip_bytes)

    # exam_list is indexed [student][question]; transpose it to iterate over
    # questions. NOTE(review): this assumes every exam has the same number of
    # answers -- confirm or validate upstream.
    question_answers_list = exam_list.T

    sim_matrix_list = np.asarray([
        text_similarity_model.predict_sim_matrix(question_answers)
        for question_answers in question_answers_list
    ])
    list_of_edge_lists = [
        matrix_to_edge_list(sim_matrix, sim_matrix.shape[0])
        for sim_matrix in sim_matrix_list
    ]

    # Refresh the widgets BEFORE plotting: the question selector is rebuilt
    # for the new number of questions, so the graph reflects the selection the
    # user sees instead of a stale one from before this upload (which could
    # also reference questions that no longer exist).
    threshold_slider.display()
    which_questions_widget.display(
        question_quantity=len(list_of_edge_lists)
    )

    # Rebuilding the selector does not notify observers, so plot explicitly.
    plot_graph(unuseful_arg=None)

In [None]:
# Process the archive whenever the upload button's `data` trait changes.
file_upload_button.observe(on_zip_upload, names=['data'])
# Redraw the graph whenever the threshold slider value changes.
threshold_slider.observe(plot_graph, names='value')
# Redraw the graph whenever the question selection changes; the callback is
# stored and attached to each selector widget created by display().
which_questions_widget.observe(plot_graph, names='value')