In [49]:
import tkinter as tk
from tkinter import scrolledtext, messagebox
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import csv



In [50]:
class PlagiarismDetector:
    """Flag input text that closely matches an entry of a reference CSV corpus.

    The CSV file is read once at construction time. Its first row is treated
    as a header and discarded; in every following row the first column is
    kept as an identifier and the second column is used as the reference
    text. The reference texts are turned into a TF-IDF matrix that later
    inputs are compared against with cosine similarity.
    """

    def __init__(self, csv_file_path):
        """Store the corpus path, create the vectorizer and load the corpus.

        Args:
            csv_file_path: Path of the CSV file holding the reference texts.

        Raises:
            ValueError: If the file contains no usable data rows.
        """
        self.csv_file_path = csv_file_path
        self.vectorizer = TfidfVectorizer()
        self.tfidf_matrix = None
        self.unique_ids = None
        self.load_data()

    def load_data(self):
        """Read the CSV corpus and fit the TF-IDF vectorizer on its texts.

        Populates ``self.unique_ids`` (first column of each data row) and
        ``self.tfidf_matrix`` (result of ``fit_transform`` on the second
        column). Rows with fewer than two columns, which includes the empty
        list ``csv.reader`` yields for a blank line, are skipped instead of
        raising ``IndexError``.

        Raises:
            ValueError: If no row with at least two columns follows the
                header.
        """
        # newline='' lets the csv module do its own newline handling, which
        # it needs for quoted fields that contain line breaks. The encoding
        # is deliberately left at the platform default, as before.
        with open(self.csv_file_path, 'r', newline='') as f:
            reader = csv.reader(f)
            next(reader, None)  # discard the header row, if there is one
            rows = [row for row in reader if len(row) >= 2]

        if not rows:
            raise ValueError(
                f"No usable data rows found in {self.csv_file_path!r}"
            )

        self.unique_ids = [row[0] for row in rows]
        texts = [row[1] for row in rows]
        self.tfidf_matrix = self.vectorizer.fit_transform(texts)

    def check_plagiarism(self, input_text, threshold=0.8):
        """Report whether ``input_text`` is too similar to any corpus text.

        Args:
            input_text: The text to check.
            threshold: Cosine-similarity value that must be strictly
                exceeded for a corpus entry to count as a match.

        Returns:
            True if at least one corpus entry has a cosine similarity to
            ``input_text`` greater than ``threshold``, otherwise False.
        """
        input_vector = self.vectorizer.transform([input_text])

        # A single call scores the input against every corpus row at once,
        # yielding one row of similarities, instead of one call per row.
        similarities = cosine_similarity(input_vector, self.tfidf_matrix)
        return bool((similarities > threshold).any())



In [51]:
class PlagiarismApp:
    """Small Tkinter front end for ``PlagiarismDetector``.

    Shows a scrollable text box and a "Check Plagiarism" button; pressing
    the button runs the detector on the entered text and reports the
    outcome in a message box.
    """

    def __init__(self, master, csv_file_path='msrp-annotations.csv'):
        """Build the widgets and load the reference corpus.

        Args:
            master: The Tk widget (normally the root window) that hosts the
                application's widgets.
            csv_file_path: Path of the reference CSV handed to
                ``PlagiarismDetector``. Defaults to the file name that was
                previously hard-coded, so existing callers are unaffected.
        """
        self.master = master
        master.title("Plagiarism Detection")

        self.detector = PlagiarismDetector(csv_file_path)

        self.input_text_area = scrolledtext.ScrolledText(master, width=40, height=10, wrap=tk.WORD)
        self.input_text_area.pack(pady=10)

        self.check_button = tk.Button(master, text="Check Plagiarism", command=self.check_plagiarism)
        self.check_button.pack()

    def check_plagiarism(self):
        """Run the detector on the text box content and show the result.

        Empty or whitespace-only input is not passed to the detector; a
        warning asking for text is shown instead.
        """
        # "end-1c" drops the trailing newline the Text widget always appends.
        input_text = self.input_text_area.get("1.0", "end-1c")

        if not input_text.strip():
            messagebox.showwarning("Plagiarism Detection", "Please enter some text to check.")
            return

        plagiarism_detected = self.detector.check_plagiarism(input_text)

        if plagiarism_detected:
            messagebox.showinfo("Plagiarism Detection", "Plagiarism Detected!")
        else:
            messagebox.showinfo("Plagiarism Detection", "No Plagiarism Detected.")



In [53]:
# Launch the GUI only when this code runs as the main module, not on import.
if __name__ == "__main__":
    # Create the top-level Tk window that hosts the application.
    root = tk.Tk()
    # Building the app also constructs the detector, which reads the CSV corpus.
    app = PlagiarismApp(root)
    # Hand control to the Tk event loop.
    root.mainloop()
