# NLP: Notes Comparison with TF-IDF and Cosine Similarity

In [1]:
from nltk.corpus import stopwords
from nltk import download
# Make sure the NLTK stop-word corpus is available locally before reading it.
download('stopwords')

# English stop words as provided by the NLTK corpus reader.
stop_words = stopwords.words('english')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [6]:
import tkinter as tk
from tkinter import filedialog
from tkinter import ttk
import PyPDF2
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def preprocess_text(text):
    """Normalise raw text into a space-separated string of content words.

    The text is tokenised with NLTK, each token is lower-cased, and only
    tokens that are purely alphanumeric and are not English stop words are
    kept.

    Args:
        text: The raw text to clean.

    Returns:
        The surviving tokens joined by single spaces (possibly empty).
    """
    raw_tokens = nltk.word_tokenize(text)
    english_stopwords = set(stopwords.words("english"))

    kept_words = []
    for raw_token in raw_tokens:
        word = raw_token.lower()
        # Drop punctuation / mixed-symbol tokens first, then stop words.
        if not word.isalnum():
            continue
        if word in english_stopwords:
            continue
        kept_words.append(word)

    return " ".join(kept_words)

def compare_answers(student_answer, ideal_answer):
    """Score how similar two free-text answers are.

    Both answers are cleaned with ``preprocess_text``, embedded with a
    ``TfidfVectorizer`` fitted on just these two documents, and compared
    with cosine similarity.

    Args:
        student_answer: Raw text of the first answer.
        ideal_answer: Raw text of the answer to compare against.

    Returns:
        The cosine similarity of the two TF-IDF vectors. ``0.0`` is returned
        when either answer has no usable terms left after preprocessing,
        instead of letting the vectorizer raise on an empty vocabulary.
    """
    # Preprocess the student and ideal answers
    notes1 = preprocess_text(student_answer)
    notes2 = preprocess_text(ideal_answer)

    # An answer with no surviving tokens shares nothing with the other one.
    # Returning early also avoids fitting the vectorizer on two empty
    # documents, which fails with "empty vocabulary".
    if not notes1 or not notes2:
        return 0.0

    vectorizer = TfidfVectorizer()
    try:
        vectors = vectorizer.fit_transform([notes1, notes2])
    except ValueError:
        # Still reachable when every remaining token is rejected by the
        # vectorizer's default token pattern (e.g. single-character words),
        # which again leaves an empty vocabulary.
        return 0.0

    # cosine_similarity returns a 1x1 matrix for two single-row inputs.
    similarity = cosine_similarity(vectors[0], vectors[1])[0][0]
    return similarity

def open_text_file():
    """Let the user pick a ``.txt`` file and show its contents in the first box.

    The file is read as UTF-8 and replaces whatever ``text_entry`` currently
    holds. Nothing happens if the file dialog is dismissed without a choice.
    """
    chosen_path = filedialog.askopenfilename(filetypes=[("Text Files", "*.txt")])
    if not chosen_path:
        return

    with open(chosen_path, 'r', encoding='utf-8') as handle:
        contents = handle.read()

    # Replace the widget's current contents with the file's text.
    text_entry.delete("1.0", tk.END)
    text_entry.insert(tk.END, contents)

def open_pdf_file():
    """Let the user pick a PDF and show its extracted text in the second box.

    Text is extracted page by page with PyPDF2 and replaces whatever
    ``compare_entry`` currently holds. Nothing happens if the file dialog is
    dismissed without a choice.
    """
    file_path = filedialog.askopenfilename(filetypes=[("PDF Files", "*.pdf")])
    if not file_path:
        return

    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Extraction must happen while the file is still open.
        # `or ""` tolerates pages for which no text is returned.
        page_texts = [page.extract_text() or "" for page in reader.pages]

    # Separate pages with a newline so the last word of one page is not
    # fused with the first word of the next when the text is tokenised.
    text = "\n".join(page_texts)

    compare_entry.delete("1.0", tk.END)
    compare_entry.insert(tk.END, text)
        
def compare():
    """Compare the contents of both text boxes and display the score.

    Reads the full text of ``text_entry`` and ``compare_entry``, scores the
    pair with ``compare_answers`` and writes the result, formatted to two
    decimals, into ``similarity_label``.
    """
    similarity_score = compare_answers(
        text_entry.get("1.0", tk.END),
        compare_entry.get("1.0", tk.END),
    )
    similarity_label.config(text=f"Similarity score: {similarity_score:.2f}")
    
    
# ---------------------------------------------------------------------------
# GUI assembly: main window, styles, widgets, layout, then the event loop.
# ---------------------------------------------------------------------------
window = tk.Tk()
window.geometry("700x400")
window.title("Notes Comparison")

# Shared look for the themed (ttk) widgets.
style = ttk.Style()
style.configure("TLabel", font=("Arial", 12), foreground="#333")
style.configure("TButton", font=("Arial", 12))
# NOTE(review): the text boxes below are classic tk.Text widgets, so this
# "TText" style presumably does not affect them -- confirm.
style.configure("TText", font=("Arial", 12))

# --- Widgets (created in their on-screen order, top to bottom) -------------
# First set of notes: caption, editable box, and a loader for .txt files.
text_label = ttk.Label(master=window, text="First Set of Notes:")
text_entry = tk.Text(master=window, height=5)
open_text_button = ttk.Button(
    master=window, text="Open Text File", command=open_text_file
)

# Second set of notes: caption, editable box, and a loader for .pdf files.
compare_label = ttk.Label(master=window, text="Second Set of Notes:")
compare_entry = tk.Text(master=window, height=5)
open_pdf_button = ttk.Button(
    master=window, text="Open PDF File", command=open_pdf_file
)

# Trigger for the comparison and the label that shows its result.
compare_button = ttk.Button(master=window, text="Compare Notes", command=compare)
similarity_label = ttk.Label(master=window, text="Similarity score: ")

# --- Layout (pack order determines the vertical stacking) ------------------
text_label.pack()
text_entry.pack(pady=5)
open_text_button.pack()

compare_label.pack()
compare_entry.pack(pady=5)
open_pdf_button.pack()

compare_button.pack(pady=10)
similarity_label.pack()

# Hand control to Tk; this call blocks until the window is closed.
window.mainloop()