# In [21]:
import numpy as np
import pandas as pd

import tkinter as tk
from tkinter import scrolledtext
from tkinter import messagebox
from tkinter import END
from nltk.tokenize import sent_tokenize

# Importing the summarization code
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from nltk.corpus import stopwords
from string import punctuation
from collections import Counter
from heapq import nlargest
import networkx as nx

# Pre-trained GloVe vectors used to embed sentences, and their dimensionality.
_GLOVE_PATH = 'glove.6B.100d.txt'
_EMBEDDING_DIM = 100

# Cache for the expensive resources (spaCy pipeline, GloVe table, stop words)
# so they are loaded on the first click only, not on every click.
_RESOURCE_CACHE = {}


def _get_nlp():
    """Return the spaCy 'en_core_web_sm' pipeline, loading it on first use."""
    if 'nlp' not in _RESOURCE_CACHE:
        _RESOURCE_CACHE['nlp'] = spacy.load('en_core_web_sm')
    return _RESOURCE_CACHE['nlp']


def _get_word_embeddings():
    """Return the GloVe word -> vector dict, parsing the file on first use."""
    if 'glove' not in _RESOURCE_CACHE:
        embeddings = {}
        # `with` guarantees the file is closed even if a line fails to parse.
        with open(_GLOVE_PATH, encoding='utf-8') as glove_file:
            for line in glove_file:
                values = line.split()
                if not values:
                    continue  # tolerate blank lines instead of raising IndexError
                embeddings[values[0]] = np.asarray(values[1:], dtype='float32')
        _RESOURCE_CACHE['glove'] = embeddings
    return _RESOURCE_CACHE['glove']


def _get_stop_words():
    """Return the NLTK English stop words as a frozenset, loading on first use."""
    if 'stop_words' not in _RESOURCE_CACHE:
        _RESOURCE_CACHE['stop_words'] = frozenset(stopwords.words('english'))
    return _RESOURCE_CACHE['stop_words']


def _embed_sentences(sentences, word_embeddings, stop_words):
    """Return one averaged word-embedding vector per sentence, in input order."""
    # regex=True is passed explicitly: pandas 2.0 changed the default to literal
    # matching, which would leave digits and punctuation in place.
    letters_only = pd.Series(sentences).str.replace("[^a-zA-Z]", " ", regex=True)
    unknown_word = np.zeros((_EMBEDDING_DIM,))
    vectors = []
    for sentence in letters_only:
        words = [w for w in sentence.lower().split() if w not in stop_words]
        if words:
            total = sum(word_embeddings.get(w, unknown_word) for w in words)
            # The small constant in the denominator is kept from the original
            # formula so the resulting vectors are unchanged.
            vectors.append(total / (len(words) + 0.001))
        else:
            vectors.append(np.zeros((_EMBEDDING_DIM,)))
    return vectors


def _similarity_matrix(vectors):
    """Return the pairwise cosine-similarity matrix with a zeroed diagonal."""
    from sklearn.metrics.pairwise import cosine_similarity
    # A single call computes every pair at once.
    sim_mat = cosine_similarity(np.asarray(vectors))
    # A sentence must not be linked to itself in the graph built from this.
    for idx in range(len(vectors)):
        sim_mat[idx, idx] = 0.0
    return sim_mat


def summarize_text():
    """Summarize the text in ``text_box`` and show the result in ``result_box``.

    Sentences are embedded as averaged GloVe vectors, linked in a graph whose
    edge weights are cosine similarities, and ranked with PageRank. The
    highest-ranked sentences are written to ``result_box`` joined by spaces,
    best first. The number of sentences is read from ``num_sentences_entry``.
    A sentence that occurs more than once in the input is shown at most once.

    Invalid input (non-numeric or non-positive count, text with no content
    words) and missing resources (spaCy model, GloVe file, NLTK stop words)
    are reported through an error dialog instead of raising.
    """
    # Get text from the text box
    text = text_box.get("1.0", "end-1c")

    # Validate the requested summary length before doing any expensive work.
    try:
        num_sentences = int(num_sentences_entry.get())
    except ValueError:
        messagebox.showerror("Error", "Number of sentences must be a whole number.")
        return
    if num_sentences < 1:
        messagebox.showerror("Error", "Number of sentences must be at least 1.")
        return

    # Loading spaCy model
    try:
        nlp = _get_nlp()
    except OSError as exc:
        messagebox.showerror("Error", f"Could not load the spaCy model: {exc}")
        return

    # Tokenization: the text must contain at least one token that is neither
    # a stop word, punctuation, nor a bare newline.
    doc = nlp(text)
    has_words = any(
        not token.is_stop and not token.is_punct and token.text != '\n'
        for token in doc
    )
    if not has_words:
        messagebox.showerror("Error", "No words found in the text.")
        return

    # Sentence tokenization
    sent_token = [sent.text for sent in doc.sents]

    # Load the GloVe model and the stop-word list
    try:
        word_embeddings = _get_word_embeddings()
        stop_words = _get_stop_words()
    except (OSError, LookupError) as exc:
        messagebox.showerror("Error", f"Could not load a required resource: {exc}")
        return

    # Create vectors for the sentences
    sentence_vectors = _embed_sentences(sent_token, word_embeddings, stop_words)

    # Find similarities between the sentences by using the cosine similarity approach
    sim_mat = _similarity_matrix(sentence_vectors)

    # Convert the similarity matrix into a graph: nodes are sentence indices,
    # edges carry the similarity scores between the sentences.
    nx_graph = nx.from_numpy_array(sim_mat)
    try:
        scores = nx.pagerank(nx_graph)
    except nx.PowerIterationFailedConvergence:
        messagebox.showerror("Error", "Could not rank the sentences for this text.")
        return

    # Rank by sentence index rather than by sentence text, so a repeated
    # sentence cannot overwrite the score of an earlier occurrence. The sort
    # is stable, so ties keep their original document order.
    ranked = sorted(range(len(sent_token)), key=scores.get, reverse=True)
    summarized_sentences = []
    already_used = set()
    for idx in ranked:
        sentence = sent_token[idx]
        if sentence in already_used:
            continue
        already_used.add(sentence)
        summarized_sentences.append(sentence)
        if len(summarized_sentences) == num_sentences:
            break

    # Display summarized text in the result box
    result_box.delete("1.0", END)
    result_box.insert(END, " ".join(summarized_sentences))

# GUI setup: create the main window, build every widget, lay them out, run.
root = tk.Tk()
root.title("Text Summarizer")

# --- Widgets (created in the order they appear on screen) ---

# Multi-line, word-wrapped input area for the text to summarize.
text_box = scrolledtext.ScrolledText(root, wrap=tk.WORD, height=15, width=70)

# Caption and entry for the requested number of summary sentences.
num_sentences_label = tk.Label(root, text="Number of Sentences:")
num_sentences_entry = tk.Entry(root, width=10)
num_sentences_entry.insert(END, "3")  # pre-filled default

# Button that invokes summarize_text when pressed.
summarize_button = tk.Button(root, command=summarize_text, text="Summarize")

# Multi-line, word-wrapped output area for the summary.
result_box = scrolledtext.ScrolledText(root, wrap=tk.WORD, height=8, width=70)

# --- Layout (pack order determines the order of the widgets in the window) ---
text_box.pack(pady=10)
num_sentences_label.pack()
num_sentences_entry.pack()
summarize_button.pack(pady=5)
result_box.pack(pady=10)

# Start the Tk event loop.
root.mainloop()