# In [None]:
import speech_recognition as sr
import nltk
from nltk.corpus import stopwords
from heapq import nlargest
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from gensim.summarization import summarize

def split_into_sentences(line, max_length):
    """Wrap *line* into period-terminated pseudo-sentences.

    Words (whitespace-separated) are packed greedily so that each output
    line, including its trailing ".", is at most ``max_length`` characters
    long. A single word that is longer than the limit cannot be broken and
    is emitted on a line of its own.

    Args:
        line: The text to wrap.
        max_length: Maximum length of each output line, counting the
            appended period.

    Returns:
        The wrapped lines joined with newlines, or an empty string when
        *line* contains no words.
    """
    sentences = []
    current_words = []
    # Length of the words collected so far, counting one separator per word;
    # that extra character is what leaves room for the final ".".
    current_length = 0

    for word in line.split():
        if current_length + len(word) + 1 <= max_length:
            current_words.append(word)
            current_length += len(word) + 1
            continue

        # Flush only a non-empty buffer: an over-long word arriving while
        # the buffer is empty must not produce a bare "." line.
        if current_words:
            sentences.append(" ".join(current_words) + ".")
        current_words = [word]
        current_length = len(word) + 1

    # Append the remaining sentence
    if current_words:
        sentences.append(" ".join(current_words) + ".")

    return '\n'.join(sentences)

# Example usage:


# Speech-to-text front end: record audio from the microphone, transcribe it,
# and wrap the transcript into short period-terminated lines.
r = sr.Recognizer()

# Capture the audio from the microphone
with sr.Microphone() as source:
    print("Speak:")
    # NOTE(review): presumably blocks until the speaker pauses — confirm
    # against the SpeechRecognition documentation.
    audio = r.listen(source)
    print("Time over, thanks")

# Recognize the speech
# NOTE(review): no error handling here; recognize_google presumably raises
# when the audio is unintelligible or the service is unreachable — confirm
# and decide whether this script should catch those cases.
text = r.recognize_google(audio)

print("You said:", text)

# Re-wrap the transcript with 80 as the max_length argument; the wrapped,
# period-terminated text in `result` is what gets summarised further down.
result = split_into_sentences(text, 80)


def summarise(text, n):
    """Build an extractive summary of *text* from its top-scoring sentences.

    The text is split on ", " and each piece is sentence-tokenized with
    NLTK. Each sentence is scored as the sum of the normalized frequencies
    of its tokens that are not English stop words, and the best-scoring
    sentences are returned in their original order.

    Args:
        text: The text to summarise.
        n: Fraction of the sentence count to keep; at least one sentence
            is requested regardless of how small the fraction is.

    Returns:
        The selected sentences joined with single spaces. An empty string
        is returned when *text* is empty/blank or contains no tokens other
        than stop words, since there is then nothing to score.

    Note:
        Presumably requires the NLTK tokenizer and stop-word data to be
        available locally — verify for the deployment environment.
    """
    # Nothing to summarise; return early instead of failing in max() below.
    if not text or not text.strip():
        return ""

    # Split the text into segments based on pauses
    segments = text.split(', ')

    # Tokenize each segment into sentences
    sentences = []
    for segment in segments:
        sentences.extend(nltk.sent_tokenize(segment))

    # Tokenize each sentence into lower-cased words and remove stop words
    stop_words = set(stopwords.words('english'))
    words = [
        [word for word in nltk.word_tokenize(sent.lower()) if word not in stop_words]
        for sent in sentences
    ]

    # Count how often each remaining word occurs across the whole text
    word_frequencies = {}
    for sent in words:
        for word in sent:
            word_frequencies[word] = word_frequencies.get(word, 0) + 1

    # Only stop words (or no tokens at all): no sentence can be scored.
    if not word_frequencies:
        return ""

    # Normalize so the most frequent word weighs 1.0
    max_freq = max(word_frequencies.values())
    word_frequencies = {
        word: count / max_freq for word, count in word_frequencies.items()
    }

    # Score each sentence; sentences left with no words after stop-word
    # removal get no entry and therefore can never be selected.
    sentence_scores = {
        i: sum(word_frequencies[word] for word in sent)
        for i, sent in enumerate(words)
        if sent
    }

    # Select the top sentences by score, then restore document order
    num_sentences = max(1, int(n * len(sentences)))  # Ensure at least one sentence is selected
    summary_idxs = sorted(
        nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
    )

    # Join the selected sentences into a summary string
    return ' '.join(sentences[i] for i in summary_idxs)

# Summarise the wrapped transcript, passing 0.4 as the fraction of sentences
# to keep, and print the result.
summary = summarise(result, 0.4)
print("\n\nSummarised text:", summary)