In [1]:
#  Importing Necessary Libraries
import spacy
from nltk.corpus import stopwords
import heapq

In [2]:
# Load spaCy's small English pipeline; every later cell works from the Doc it produces
nlp = spacy.load(name='en_core_web_sm')

In [3]:
# Sample passage to summarize: a statement of significance for the Space Shuttle
# orbiter Discovery. The literal is kept exactly as pasted (line breaks and trailing
# spaces included) because the raw string is what gets handed to spaCy below.
text = """
The Orbiter Discovery, OV-103, is considered eligible for listing in the National Register of Historic Places 
(NRHP) in the context of the U.S. Space Shuttle Program (1969-2011) under Criterion A in the areas of Space 
Exploration and Transportation and under Criterion C in the area of Engineering. Because it has achieved significance 
within the past fifty years, Criteria Consideration G applies. Under Criterion A, Discovery is significant as the oldest 
of the three extant orbiter vehicles constructed for the Space Shuttle Program (SSP), the longest running American 
space program to date; she was the third of five orbiters built by NASA. Unlike the Mercury, Gemini, and Apollo 
programs, the SSP’s emphasis was on cost effectiveness and reusability, and eventually the construction of a space station. 
Including her maiden voyage (launched August 30, 1984), Discovery flew to space thirty-nine times, more than any of the other 
four orbiters; she was also the first orbiter to fly twenty missions. She had the honor of being chosen as the Return to Flight 
vehicle after both the Challenger and Columbia accidents. Discovery was the first shuttle to fly with the redesigned SRBs, a result 
of the Challenger accident, and the first shuttle to fly with the Phase II and Block I SSME. Discovery also carried the Hubble Space 
Telescope to orbit and performed two of the five servicing missions to the observatory. She flew the first and last dedicated 
Department of Defense (DoD) missions, as well as the first unclassified defense-related mission. In addition, Discovery was vital to 
the construction of the International Space Station (ISS); she flew thirteen of the thirty-seven total missions flown to the station 
by a U.S. Space Shuttle. She was the first orbiter to dock to the ISS, and the first to perform an exchange of a resident crew. Under 
Criterion C, Discovery is significant as a feat of engineering. According to Wayne Hale, a flight director from Johnson Space Center, 
the Space Shuttle orbiter represents a “huge technological leap from expendable rockets and capsules to a reusable, winged, hypersonic, 
cargo-carrying spacecraft.” Although her base structure followed a conventional aircraft design, she used advanced materials that both 
minimized her weight for cargo-carrying purposes and featured low thermal expansion ratios, which provided a stable base for her Thermal 
Protection System (TPS) materials. The Space Shuttle orbiter also featured the first reusable TPS; all previous spaceflight vehicles had 
a single-use, ablative heat shield. Other notable engineering achievements of the orbiter included the first reusable orbital propulsion system, 
and the first two-fault-tolerant Integrated Avionics System. As Hale stated, the Space Shuttle remains “the largest, fastest, winged hypersonic aircraft 
in history,” having regularly flown at twenty-five times the speed of sound.
"""

# Run the loaded spaCy pipeline over the passage. The resulting `doc` is the object
# the following cells iterate over for individual tokens and for sentence spans.
doc = nlp(text)

In [4]:
import nltk
import ssl


def _ensure_nltk_resource(resource_path, package):
    """Make sure an NLTK data package is installed, downloading it only if missing.

    Args:
        resource_path: Location understood by ``nltk.data.find``
            (for example ``'corpora/stopwords'``).
        package: Identifier passed to ``nltk.download``
            (for example ``'stopwords'``).
    """
    try:
        nltk.data.find(resource_path)
        return  # Already on disk: no network round-trip, so the cell also runs offline.
    except LookupError:
        pass  # Not installed yet; fall through to the download.

    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if unverified_factory is None:
        # This Python build has no unverified-context helper; download normally.
        nltk.download(package)
        return

    # NOTE(security): certificate verification is switched off so the download works
    # on machines with a broken local certificate store. That is insecure, so the
    # override is limited to this one download and the verified default is restored
    # afterwards instead of staying disabled for the rest of the kernel session.
    verified_factory = ssl._create_default_https_context
    ssl._create_default_https_context = unverified_factory
    try:
        nltk.download(package)
    finally:
        ssl._create_default_https_context = verified_factory


# 'punkt' is not used by the cells shown here (spaCy does the tokenizing); it is
# still ensured so anything else that expects it keeps working.
_ensure_nltk_resource('tokenizers/punkt', 'punkt')
_ensure_nltk_resource('corpora/stopwords', 'stopwords')

# Keep alphabetic tokens whose lower-cased form is not an English stop word.
# The tokens themselves are stored with their original casing.
stop_words = set(stopwords.words('english'))
filtered_tokens = [
    token.text
    for token in doc
    if token.is_alpha and token.text.lower() not in stop_words
]

# Print tokenized words
print("Filtered Words:", filtered_tokens)

Filtered Words: ['Orbiter', 'Discovery', 'considered', 'eligible', 'listing', 'National', 'Register', 'Historic', 'Places', 'NRHP', 'context', 'Space', 'Shuttle', 'Program', 'Criterion', 'areas', 'Space', 'Exploration', 'Transportation', 'Criterion', 'C', 'area', 'Engineering', 'achieved', 'significance', 'within', 'past', 'fifty', 'years', 'Criteria', 'Consideration', 'G', 'applies', 'Criterion', 'Discovery', 'significant', 'oldest', 'three', 'extant', 'orbiter', 'vehicles', 'constructed', 'Space', 'Shuttle', 'Program', 'SSP', 'longest', 'running', 'American', 'space', 'program', 'date', 'third', 'five', 'orbiters', 'built', 'NASA', 'Unlike', 'Mercury', 'Gemini', 'Apollo', 'programs', 'SSP', 'emphasis', 'cost', 'effectiveness', 'reusability', 'eventually', 'construction', 'space', 'station', 'Including', 'maiden', 'voyage', 'launched', 'August', 'Discovery', 'flew', 'space', 'thirty', 'nine', 'times', 'four', 'orbiters', 'also', 'first', 'orbiter', 'fly', 'twenty', 'missions', 'honor'

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pawel\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\pawel\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [5]:
# Pull the raw text out of each sentence span that spaCy segmented from the document
sentences = list(map(lambda span: span.text, doc.sents))
print("Sentences:", sentences)

Sentences: ['\nThe Orbiter Discovery, OV-103, is considered eligible for listing in the National Register of Historic Places \n(NRHP) in the context of the U.S. Space Shuttle Program (1969-2011) under Criterion A in the areas of Space \nExploration and Transportation and under Criterion C in the area of Engineering.', 'Because it has achieved significance \nwithin the past fifty years, Criteria Consideration G applies.', 'Under Criterion A, Discovery is significant as the oldest \nof the three extant orbiter vehicles constructed for the Space Shuttle Program (SSP), the longest running American \nspace program to date; she was the third of five orbiters built by NASA.', 'Unlike the Mercury, Gemini, and Apollo \nprograms, the SSP’s emphasis was on cost effectiveness and reusability, and eventually the construction of a space station. \n', 'Including her maiden voyage (launched August 30, 1984), Discovery flew to space thirty-nine times, more than any of the other \nfour orbiters; she was

In [6]:
# Score each sentence by how many of its tokens are keywords (for summarization).
#
# `filtered_tokens` stores words with their original casing, while the lookup below
# lower-cases each word. Normalizing the keywords to lower case makes the comparison
# case-insensitive on both sides, so capitalised keywords such as "Discovery" are
# counted too. A set also gives constant-time membership checks.
keyword_set = {token.lower() for token in filtered_tokens}

sentence_scores = {}
for sent in doc.sents:
    hits = sum(1 for word in sent if word.text.lower() in keyword_set)
    # Sentences without any keyword are left out of the mapping entirely.
    if hits:
        sentence_scores[sent] = hits

In [7]:
# Pick the three highest-scoring sentences (best first) and join their text into the summary
summary_sentences = heapq.nlargest(
    3,
    sentence_scores.keys(),
    key=lambda candidate: sentence_scores[candidate],
)
summary = ' '.join(chosen.text for chosen in summary_sentences)

In [8]:
# Show the summary under a heading, preceded by a blank line
print("\nSummary:", summary, sep="\n")


Summary:
Although her base structure followed a conventional aircraft design, she used advanced materials that both 
minimized her weight for cargo-carrying purposes and featured low thermal expansion ratios, which provided a stable base for her Thermal 
Protection System (TPS) materials. Under Criterion A, Discovery is significant as the oldest 
of the three extant orbiter vehicles constructed for the Space Shuttle Program (SSP), the longest running American 
space program to date; she was the third of five orbiters built by NASA. According to Wayne Hale, a flight director from Johnson Space Center, 
the Space Shuttle orbiter represents a “huge technological leap from expendable rockets and capsules to a reusable, winged, hypersonic, 
cargo-carrying spacecraft.”
