 Import 'spacy' for stopwords and 'string' for punctuation.

In [None]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
import string
from collections import defaultdict
from heapq import nlargest

# Read the whole 'spaceX_DP.txt' corpus into memory as a single string.
# The encoding is given explicitly so decoding does not depend on the platform default.
with open('spaceX_DP.txt', mode='r', encoding='utf-8') as file:
    text = file.read()


• Tokenize the 'spaceX_DP.txt' corpus.


In [None]:
# Loading the English tokenizer and stop words
nlp = spacy.load('en_core_web_sm')

# Tokenizing the text and keeping only content words.
doc = nlp(text)
tokens = [
    token.text
    for token in doc
    # STOP_WORDS entries are lower-case, so compare case-insensitively;
    # otherwise sentence-initial words such as "The" slip through.
    if token.text.lower() not in STOP_WORDS
    # ASCII punctuation and symbols (the original filter).
    and token.text not in string.punctuation
    # Non-ASCII or multi-character punctuation (curly quotes, "--", "...").
    and not token.is_punct
    # Whitespace-only tokens such as newlines are not words.
    and not token.is_space
]


• Build 'word frequency'. Make sure you have removed the stopwords.

• Determine the maximum frequency, then normalize every count as
'word_frequencies[word] = word_frequencies[word] / maximum_frequency'.




In [None]:
# Building word frequency dictionary: raw count of every remaining token.
word_frequencies = defaultdict(int)
for word in tokens:
    word_frequencies[word] += 1

# Highest raw count, used as the normalisation divisor.
# default=1 keeps max() from raising ValueError when no tokens survived
# filtering (empty or stop-word-only text); the loop below is then a no-op.
maximum_frequency = max(word_frequencies.values(), default=1)

# Rescale each count so the most frequent word has weight 1.0.
for word in word_frequencies:
    word_frequencies[word] = (word_frequencies[word] / maximum_frequency)


• Tokenize the sentences. Generate the sentence_scores. Score every sentence
based on number of words.


In [None]:
# Split the parsed document into its sentence spans.
sentences = [sent for sent in doc.sents]

# Score each sentence as the sum of the normalised frequencies of its words.
# Words without a frequency entry contribute nothing, and a sentence made up
# only of such words never gets an entry in sentence_scores.
sentence_scores = {}
for sent in sentences:
    for tok in sent:
        weight = word_frequencies.get(tok.text)
        if weight is None:
            continue
        sentence_scores[sent] = sentence_scores.get(sent, 0) + weight


• Import nlargest from heapq and provide 'summarized_sentences'.


In [None]:
from heapq import nlargest

# How many top-ranked sentences make up the summary.
num_summary_sentences = 5

# Pick the highest-scoring sentences; nlargest returns them ordered from the
# highest score down, not in their original document order.
summarized_sentences = nlargest(
    num_summary_sentences,
    sentence_scores.keys(),
    key=lambda sent: sentence_scores[sent],
)


• Convert sentences from spacy to strings and join all sentences.


In [None]:
# Convert each selected spaCy sentence to a plain string and join them with single spaces.
summary = ' '.join(map(str, summarized_sentences))




• Determine the length of the summary.

• Determine the length of the original text.

In [None]:

# Lengths are measured in characters (len of a str), for both the summary and the source text.
summary_length, original_text_length = len(summary), len(text)


Provide a summary once you have converted spacy outputs to strings.

• Use spacy and textrank to summarize the text.

• Print the summary in 5 sentences.

In [None]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from collections import Counter

# Loading the English language model and adding the 'sentencizer' component
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe('sentencizer')

doc = nlp(text)

# Keep the sentence spans so every sentence can be scored from tokens that
# are already parsed, instead of re-running the pipeline once per sentence.
sentence_spans = list(doc.sents)
sentences = [sent.text for sent in sentence_spans]

# Count lower-cased content words.
word_freq = Counter(
    word.text.lower()
    for word in doc
    # Compare the lower-cased form: the counter key is lower-cased, so a
    # case-sensitive test would count "The" under "the".
    if word.text.lower() not in STOP_WORDS
    # ASCII punctuation and symbols (the original filter).
    and word.text not in punctuation
    # Curly quotes and multi-character punctuation tokens.
    and not word.is_punct
    # Newlines and other whitespace-only tokens.
    and not word.is_space
)

# A sentence's score is the sum of the counts of its words; Counter returns 0
# for filtered words, so they add nothing.
sentence_scores = [
    (sent.text, sum(word_freq[word.text.lower()] for word in sent))
    for sent in sentence_spans
]

# Highest-scoring sentences first.
sentence_scores = sorted(sentence_scores, key=lambda x: x[1], reverse=True)

# Selecting the top 5 sentences as the summary
num_summary_sentences = 5
summary_sentences = [sentence[0] for sentence in sentence_scores[:num_summary_sentences]]

summary = ' '.join(summary_sentences)

# Printing the summary in 5 sentences
print(summary)


However, the regulatory agency that is was supposed to be guiding this environmental impact process, the FAA, allowed SpaceX.”Five years after the FAA issued its Final Environmental Impact Statement, the agency issued a 23-page “Written Re-Evaluation of the 2014 Final Environmental Impact Statement for the SpaceX Texas Launch Site,” on May 21, 2019, that acknowledged SpaceX had switched from the Falcon program to the Starship project that included a new “experimental test program.” The metamorphosis of this facility, which sits on tender tidal flats and feet from sand dunes where sea turtles lay eggs, has largely gone unnoticed and under the radar in this border community where environmentalists say they are strapped between fighting the construction of Donald Trump’s border wall through the region, and the development of three new liquefied natural gas facilities at the deepwater Port of Brownsville just 5 miles from SpaceX. More hangars and buildings have been built 1.5 miles from th