# EXTRACTIVE TEXT SUMMERIZER

### IMPORTING REQUIRED LIBRARIES 

In [1]:
import nltk
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\prana\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
text_corpus = """One day, Molly the milkmaid had filled her pails with milk. Her job was to milk the cows, and then bring the milk to the market to sell. Molly loved to think about what to spend her money on.
As she filled the pails with milk and went to market, she again thought of all the things she wanted to buy. As she walked along the road, she thought of buying a cake and a basket full of fresh strawberries.
A little further down the road, she spotted a chicken. She thought, “With the money I get from today, I’m going to buy a chicken of my own. That chicken will lay eggs, then I will be able to sell milk and eggs and get more money!”
She continued, “With more money, I will be able to buy a fancy dress and make all the other milkmaids jealous.” Out of excitement, Molly started skipping, forgetting about the milk in her pails. Soon, the milk started spilling over the edges, covering Molly.
Drenched, Molly said to herself, “Oh no! I will never have enough money to buy a chicken now.” She went home with her empty pails.
“Oh, my goodness! What happened to you?” Molly’s mother asked.
“I was too busy dreaming about all the things I wanted to buy that I forgot about the pails,” she answered.
“Oh, Molly, my dear. How many times do I need to say, ‘Don’t count your chickens until they hatch?’”
The Moral
Don’t count your chickens before they hatch."""

### SENTENCE LEVEL TOKENIZATION

In [3]:
text_corp = text_corpus.replace("\\r\\n", "")
sentences = sent_tokenize(text_corp)

In [4]:
#Peeking into our tokenized sentences
print("Senetence are: \n", sentences)

Senetence are: 
 ['One day, Molly the milkmaid had filled her pails with milk.', 'Her job was to milk the cows, and then bring the milk to the market to sell.', 'Molly loved to think about what to spend her money on.', 'As she filled the pails with milk and went to market, she again thought of all the things she wanted to buy.', 'As she walked along the road, she thought of buying a cake and a basket full of fresh strawberries.', 'A little further down the road, she spotted a chicken.', 'She thought, “With the money I get from today, I’m going to buy a chicken of my own.', 'That chicken will lay eggs, then I will be able to sell milk and eggs and get more money!”\nShe continued, “With more money, I will be able to buy a fancy dress and make all the other milkmaids jealous.” Out of excitement, Molly started skipping, forgetting about the milk in her pails.', 'Soon, the milk started spilling over the edges, covering Molly.', 'Drenched, Molly said to herself, “Oh no!', 'I will never have 

### CREATING SENTENCE ORGANIZER

In [5]:
sentence_organizer = {k:v for v,k in enumerate(sentences)}

In [6]:
print("Our sentence organizer: \n", sentence_organizer)

Our sentence organizer: 
 {'One day, Molly the milkmaid had filled her pails with milk.': 0, 'Her job was to milk the cows, and then bring the milk to the market to sell.': 1, 'Molly loved to think about what to spend her money on.': 2, 'As she filled the pails with milk and went to market, she again thought of all the things she wanted to buy.': 3, 'As she walked along the road, she thought of buying a cake and a basket full of fresh strawberries.': 4, 'A little further down the road, she spotted a chicken.': 5, 'She thought, “With the money I get from today, I’m going to buy a chicken of my own.': 6, 'That chicken will lay eggs, then I will be able to sell milk and eggs and get more money!”\nShe continued, “With more money, I will be able to buy a fancy dress and make all the other milkmaids jealous.” Out of excitement, Molly started skipping, forgetting about the milk in her pails.': 7, 'Soon, the milk started spilling over the edges, covering Molly.': 8, 'Drenched, Molly said to he

### CREATING TF-IDF MODEL

In [7]:
# Let's now create a tf-idf (Term frequnecy Inverse Document Frequency) model
tf_idf_vectorizer = TfidfVectorizer(min_df=2,  max_features=None, 
                                    strip_accents='unicode', 
                                    analyzer='word',
                                    token_pattern=r'\w{1,}',
                                    ngram_range=(1, 3), 
                                    use_idf=1,smooth_idf=1,
                                    sublinear_tf=1,
                                    stop_words = 'english')

In [8]:
# Passing our sentences treating each as one document to TF-IDF vectorizer
tf_idf_vectorizer.fit(sentences)

TfidfVectorizer(min_df=2, ngram_range=(1, 3), smooth_idf=1,
                stop_words='english', strip_accents='unicode', sublinear_tf=1,
                token_pattern='\\w{1,}', use_idf=1)

In [9]:
# Transforming our sentences to TF-IDF vectors
sentence_vectors = tf_idf_vectorizer.transform(sentences)

### PERFORMING SENTENCE SCORING

In [10]:
# Getting sentence scores for each sentences
sentence_scores = np.array(sentence_vectors.sum(axis=1)).ravel()

# Sanity checkup
print(len(sentences) == len(sentence_scores))

True


In [11]:
# Getting top-n sentences
N = int(len(sentences)/3) + 1
top_n_sentences = [sentences[ind] for ind in np.argsort(sentence_scores, axis=0)[::-1][:N]]

### PERFORMING FINAL SUMMERIZATION

In [12]:
# Let's now do the sentence ordering using our prebaked sentence_organizer
# Let's map the scored sentences with their indexes
mapped_top_n_sentences = [(sentence,sentence_organizer[sentence]) for sentence in top_n_sentences]
print("Our top_n_sentence with their index: \n")
for element in mapped_top_n_sentences:
    print(element)

# Ordering our top-n sentences in their original ordering
mapped_top_n_sentences = sorted(mapped_top_n_sentences, key = lambda x: x[1])
ordered_scored_sentences = [element[0] for element in mapped_top_n_sentences]

# Our final summary
summary = " ".join(ordered_scored_sentences)

Our top_n_sentence with their index: 

('As she filled the pails with milk and went to market, she again thought of all the things she wanted to buy.', 3)
('That chicken will lay eggs, then I will be able to sell milk and eggs and get more money!”\nShe continued, “With more money, I will be able to buy a fancy dress and make all the other milkmaids jealous.” Out of excitement, Molly started skipping, forgetting about the milk in her pails.', 7)
('“I was too busy dreaming about all the things I wanted to buy that I forgot about the pails,” she answered.', 13)
('One day, Molly the milkmaid had filled her pails with milk.', 0)
('I will never have enough money to buy a chicken now.” She went home with her empty pails.', 10)
('She thought, “With the money I get from today, I’m going to buy a chicken of my own.', 6)
('Her job was to milk the cows, and then bring the milk to the market to sell.', 1)


## RESULT/SUMMARY

In [13]:
print("Summary: \n", summary)

Summary: 
 One day, Molly the milkmaid had filled her pails with milk. Her job was to milk the cows, and then bring the milk to the market to sell. As she filled the pails with milk and went to market, she again thought of all the things she wanted to buy. She thought, “With the money I get from today, I’m going to buy a chicken of my own. That chicken will lay eggs, then I will be able to sell milk and eggs and get more money!”
She continued, “With more money, I will be able to buy a fancy dress and make all the other milkmaids jealous.” Out of excitement, Molly started skipping, forgetting about the milk in her pails. I will never have enough money to buy a chicken now.” She went home with her empty pails. “I was too busy dreaming about all the things I wanted to buy that I forgot about the pails,” she answered.


In [14]:
def summarizer(text, tokenizer):
    text_corp = text_corpus.replace("\\r\\n", "")
    sentences = sent_tokenize(text_corp)
    # Let's create an organizer which will store the sentence ordering to later reorganize the 
    # scored sentences in their correct order
    sentence_organizer = {k:v for v,k in enumerate(sentences)}
    # Let's now create a tf-idf (Term frequnecy Inverse Document Frequency) model
    tf_idf_vectorizer = TfidfVectorizer(min_df=2,  max_features=None, 
                                        strip_accents='unicode', 
                                        analyzer='word',
                                        token_pattern=r'\w{1,}',
                                        ngram_range=(1, 3), 
                                        use_idf=1,smooth_idf=1,
                                        sublinear_tf=1,
                                        stop_words = 'english')
    # Passing our sentences treating each as one document to TF-IDF vectorizer
    tf_idf_vectorizer.fit(sentences)
    # Transforming our sentences to TF-IDF vectors
    sentence_vectors = tf_idf_vectorizer.transform(sentences)
    # Getting sentence scores for each sentences
    sentence_scores = np.array(sentence_vectors.sum(axis=1)).ravel()
    # Getting top-n sentences
    N = int(len(sentences)/3) + 1
    top_n_sentences = [sentences[ind] for ind in np.argsort(sentence_scores, axis=0)[::-1][:N]]
    # Let's now do the sentence ordering using our prebaked sentence_organizer
    # Let's map the scored sentences with their indexes
    mapped_top_n_sentences = [(sentence,sentence_organizer[sentence]) for sentence in top_n_sentences]
    # Ordering our top-n sentences in their original ordering
    mapped_top_n_sentences = sorted(mapped_top_n_sentences, key = lambda x: x[1])
    ordered_scored_sentences = [element[0] for element in mapped_top_n_sentences]
    # Our final summary
    summary = " ".join(ordered_scored_sentences)
    return summary

In [17]:
print("Summary : \n" , summarizer(text = text_corpus,tokenizer = nltk))

Summary : 
 One day, Molly the milkmaid had filled her pails with milk. As she filled the pails with milk and went to market, she again thought of all the things she wanted to buy. She thought, “With the money I get from today, I’m going to buy a chicken of my own. That chicken will lay eggs, then I will be able to sell milk and eggs and get more money!”
She continued, “With more money, I will be able to buy a fancy dress and make all the other milkmaids jealous.” Out of excitement, Molly started skipping, forgetting about the milk in her pails. I will never have enough money to buy a chicken now.” She went home with her empty pails. “I was too busy dreaming about all the things I wanted to buy that I forgot about the pails,” she answered.
