<a href="https://colab.research.google.com/github/kovvurisupraj/Feature-Engineering/blob/main/Task2_p2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import math
from collections import Counter

import networkx as nx
import nltk
import numpy as np
from nltk.cluster.util import cosine_distance
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [None]:
def read_article(file_name):
    """Read the first line of a text file and split it into tokenised sentences.

    Only the first line of the file is used. It is split on ". " into
    sentence fragments, and each fragment is split on single spaces into
    words. Every fragment is echoed to stdout as it is processed, and the
    last fragment is discarded before returning.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A list of sentences, each one a list of word strings. The list is
        empty when the file is empty or its first line contains no ". "
        separator.
    """
    # The context manager closes the handle even if reading fails.
    with open(file_name, "r") as handle:
        first_line = handle.readline()

    # An empty file has no first line: report "no sentences" instead of
    # failing with an IndexError.
    if not first_line:
        return []

    sentences = []
    for sentence in first_line.split(". "):
        print(sentence)
        # NOTE(review): str.replace matches its argument literally, so this
        # only rewrites the exact text "[^a-zA-Z]"; it is not a regex filter.
        sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))

    # Discard the final fragment (whatever follows the last ". ").
    sentences.pop()

    return sentences

def sentence_similarity(sent1, sent2, stopwords=None):
    """Return the cosine similarity of two tokenised sentences.

    Both sentences are lower-cased, words found in ``stopwords`` are
    ignored, and the remaining words are compared as term-frequency
    vectors.

    Args:
        sent1: First sentence, as an iterable of word strings.
        sent2: Second sentence, as an iterable of word strings.
        stopwords: Optional collection of words to ignore. Lookups are done
            against the lower-cased words.

    Returns:
        The cosine similarity as a float. ``0.0`` is returned when either
        sentence has no countable words (empty, or made only of stopwords),
        where a plain cosine computation would divide by zero.
    """
    # A set gives O(1) membership tests regardless of what the caller passed.
    excluded = set() if stopwords is None else set(stopwords)

    counts1 = Counter(
        word for word in (token.lower() for token in sent1)
        if word not in excluded
    )
    counts2 = Counter(
        word for word in (token.lower() for token in sent2)
        if word not in excluded
    )

    # Product of the squared Euclidean norms; it is zero exactly when at
    # least one of the two vectors is all zeros.
    squared_norms = (
        sum(count * count for count in counts1.values())
        * sum(count * count for count in counts2.values())
    )
    if squared_norms == 0:
        return 0.0

    # Counter returns 0 for missing keys, so only shared words contribute.
    dot_product = sum(count * counts2[word] for word, count in counts1.items())

    return dot_product / math.sqrt(squared_norms)
 
def build_similarity_matrix(sentences, stop_words):
    """Build the pairwise sentence-similarity matrix.

    Args:
        sentences: Sequence of tokenised sentences.
        stop_words: Stopwords forwarded to ``sentence_similarity``.

    Returns:
        A square NumPy array with one row and one column per sentence.
        Entry ``[i][j]`` holds the similarity of sentences ``i`` and ``j``;
        the diagonal is left at zero.
    """
    size = len(sentences)
    matrix = np.zeros((size, size))

    for row, left in enumerate(sentences):
        for col, right in enumerate(sentences):
            # A sentence is never compared with itself.
            if row != col:
                matrix[row][col] = sentence_similarity(left, right, stop_words)

    return matrix


def generate_summary(file_name, top_n=5):
    """Print an extractive summary of the article stored in ``file_name``.

    The sentences returned by ``read_article`` are compared pairwise, ranked
    with PageRank over the resulting similarity graph, and the highest
    ranked ones are joined into a summary. The summary is then cleaned of
    line breaks and double quotes, lower-cased, stripped of the punctuation
    signs ``?:!.,;`` and lemmatized word by word (as verbs) before printing.

    Args:
        file_name: Path of the text file to summarise.
        top_n: Maximum number of sentences to include in the summary. When
            fewer sentences are available, all of them are used.

    Returns:
        None. The ranking and the final summary are written to stdout.
    """
    nltk.download("stopwords")
    stop_words = stopwords.words('english')

    # Step 1 - Read the text and split it into sentences
    sentences = read_article(file_name)

    # Step 2 - Generate the similarity matrix across sentences
    similarity_matrix = build_similarity_matrix(sentences, stop_words)

    # Step 3 - Rank the sentences using the similarity matrix
    similarity_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(similarity_graph)

    # Step 4 - Sort by rank and pick the top sentences
    ranked_sentence = sorted(
        ((scores[i], s) for i, s in enumerate(sentences)), reverse=True
    )
    print("Indexes of top ranked_sentence order are ", ranked_sentence)

    # Slicing, unlike indexing over range(top_n), does not fail when top_n
    # is larger than the number of ranked sentences.
    top_sentences = ranked_sentence[:max(top_n, 0)]
    summary = ". ".join(" ".join(words) for _, words in top_sentences)

    # Special character cleaning
    summary = summary.replace("\r", " ")
    summary = summary.replace("\n", " ")
    summary = summary.replace("    ", " ")
    summary = summary.replace('"', '')

    # Upcase/downcase
    summary = summary.lower()

    # Punctuation signs: delete all of them in a single pass
    summary = summary.translate(str.maketrans("", "", "?:!.,;"))

    # Stemming and Lemmatization
    nltk.download('punkt')
    print("------------------------------------------------------------")
    nltk.download('wordnet')
    wordnet_lemmatizer = WordNetLemmatizer()

    # Lemmatize word by word. The text must be split first: iterating over
    # the string itself would yield single characters instead of words.
    lemmatized_list = [
        wordnet_lemmatizer.lemmatize(word, pos="v") for word in summary.split()
    ]
    final_text = " ".join(lemmatized_list)

    # Step 5 - Output the summarized text
    print("Summarize Text: \n", final_text)

In [None]:
# Summarise file1.txt, keeping only its single highest-ranked sentence.
generate_summary("file1.txt", top_n=1)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


 It's a late night in the Metro area of Phoenix, Arizona
Under the artificial glare of street lamps, a car can be seen slowly approaching
Active sensors on the vehicle radiate a low hum
A green and blue 'W' glows from the windscreen, giving off just enough light to see inside – to a completely empty driver seat
 The wheel navigates the curb steadily, parking as an arrival notification pings on the phone of the person waiting for it
When they open the door to climb inside, a voice greets them over the vehicle's sound system
"Good evening, this car is all yours – with no one upfront," it says
 This is a Waymo One robotaxi, hailed just 10 minutes ago using an app
The open use of this service to the public, slowly expanding across the US, is one of the many developments signalling that driverless technology is truly becoming a part of our lives
 The promise of driverless technology has long been enticing
It has the potential to transform our experience of commuting and long journeys, take 

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Summarize Text: 
     t h e   u l t i m a t e   v i s i o n   e x p e r t s   a r e   w o r k i n g   t o w a r d s   i s   o f   c o m p l e t e l y   d r i v e r l e s s   v e h i c l e s   b o t h   w i t h i n   i n d u s t r y   w i d e r   t r a n s p o r t   n e t w o r k s   a n d   p e r s o n a l - u s e   c a r s   t h a t   c a n   b e   d e p l o y e d   a n d   u s e d   a n y w h e r e   a n d   e v e r y w h e r e   a r o u n d   t h e   w o r l d b u t   w i t h   a l l   t h e s e   h u r d l e s   i n   p l a c e   w h a t   e x a c t l y   d o e s   t h e   n e x t   1 0   y e a r s   h a v e   i n   s t o r e   f o r   a u t o n o m o u s   v e h i c l e s     t w o   y e a r s   f r o m   n o w     t h e   b i g g e s t   h u r d l e   f o r   t h o s e   i n   t h e   d r i v e r l e s s   t e c h n o l o g y   i n d u s t r y   i s   h o w   t o   g e t   t h e   c a r s   t o   o p e r a t e   s a f e l y   a n d   e f f e c t i v e l y   i n   c o m p l e x   

In [None]:
# Summarise file2.txt, keeping only its single highest-ranked sentence.
generate_summary("file2.txt", top_n=1)

ost people are not very familiar with the concept of artificial intelligence (AI)
As an illustration, when 1,500 senior business leaders in the United States in 2017 were asked about AI, only 17 percent said they were familiar with it.[1] A number of them were not sure what it was or how it would affect their particular companies
They understood there was considerable potential for altering business processes, but were not clear how AI could be deployed within their own organizations.Despite its widespread lack of familiarity, AI is a technology that is transforming every walk of life
It is a wide-ranging tool that enables people to rethink how we integrate information, analyze data, and use the resulting insights to improve decisionmaking
Our hope through this comprehensive overview is to explain AI to an audience of policymakers, opinion leaders, and interested observers, and demonstrate how AI already is altering the world and raising important questions for society, the economy, an

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
# Summarise file3.txt, keeping only its single highest-ranked sentence.
generate_summary("file3.txt", top_n=1)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


INTRO  Global Partners for Development proudly presents: What do you understand? A deep dive into the many facets of philanthropy and development
Experts in their field will discuss an aspect of their work that they understand particularly well
Let’s talk about big bets, innovation, social enterprises, large scale humanitarian aid, and the fixation on ending things or solving humanity’s greatest problems and the issues that arise while tackling it all
 I am your host, Ria Pullin, and my co-host is the Executive Director of Global Partners for Development, Daniel Casanova
 RIA  Our guest today is Alex Counts
Alex is an author, an independent consultant to nonprofit organizations, and the founder of the Grameen Foundation
He has recently released a new edition of his book, Small Loans, Big Dreams
 DANIEL  Well, I mean, I think where I start is so many people know about Grameen Bank and Muhammad Yunus
And I, you know, I’d like to hear about you and like how, and I know that in the book yo

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
# Summarise file4.txt, keeping only its single highest-ranked sentence.
generate_summary("file4.txt", top_n=1)

Half of the women over the age of 15 around the world can’t read or write, yet educated females are greater contributors to their communities, are healthier, and are more likely to educate their own children than uneducated women
Global Partners is dedicated to improving the quality of life and education for girls in East Africa, and we are excited to share the latest updates regarding Global Partners’ Girls’ Education Program
 One of the ways we support girls’ education is through the funding of individual secondary school and tertiary level scholarships with the support of our implementing partners: Executive Women in Development (EWIDA) in Uganda and the Pastoral Women’s Council (PWC) in Tanzania
Attached to this article is a report from EWIDA documenting the progress of our scholarship recipients in Uganda from September through December 2019
 We are also thrilled to announce that we recently hired four former Global Partners scholarship recipients from Uganda to work as savings of

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
# Summarise file5.txt, keeping only its single highest-ranked sentence.
generate_summary("file5.txt", top_n=1)

Machine learning is enabling computers to tackle tasks that have, until now, only been carried out by people
 From driving cars to translating speech, machine learning is driving an explosion in the capabilities of artificial intelligence – helping software make sense of the messy and unpredictable real world
 But what exactly is machine learning and what is making the current boom in machine learning possible?  What is machine learning? At a very high level, machine learning is the process of teaching a computer system how to make accurate predictions when fed data
 Those predictions could be answering whether a piece of fruit in a photo is a banana or an apple, spotting people crossing the road in front of a self-driving car, whether the use of the word book in a sentence relates to a paperback or a hotel reservation, whether an email is spam, or recognizing speech accurately enough to generate captions for a YouTube video
 The key difference from traditional computer software is tha

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Indexes of top ranked_sentence order are  [(0.020080097111172177, ['', 'SEE:', 'What', 'is', 'AI?', 'Everything', 'you', 'need', 'to', 'know', 'about', 'Artificial', 'Intelligence', '', 'The', 'viability', 'of', 'semi-supervised', 'learning', 'has', 'been', 'boosted', 'recently', 'by', 'Generative', 'Adversarial', 'Networks', '(GANs),', 'machine-learning', 'systems', 'that', 'can', 'use', 'labelled', 'data', 'to', 'generate', 'completely', 'new', 'data,', 'which', 'in', 'turn', 'can', 'be', 'used', 'to', 'help', 'train', 'a', 'machine-learning', 'model']), (0.020066794869337375, ['', 'Why', 'is', 'domain', 'knowledge', 'important?', 'Another', 'important', 'decision', 'when', 'training', 'a', 'machine-learning', 'model', 'is', 'which', 'data', 'to', 'train', 'the', 'model', 'on']), (0.019479768345901635, ['', 'But', 'what', 'exactly', 'is', 'machine', 'learning', 'and', 'what', 'is', 'making', 'the', 'current', 'boom', 'in', 'machine', 'learning', 'possible?', '', 'What', 'is', 'machin

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
