In [1]:
pip install networkx==2.6

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install scipy==1.8.0

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Importing Packages

import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
import requests
import pandas as pd
import io

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\jules\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
# Import data

url = "https://raw.githubusercontent.com/julesz12345/Text-Summarizer/main/articles.csv"
download = requests.get(url).content
articles = pd.read_csv(io.StringIO(download.decode('utf-8')))

In [5]:
# Reading Article and Splitting it into Sentences

def read_article(articles):
    article = articles.split(". ")
    sentences = []
    for sentence in article:
        print(sentence)
        sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))
    sentences.pop() 
    
    return sentences

In [6]:
# Finding Similarity between Sentences

def sentence_similarity(sent1, sent2, stopwords=None):
    if stopwords is None:
        stopwords = []
 
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
 
    all_words = list(set(sent1 + sent2))
 
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)
 
    # build the vector for the first sentence
    for w in sent1:
        if w in stopwords:
            continue
        vector1[all_words.index(w)] += 1
 
    # build the vector for the second sentence
    for w in sent2:
        if w in stopwords:
            continue
        vector2[all_words.index(w)] += 1
 
    return 1 - cosine_distance(vector1, vector2)

In [7]:
# Building Similarity Matrix

def build_similarity_matrix(sentences, stop_words):
    # Create an empty similarity matrix
    similarity_matrix = np.zeros((len(sentences), len(sentences)))
 
    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1 == idx2: #ignore if both are same sentences
                continue 
            similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)

    return similarity_matrix

In [11]:
# Extract Summary

def generate_summary(articles, top_n=5):
    stop_words = stopwords.words('english')
    summarize_text = []

    # Step 1 - Read text anc split it
    sentences =  read_article(articles)

    # Step 2 - Generate Similary Martix across sentences
    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)

    # Step 3 - Rank sentences in similarity martix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)

    # Step 4 - Sort the rank and pick top sentences
    ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)       

    for i in range(top_n):
      summarize_text.append(" ".join(ranked_sentence[i][1]))

    # Step 5 - Offcourse, output the summarize texr
    print("Summarize Text: \n", ". ".join(summarize_text))

# let's begin
text = articles['article'][5]
generate_summary(text, 1)

The team with the lowest transfer outlay this season rose unassumingly into eighth place on Wednesday night
Stoke City beat Everton, to nobody’s great surprise, and look on course for their best Premier League finish yet – a year after their last
Mark Hughes really should be in the running for those annual accolades saluting success amid all this
Manager of the Year? Quite possibly
Mark Hughes looks on with a smile as his Stoke City side cruise to a 2-0 win over Everton on Wednesday 
He has signed with remarkable shrewdness, shaped his players into a cohesive style, and overcome the set-backs of regular injuries to key players to guide Stoke to 42 points after 28 games
His team registered 50 Premier League points in reaching ninth last term, two club records, but are currently in a rich vein of form and likely to go higher
Seven wins from 11 in the league have seen Stoke nudge past West Ham United and Swansea – both clubs with claims to good seasons
And all the while forking out less c