In [7]:
!pip install nltk
!pip install newspaper3k
!pip install numpy
!pip install networkx


Collecting newspaper3k
  Downloading newspaper3k-0.2.8-py3-none-any.whl (211 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.1/211.1 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Collecting cssselect>=0.9.2 (from newspaper3k)
  Downloading cssselect-1.2.0-py2.py3-none-any.whl (18 kB)
Collecting feedparser>=5.2.1 (from newspaper3k)
  Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tldextract>=2.0.1 (from newspaper3k)
  Downloading tldextract-5.1.2-py3-none-any.whl (97 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.6/97.6 kB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting feedfinder2>=0.0.4 (from newspaper3k)
  Downloading feedfinder2-0.0.4.tar.gz (3.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting jieba3k>=0.35.1 (from newspaper3k)
  Downloading jieba3k-0.35.1.zip

In [8]:
nltk.download('punkt')
nltk.download('stopwords')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [9]:
import nltk
from newspaper import Article
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx

def read_article(url):
    article = Article(url)
    article.download()
    article.parse()
    return article.text

def sentence_similarity(sent1, sent2, stopwords=None):
    if stopwords is None:
        stopwords = []

    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]

    all_words = list(set(sent1 + sent2))

    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)

    for w in sent1:
        if w in stopwords:
            continue
        vector1[all_words.index(w)] += 1

    for w in sent2:
        if w in stopwords:
            continue
        vector2[all_words.index(w)] += 1

    return 1 - cosine_distance(vector1, vector2)

def build_similarity_matrix(sentences, stop_words):
    similarity_matrix = np.zeros((len(sentences), len(sentences)))

    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1 == idx2: #ignore if both are same sentences
                continue
            similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)

    return similarity_matrix

def generate_summary(article_url, top_n=5):
    stop_words = stopwords.words('english')
    summarize_text = []

    # Step 1 - Read the article
    article_content = read_article(article_url)

    # Step 2 - Tokenize the article into sentences
    sentences = sent_tokenize(article_content)

    # Step 3 - Generate Sentence Similarity Matrix
    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)

    # Step 4 - Rank sentences in Similarity Matrix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)

    # Step 5 - Sort the rank and pick top sentences
    ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)

    # Step 6 - Pick top N sentences
    for i in range(top_n):
        summarize_text.append("".join(ranked_sentence[i][1]))

    return " ".join(summarize_text)

# Example usage
url = "https://timesofindia.indiatimes.com/city/bengaluru/bizman-loses-5-2cr-in-stock-app-fraud/articleshow/109483991.cms"
summary = generate_summary(url, top_n=3)
print(summary)

BENGALURU: A 52-year-old businessman dabbling in the stock market allegedly lost Rs 5.2 crore to cybercrooks recently.The victim, Sharath (name changed), director of a private firm and resident of Jayanagar, told cybercrime police in his complaint on April 8 that he received a WhatsApp message on March 11 about investing in the stock market to get high returns.The message also provided a link (bys-app.com). The miscreants gave him multiple accounts to transfer the money and claimed that it was invested in the stock market.By April 2, Sharath had ended up transferring Rs 5.2 crore to five accounts provided by the fraudsters. Sharath didn’t relent at first but the miscreants eventually managed to convince him to download the app.


BENGALURU: A 52-year-old businessman dabbling in the stock market allegedly lost Rs 5.2 crore to cybercrooks recently.The victim, Sharath (name changed), director of a private firm and resident of Jayanagar, told cybercrime police in his complaint on April 8 that he received a WhatsApp message on March 11 about investing in the stock market to get high returns.The message also provided a link (bys-app.com). The miscreants gave him multiple accounts to transfer the money and claimed that it was invested in the stock market.By April 2, Sharath had ended up transferring Rs 5.2 crore to five accounts provided by the fraudsters. Sharath didn’t relent at first but the miscreants eventually managed to convince him to download the app.