In [1]:
import pandas as pd
import numpy as np
from goose3 import Goose
import re # relugar expression
import nltk # natural language toolkit
import string
import heapq

In [2]:
g = Goose()
url = 'https://www.mcleanvet.com/5-things-you-can-do-to-improve-your-pets-dental-health-by-dr-eva-chung/'
article = g.extract(url)

In [3]:
article.infos

{'meta': {'description': 'So you’ve noticed an offensive odour from your pet’s mouth recently, maybe even a slight decline in appetite or she prefers soft food versus kibble. Unfortunately, many pet owners are unaware these signs may be indicators of dental disease and their pet is experiencing dental pain. In fact, many pets...',
  'lang': 'en',
  'keywords': '',
  'favicon': 'https://www.mcleanvet.com/wp-content/uploads/sites/227/2022/03/favicon.png',
  'canonical': 'https://www.mcleanvet.com/5-things-you-can-do-to-improve-your-pets-dental-health-by-dr-eva-chung/',
  'encoding': 'UTF-8'},
 'image': None,
 'domain': 'www.mcleanvet.com',
 'title': '5 Things You Can Do To Improve Your Pet’s Dental Health',
 'cleaned_text': 'So you’ve noticed an offensive odour from your pet’s mouth recently, maybe even a slight decline in appetite or she prefers soft food versus kibble. Unfortunately, many pet owners are unaware these signs may be indicators of dental disease and their pet is experienci

In [4]:
article.title

'5 Things You Can Do To Improve Your Pet’s Dental Health'

In [5]:
article.cleaned_text

'So you’ve noticed an offensive odour from your pet’s mouth recently, maybe even a slight decline in appetite or she prefers soft food versus kibble. Unfortunately, many pet owners are unaware these signs may be indicators of dental disease and their pet is experiencing dental pain. In fact, many pets have evidence of dental disease long before they are considered “old”, because they have not been given proper dental care. To prevent unnecessary oral pain, improve the quality of life and health of a pet, it is imperative a dental care program is implemented. By following the tips below, you will be a step ahead in fighting dental disease.\n\nThe number one way to prevent plaque and calculus/tarter formation is by brushing your pet’s teeth daily. Bacteria from a pet’s mouth results in plaque formation and overtime the minerals in saliva will cause the plaque to harden into calculus. Once calculus is present, it cannot be brushed off. The calculus will lead to gum disease which in turn c

In [6]:
len(article.cleaned_text)

4444

In [7]:
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [8]:
stopwords = nltk.corpus.stopwords.words('english')

In [9]:
def preprocess(text):
  formatted_text = text.lower()
  tokens = []
  for token in nltk.word_tokenize(formatted_text):
    tokens.append(token)
  #print(tokens)
  tokens = [word for word in tokens if word not in stopwords and word not in string.punctuation]
  formatted_text = ' '.join(element for element in tokens)

  return formatted_text

In [10]:
formatted_article = preprocess(article.cleaned_text)
formatted_article

'’ noticed offensive odour pet ’ mouth recently maybe even slight decline appetite prefers soft food versus kibble unfortunately many pet owners unaware signs may indicators dental disease pet experiencing dental pain fact many pets evidence dental disease long considered “ old ” given proper dental care prevent unnecessary oral pain improve quality life health pet imperative dental care program implemented following tips step ahead fighting dental disease number one way prevent plaque calculus/tarter formation brushing pet ’ teeth daily bacteria pet ’ mouth results plaque formation overtime minerals saliva cause plaque harden calculus calculus present brushed calculus lead gum disease turn lead dental abscess loose teeth recommend pet owners speak veterinarian proper brushing technique well use veterinarian recommended toothpaste many diets claims preventing calculus formation market today recommend dental diets pet owners unable brush pet ’ teeth pet pre-existing condition strongly r

In [11]:
len(formatted_article)

3099

In [12]:
def summarize(text, number_of_sentences, percentage = 0):
  original_text = text
  formatted_text = preprocess(original_text)

  word_frequency = nltk.FreqDist(nltk.word_tokenize(formatted_text))
  highest_frequency = max(word_frequency.values())
  for word in word_frequency.keys():
    word_frequency[word] = (word_frequency[word] / highest_frequency)
  sentence_list = nltk.sent_tokenize(original_text)
  
  score_sentences = {}
  for sentence in sentence_list:
    for word in nltk.word_tokenize(sentence):
      if word in word_frequency.keys():
        if sentence not in score_sentences.keys():
          score_sentences[sentence] = word_frequency[word]
        else:
          score_sentences[sentence] += word_frequency[word]

  import heapq
  if percentage > 0:
    best_sentences = heapq.nlargest(int(len(sentence_list) * percentage), score_sentences, key=score_sentences.get)
  else:
    best_sentences = heapq.nlargest(number_of_sentences, score_sentences, key=score_sentences.get)

  return sentence_list, best_sentences, word_frequency, score_sentences

In [13]:
sentence_list, best_sentences, word_frequency, score_sentences = summarize(article.cleaned_text, 15)

In [14]:
sentence_list

['So you’ve noticed an offensive odour from your pet’s mouth recently, maybe even a slight decline in appetite or she prefers soft food versus kibble.',
 'Unfortunately, many pet owners are unaware these signs may be indicators of dental disease and their pet is experiencing dental pain.',
 'In fact, many pets have evidence of dental disease long before they are considered “old”, because they have not been given proper dental care.',
 'To prevent unnecessary oral pain, improve the quality of life and health of a pet, it is imperative a dental care program is implemented.',
 'By following the tips below, you will be a step ahead in fighting dental disease.',
 'The number one way to prevent plaque and calculus/tarter formation is by brushing your pet’s teeth daily.',
 'Bacteria from a pet’s mouth results in plaque formation and overtime the minerals in saliva will cause the plaque to harden into calculus.',
 'Once calculus is present, it cannot be brushed off.',
 'The calculus will lead 

In [15]:
best_sentences

['I recommend dental diets to pet owners who are unable to brush their pet’s teeth and if their pet does not have a pre-existing condition.',
 'Unfortunately, many pet owners are unaware these signs may be indicators of dental disease and their pet is experiencing dental pain.',
 'I recommend regular exams with your veterinarian to assess your pet’s teeth and whether a dental is the right option for your pet.',
 'In general, dental diets work in two ways; the mechanical action while a pet chews the kibble and the enzymes present in the food to help reduce bacteria and plaque formation.',
 'Even if a pet owner is diligent at brushing their pet’s teeth, a cleaning under anesthesia may be necessary to remove tarter from along and beneath the gingiva.',
 'This alternative is recommended for owners who cannot brush their pet’s teeth and when a dental diet is not appropriate.',
 'In fact, many pets have evidence of dental disease long before they are considered “old”, because they have not b

In [16]:
word_frequency

FreqDist({'dental': 1.0, 'pet': 0.8518518518518519, '’': 0.4074074074074074, 'teeth': 0.25925925925925924, 'calculus': 0.25925925925925924, 'diets': 0.25925925925925924, 'disease': 0.2222222222222222, 'veterinarian': 0.2222222222222222, 'owners': 0.18518518518518517, 'plaque': 0.18518518518518517, ...})

In [17]:
score_sentences

{'So you’ve noticed an offensive odour from your pet’s mouth recently, maybe even a slight decline in appetite or she prefers soft food versus kibble.': 2.407407407407408,
 'Unfortunately, many pet owners are unaware these signs may be indicators of dental disease and their pet is experiencing dental pain.': 4.592592592592593,
 'In fact, many pets have evidence of dental disease long before they are considered “old”, because they have not been given proper dental care.': 3.074074074074074,
 'To prevent unnecessary oral pain, improve the quality of life and health of a pet, it is imperative a dental care program is implemented.': 2.7037037037037037,
 'By following the tips below, you will be a step ahead in fighting dental disease.': 1.4074074074074074,
 'The number one way to prevent plaque and calculus/tarter formation is by brushing your pet’s teeth daily.': 2.3703703703703702,
 'Bacteria from a pet’s mouth results in plaque formation and overtime the minerals in saliva will cause th

In [18]:
def visualize(title, sentence_list, best_sentences):
  from IPython.core.display import HTML
  text = ''

  display(HTML(f'<h1>Summary - {title}</h1>'))
  for sentence in sentence_list:
    if sentence in best_sentences:
      text += ' ' + str(sentence).replace(sentence, f"<mark>{sentence}</mark>")
    else:
      text += ' ' + sentence
  display(HTML(f""" {text} """))

In [19]:
visualize(article.title, sentence_list, best_sentences)

In [20]:
article_list = ['https://www.mcleanvet.com/cottaging-with-your-pet/',
                'https://www.mcleanvet.com/everything-need-know-heartworm-ontario/',
                'https://www.vanislevet.com/foot-care-healthy-dog/']

In [21]:
for url in article_list:
    g = Goose()
    article = g.extract(url)
    sentence_list, best_sentences, _, _ = summarize(article.cleaned_text, 100, percentage=0.3)
    visualize(article.title, sentence_list, best_sentences)