In [2]:
from bs4 import BeautifulSoup
from bs4.element import Comment
import urllib.request
import re

In [3]:
def tag_visible(element):
    """Decide whether a parsed text node counts as reader-visible text.

    Args:
        element: a text node from a parsed document; it must expose
            ``element.parent.name``.

    Returns:
        False when the node's direct parent is one of the non-rendered
        containers listed below, or when the node is a ``Comment``;
        True otherwise.
    """
    hidden_parents = ('style', 'script', 'head', 'title', 'meta', '[document]')
    # The parent test runs first; the Comment test is only reached for nodes
    # whose parent is not in the hidden list.
    under_hidden_parent = element.parent.name in hidden_parents
    return not (under_hidden_parent or isinstance(element, Comment))


def text_from_html(body):
    """Return the reader-visible text of an HTML document as one string.

    Args:
        body: HTML markup to hand to BeautifulSoup (the caller in this file
            passes the raw bytes of an HTTP response).

    Returns:
        The text nodes accepted by ``tag_visible``, each stripped of
        surrounding whitespace and joined with a single space. A node that
        is whitespace-only still contributes an empty piece, so consecutive
        spaces can appear in the result.
    """
    soup = BeautifulSoup(body, 'html.parser')
    # find_all(string=True) is the supported spelling of the deprecated
    # findAll(text=True); it selects the same text nodes.
    text_nodes = soup.find_all(string=True)
    return " ".join(node.strip() for node in text_nodes if tag_visible(node))

In [4]:
class Reporte:
    """Figures parsed from one daily COVID-19 report page.

    The page at ``url`` is downloaded, reduced to its visible text, and four
    pairs of counts are extracted with fixed Spanish sentence patterns. In
    every pair the first value is the department-wide figure (``*_ant``) and
    the second is the figure for Medellín (``*_med``).

    Attributes:
        informe_web: visible page text with every '.' removed.
        date: label supplied by the caller; returned by str() and repr().
        new_ant, new_med: new cases registered in this report.
        dead_ant, dead_med: deaths reported in this report.
        act_ant, act_med: active cases stated in this report.
        rec_ant, rec_med: recovered people stated in this report.
    """

    # The patterns are matched against text from which every '.' has been
    # removed (see __init__). That is why sentences run into each other with
    # no period, and presumably why the counts can be matched as plain digit
    # runs even when the page writes them with '.' as a thousands separator.
    _NEW_CASES_RE = re.compile(r'El informe de hoy registra (\d+) casos nuevos en el departamento De ellos en Medellín hay \((\d+)\)')
    _DEAD_RE = re.compile(r'Hoy se reportan (\d+) fallecidos En Medellín hay \((\d+)\) de ellos')
    _ACT_ANT_RE = re.compile(r'El informe indica que, a la fecha, hay (\d+) casos activos en el departamento')
    _ACT_MED_RE = re.compile(r'De los activos, en Medellín hay (\d+) casos')
    _REC_RE = re.compile(r'De acuerdo con el informe, las (\d+) personas recuperadas en el departamento están distribuidas así: Medellín \((\d+)\)')

    def __init__(self, url, date):
        """Download the report at ``url`` and parse its figures.

        Args:
            url: address of the report page.
            date: label for the report, used as its string representation.

        Raises:
            ValueError: if any expected sentence is missing from the page.
        """
        # Read inside a context manager so the HTTP response is closed even
        # if reading fails.
        with urllib.request.urlopen(url) as response:
            html = response.read()
        self.informe_web = text_from_html(html).replace('.', '')
        self.date = date

        self.new_ant, self.new_med = self.get_new_cases()
        self.dead_ant, self.dead_med = self.get_dead()
        self.act_ant, self.act_med = self.get_act()
        self.rec_ant, self.rec_med = self.get_rec()

    def __str__(self):
        return self.date

    def __repr__(self):
        return self.date

    def _extract_counts(self, pattern, label):
        """Search the report text for ``pattern`` and return its groups as ints.

        Args:
            pattern: compiled regex whose capture groups are all digit runs.
            label: short description of the figure, used in the error message.

        Returns:
            A tuple with one int per capture group.

        Raises:
            ValueError: if the pattern does not occur in the report text.
        """
        match = pattern.search(self.informe_web)
        if match is None:
            # Fail with the name of the missing figure instead of the opaque
            # "'NoneType' object has no attribute 'group'".
            raise ValueError(
                'Could not find the {} sentence in report {!r}; '
                'the page wording may have changed.'.format(
                    label, getattr(self, 'date', None)
                )
            )
        return tuple(int(group) for group in match.groups())

    def get_new_cases(self):
        """Return (department, Medellín) new cases."""
        ant, med = self._extract_counts(self._NEW_CASES_RE, 'new cases')
        return ant, med

    def get_dead(self):
        """Return (department, Medellín) reported deaths."""
        ant, med = self._extract_counts(self._DEAD_RE, 'deaths')
        return ant, med

    def get_act(self):
        """Return (department, Medellín) active cases.

        The two figures come from two separate sentences of the report.
        """
        (ant,) = self._extract_counts(self._ACT_ANT_RE, 'department active cases')
        (med,) = self._extract_counts(self._ACT_MED_RE, 'Medellín active cases')
        return ant, med

    def get_rec(self):
        """Return (department, Medellín) recovered people."""
        ant, med = self._extract_counts(self._REC_RE, 'recovered')
        return ant, med

    @staticmethod
    def _balance_gap(active, past_active, new, dead, recovered, past_recovered):
        """Return reported active cases minus the count implied by the balance.

        The implied count is the previous active cases, plus new cases, minus
        deaths, minus the increase in recovered people. Prints 'pass' when
        the two agree.
        """
        expected = past_active + new - dead - (recovered - past_recovered)
        gap = active - expected
        if gap == 0:
            print('pass')
        return gap

    def assert_consistency(self, other):
        """Compare this report's active cases with an earlier report.

        Despite the name, nothing is asserted or raised; the gaps are
        returned for the caller to inspect.

        Args:
            other: the earlier ``Reporte`` used as the baseline.

        Returns:
            A tuple ``(Medellín gap, department gap)``; 0 means consistent.
        """
        return self.assert_consistency_med(other), self.assert_consistency_ant(other)

    def assert_consistency_med(self, past):
        """Return the active-case gap for Medellín relative to ``past``."""
        return self._balance_gap(
            self.act_med, past.act_med,
            self.new_med, self.dead_med,
            self.rec_med, past.rec_med,
        )

    def assert_consistency_ant(self, past):
        """Return the department-wide active-case gap relative to ``past``."""
        return self._balance_gap(
            self.act_ant, past.act_ant,
            self.new_ant, self.dead_ant,
            self.rec_ant, past.rec_ant,
        )

In [5]:
# Build the two consecutive daily reports that are compared in the next cell.
# Each construction downloads and parses its page, so this cell needs
# network access.
nov_8 = Reporte(
    url='https://dssa.gov.co/index.php/situacion-actual-coronavirus-en-antioquia/item/1386-con-1-706-casos-nuevos-registrados-hoy-el-numero-de-contagiados-por-covid-19-en-antioquia-se-eleva-a-183-991',
    date='Domingo, 08 Noviembre 2020',
)
nov_7 = Reporte(
    url='https://dssa.gov.co/index.php/situacion-actual-coronavirus-en-antioquia/item/1385-con-1-765-casos-nuevos-registrados-hoy-el-numero-de-contagiados-por-covid-19-en-antioquia-se-eleva-a-182-285',
    date='Sábado, 07 Noviembre 2020',
)

In [6]:
# Check the 8 Nov figures against the 7 Nov baseline. The result is
# (Medellín gap, department gap): reported active cases minus the count
# implied by the previous day's active cases, new cases, deaths and newly
# recovered. 0 means the report is consistent.
nov_8.assert_consistency(other=nov_7)

(-3, -2)