In [1]:
from urllib.request import urlopen
from urllib.error import HTTPError,URLError
from bs4 import BeautifulSoup

In [115]:

### Web page content classes and crawler
class HeadContent:
    """
    Plain container for the fields read from the header of an issue page:
    issue title, project name, issue key and URL.
    """

    def __init__(self, issue, project, issue_key, url):
        """Store the four header fields unchanged."""
        self.issue, self.project = issue, project
        self.issue_key, self.url = issue_key, url

    def print(self):
        """Write a banner followed by each header field to stdout."""
        # Every entry already ends in "\n", so each print call also leaves
        # a blank line after it.
        report = (
            "--- Head --- \n",
            f"Issue = {self.issue} \n",
            f"Project = {self.project} \n",
            f"Key = {self.issue_key} \n",
            f"URL = {self.url} \n",
        )
        for entry in report:
            print(entry)
        
class MainContent:
    """
    Plain container for the body of an issue page: the detail
    (field, value) pairs, the description paragraphs and the activity entries.
    """

    def __init__(self, detail, description, activity):
        """Store the three parsed sections unchanged."""
        self.detail, self.description, self.activity = detail, description, activity

    def print(self):
        """Write the detail, description and activity sections to stdout."""
        print("--- Main Content --- \n")

        # Details: one " field == value " line per pair.
        print("Detail = \n")
        for f, k in self.detail:
            print(f" {f} == {k} ")
        print("\n")

        # Description: one line per paragraph.
        print("Description = \n")
        for paragraph in self.description:
            print(paragraph)
        print("\n")

        # Activity: each entry is a mapping with 'user', 'date' and 'body' keys.
        print("Activity = \n")
        activity_layout = (
            ("User = %s  \n", 'user'),
            ("Date = %s  \n", 'date'),
            ("Body = %s  \n", 'body'),
        )
        for entry in self.activity:
            for template, field_name in activity_layout:
                print(template % (entry[field_name]))
            print("\n")
            
            


        
        
        

class Crawler:
    """
    Fetches an issue-tracker page and parses it into HeadContent and
    MainContent objects.
    """

    def getPage(self,url):
        """
        Download `url` and parse the response body with BeautifulSoup.

        Returns:
            The parsed document, or None when the request fails with
            HTTPError (page not found, server error) or URLError
            (server unreachable).
        """
        try:
            # The context manager closes the HTTP response even if read() fails.
            with urlopen(url) as response:
                payload = response.read()
        except (HTTPError, URLError):
            return None

        return BeautifulSoup(payload,'html.parser')

    @staticmethod
    def _text_of(node):
        """Return the text of `node`, or None when the node was not found."""
        return node.get_text() if node is not None else None

    def parserHead(self,header):
        """
        Collect the header data: issue title, project name, issue key and
        the relative path of the issue link.

        Args:
            header: the header node, or None when the page has no header.

        Returns:
            A HeadContent; any piece that is missing from the page is None.
        """
        try:
            header_title = header.h1.get_text()
        except AttributeError:
            # `header` itself or its <h1> is missing.
            header_title = None

        try:
            breadcrumb_links = header.ol.find_all('a')
        except AttributeError:
            # `header` itself or its <ol> is missing.
            breadcrumb_links = []

        project_name = issue_key = relative_path = None
        if len(breadcrumb_links) >= 1:
            project_name = breadcrumb_links[0].get_text()
        if len(breadcrumb_links) >= 2:
            issue_link = breadcrumb_links[1]
            # Prefer the visible link text; fall back to the data attribute
            # only when the text is empty.
            issue_key = issue_link.get_text() or issue_link.get('data-issue-key')
            relative_path = issue_link.get('href')

        return HeadContent(header_title,project_name,issue_key,relative_path)

    def _parse_details(self,bs):
        """Return the (field, value) pairs of the details module; [] if absent."""
        container = bs.find('div',{'id':'details-module'})
        if container is None:
            return []

        list_detail = []
        for item in container.find_all('li',{'class':'item'}):
            name_tag = item.find('strong',{'class':'name'})
            if name_tag is None:
                # An entry without a field name carries no usable information.
                continue
            field = name_tag.get_text().replace('\n', '').replace(':','')
            value_tag = item.find(['span','div'],{'class':'value'})
            # Some fields are rendered without a value element; report None.
            key = value_tag.get_text().strip() if value_tag is not None else None
            list_detail.append((field,key))

        return list_detail

    def _parse_description(self,bs):
        """Return the text of each <p> in the description module; [] if absent."""
        container = bs.find('div',{'id':'descriptionmodule'})
        if container is None:
            return []
        return [paragraph.get_text() for paragraph in container.find_all('p')]

    def _parse_activity(self,bs):
        """
        Return one {'user', 'date', 'body'} dict per activity block; [] if
        the activity module is absent. A missing sub-element yields None.
        """
        container = bs.find('div',{'id':'activitymodule'})
        if container is None:
            return []

        list_activity = []
        for item in container.find_all('div',{'class':'issue-data-block'}):
            list_activity.append({
                "user": self._text_of(item.find('a',{'class':'user-hover'})),
                "date": self._text_of(item.find('span',{'class':'date'})),
                "body": self._text_of(item.find('div',{'class':'action-body'})),
            })

        return list_activity

    def parseIssueMain(self,bs):
        """
        Parse the details, description and activity modules of the page.

        Args:
            bs: the parsed page as returned by getPage.

        Returns:
            A MainContent built from the three parsed sections.
        """
        return MainContent(
            self._parse_details(bs),
            self._parse_description(bs),
            self._parse_activity(bs),
        )

    def core(self,url):
        """
        Fetch `url`, then parse and print its header followed by its main
        content. Prints a short notice instead when the page cannot be fetched.
        """
        bs = self.getPage(url)
        if bs is None:
            # Do not fail silently: tell the user why nothing was printed.
            print(f"Could not fetch {url}")
            return

        header_content = bs.find('div',{'class':'aui-page-header-main'})
        head = self.parserHead(header_content)
        head.print()

        content = self.parseIssueMain(bs)
        content.print()


        
        
        
    
    


In [116]:

# Issue page to crawl.
url = 'https://issues.asterisk.org/jira/browse/ASTERISK-28918'


crawler = Crawler()

# Fetch and parse the page, printing its header and then its main content.
crawler.core(url)
    
    
    
    

    
    
# Header container looked up by Crawler.core: <div class="aui-page-header-main">



--- Head --- 

Issue = No Application SIPAddHeader() 

Project = Asterisk 

Key = ASTERISK-28918 

URL = /jira/browse/ASTERISK-28918 

--- Main Content --- 

Detail = 

 Type == Bug 
 Status == Closed 
 Severity == Major 
 Resolution == Not A Bug 
 Affects Version/s == 17.4.0 
 Target Release Version/s == None 
 Component/s == Applications/General 
 Security Level == None 
 Labels == None 
 Environment == CentOS x64 


Description = 

Hello, when attempting to use the application SIPAddHeader(), asterisk returns the following console error:
The documentation doesn't seem to state if there are any dependencies necessary to make it work: https://wiki.asterisk.org/wiki/display/AST/Asterisk+17+Application_SIPAddHeader
Thanks!


Activity = 

User =  Asterisk Team  

Date = 25/May/20 3:20 PM  

Body = Thanks for creating a report! The issue has entered the triage process. That means the issue will wait in this status until a Bug Marshal has an opportunity to review the issue. Once the issue 