In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
import logging
import functools
import operator

In [36]:
log = logging.getLogger()
log.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
log.addHandler(ch)

In [60]:
def get_details_podcast(url):
    r = requests.get(url)
    soup = bs(r.text, 'html.parser')
    
    titles = []
    for t in soup.find_all("h2", class_="podcast-title"):
        title = t.get_text(strip=True)
        titles.append(title)
        
    dates = []
    for date in soup.find_all('span'):
        d = str(date)
        if 'class="podcast-date"' in d:
            dates.append(date.text)
    
    durations = []
    for duration in soup.find_all('li','item'):
        d = str(duration)
        if 'class="fa fa-clock"' in d:
            durations.append(duration.text)
    
    links = []
    for link in soup.find_all('a'):
        l = str(link)
        if 'class="podcast-play"' in l:
            links.append(link.get('href'))
    
    details_podcast = []
    for t, date, duration, link in zip(titles,dates,durations,links):
        details_podcast.append((t, date, duration, link))
    
    return details_podcast

In [61]:
url = "https://bibotalk.com/categoria/podcast/btcast-abc2/page/{}/"
page = 1
pages = 5
details_btcast = []

while page < pages:
    lst_get = get_details_podcast(url.format(page))
    log.debug(f"coletado {len(lst_get)} episódios do link: {url.format(page)}")
    details_btcast.append(lst_get)
    page += 1

2022-09-07 14:53:06,815 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): bibotalk.com:443
2022-09-07 14:53:08,118 - urllib3.connectionpool - DEBUG - https://bibotalk.com:443 "GET /categoria/podcast/btcast-abc2/page/1/ HTTP/1.1" 301 None
2022-09-07 14:53:08,446 - urllib3.connectionpool - DEBUG - https://bibotalk.com:443 "GET /categoria/podcast/btcast-abc2/ HTTP/1.1" 200 None
2022-09-07 14:53:08,696 - root - DEBUG - coletado 20 episódios do link: https://bibotalk.com/categoria/podcast/btcast-abc2/page/1/
2022-09-07 14:53:08,703 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): bibotalk.com:443
2022-09-07 14:53:09,911 - urllib3.connectionpool - DEBUG - https://bibotalk.com:443 "GET /categoria/podcast/btcast-abc2/page/2/ HTTP/1.1" 200 None
2022-09-07 14:53:10,234 - root - DEBUG - coletado 17 episódios do link: https://bibotalk.com/categoria/podcast/btcast-abc2/page/2/
2022-09-07 14:53:10,240 - urllib3.connectionpool - DEBUG - Starting new HTTPS conn

In [62]:
details_btcast

[[('O antropoceno e a igreja – BTCast ABC2 044',
   '25 de agosto de 2022',
   ' 01:00:14',
   'https://bibotalk.com/podcast/o-antropoceno-e-a-igreja-btcast-abc2-044/'),
  ('Cultivando uma comunidade intelectual – BTCast ABC2 043',
   '21 de julho de 2022',
   ' 00:52:49',
   'https://bibotalk.com/podcast/cultivando-uma-comunidade-intelectual-btcast-abc2-043/'),
  ('Igreja e metaverso – BTCAST ABC2 042',
   '23 de junho de 2022',
   ' 01:01:39',
   'https://bibotalk.com/podcast/igreja-e-metaverso-btcast-abc2-042/'),
  ('Conhecimento e adoração – BTCast ABC2 041',
   '26 de maio de 2022',
   ' 0:51:23',
   'https://bibotalk.com/podcast/conhecimento-e-adoracao-btcast-abc2-041/'),
  ('Ciência e religião – BTCast ABC2 040',
   '19 de abril de 2022',
   ' 01:00:57',
   'https://bibotalk.com/podcast/ciencia-e-religiao-btcast-abc2-040/'),
  ('O dilúvio nas religiões e na ciência – BTCast ABC2 039',
   '22 de março de 2022',
   ' 00:57:44',
   'https://bibotalk.com/podcast/o-diluvio-nas-religi

In [71]:
def functools_reduce(a):
    return functools.reduce(operator.concat, a)

In [79]:
details_btcast_list = functools_reduce(details_btcast)

In [80]:
details_btcast_list

[('O antropoceno e a igreja – BTCast ABC2 044',
  '25 de agosto de 2022',
  ' 01:00:14',
  'https://bibotalk.com/podcast/o-antropoceno-e-a-igreja-btcast-abc2-044/'),
 ('Cultivando uma comunidade intelectual – BTCast ABC2 043',
  '21 de julho de 2022',
  ' 00:52:49',
  'https://bibotalk.com/podcast/cultivando-uma-comunidade-intelectual-btcast-abc2-043/'),
 ('Igreja e metaverso – BTCAST ABC2 042',
  '23 de junho de 2022',
  ' 01:01:39',
  'https://bibotalk.com/podcast/igreja-e-metaverso-btcast-abc2-042/'),
 ('Conhecimento e adoração – BTCast ABC2 041',
  '26 de maio de 2022',
  ' 0:51:23',
  'https://bibotalk.com/podcast/conhecimento-e-adoracao-btcast-abc2-041/'),
 ('Ciência e religião – BTCast ABC2 040',
  '19 de abril de 2022',
  ' 01:00:57',
  'https://bibotalk.com/podcast/ciencia-e-religiao-btcast-abc2-040/'),
 ('O dilúvio nas religiões e na ciência – BTCast ABC2 039',
  '22 de março de 2022',
  ' 00:57:44',
  'https://bibotalk.com/podcast/o-diluvio-nas-religioes-e-na-ciencia-btcast-

In [86]:
columns = {'title': [], 
        'date': [], 
        'duration': [], 
        'link': []}
df_details_btcast_teste = pd.DataFrame(details_btcast_list,columns=columns)

In [87]:
df_details_btcast_teste

Unnamed: 0,title,date,duration,link
0,O antropoceno e a igreja – BTCast ABC2 044,25 de agosto de 2022,01:00:14,https://bibotalk.com/podcast/o-antropoceno-e-a...
1,Cultivando uma comunidade intelectual – BTCast...,21 de julho de 2022,00:52:49,https://bibotalk.com/podcast/cultivando-uma-co...
2,Igreja e metaverso – BTCAST ABC2 042,23 de junho de 2022,01:01:39,https://bibotalk.com/podcast/igreja-e-metavers...
3,Conhecimento e adoração – BTCast ABC2 041,26 de maio de 2022,0:51:23,https://bibotalk.com/podcast/conhecimento-e-ad...
4,Ciência e religião – BTCast ABC2 040,19 de abril de 2022,01:00:57,https://bibotalk.com/podcast/ciencia-e-religia...
5,O dilúvio nas religiões e na ciência – BTCast ...,22 de março de 2022,00:57:44,https://bibotalk.com/podcast/o-diluvio-nas-rel...
6,O Fim do Mundo – BTCast ABC2 038,22 de fevereiro de 2022,01:04:38,https://bibotalk.com/podcast/o-fim-do-mundo-bt...
7,O Cristão de Humanas na Universidade – BTCast ...,25 de janeiro de 2022,00:56:31,https://bibotalk.com/podcast/o-cristao-de-huma...
8,Fraudes na Ciência e na Fé – BTCast ABC2 036,15 de dezembro de 2021,01:07:26,https://bibotalk.com/podcast/btcast/fraudes-na...
9,"Perguntas difíceis, respostas sinceras – BTCas...",24 de novembro de 2021,01:06:26,https://bibotalk.com/podcast/perguntas-dificei...
