In [None]:
import glob
import os
import re
import sys

from bs4 import BeautifulSoup

# Make the parent directory importable before loading the project-local modules.
sys.path.append('..')
from dateestimation import DateEstimation
from tools.envvars import load_env_vars_from_directory

In [None]:
# Load environment variables through the project helper (pointed at '../.env').
load_env_vars_from_directory('../.env')

# --- Calendar file: required, and it must exist on disk ---
cal_file = os.environ.get('PODCAST_CAL_FILE')
if not cal_file:
    raise ValueError("PODCAST_CAL_FILE environment variable is not set.")
if not os.path.exists(cal_file):
    raise FileNotFoundError(f"Calendar '{cal_file}' does not exist.")

# --- Episode file-name prefix: required ---
prefix = os.environ.get('PODCAST_PREFIX')
if not prefix:
    raise ValueError("PODCAST_PREFIX environment variable is not set.")

# --- Templates directory: optional, defaults to '../templates'; it must contain podcast.css ---
templates = os.environ.get('PODCAST_TEMPLATES', '../templates')
css = os.path.join(templates, 'podcast.css')
if not os.path.exists(css):
    raise FileNotFoundError(f"CSS file '{css}' does not exist.")

# Echo the effective configuration, one setting per line.
print(
    f"Using calendar file: {cal_file}",
    f"Using podcast prefix: {prefix}",
    f"Using templates directory: {templates}",
    f"Using CSS file: {css}",
    sep='\n',
)

In [None]:
tdir = "/home/jmrobles/Podcasts/Coffee Break/Archivo"
!ls "$tdir"

In [None]:
def get_epnumber(filename, ep_prefix=None):
    """Extract the episode number from an episode file name.

    The number is the run of digits found between the podcast prefix and the
    next underscore in the file's base name; for example, with prefix ``ep``
    the name ``ep510_B_whisper.html`` yields ``510``.

    Args:
        filename: Path or bare name of the episode file. Only the base name
            is inspected.
        ep_prefix: Prefix that precedes the digits. It is interpolated
            verbatim into a regular expression. When omitted, the
            notebook-level ``prefix`` variable is used.

    Returns:
        The episode number as an ``int``.

    Raises:
        ValueError: If the base name does not contain ``<prefix><digits>_``.
    """
    if ep_prefix is None:
        # Fall back to the prefix configured at notebook level.
        ep_prefix = prefix
    basename = os.path.basename(filename)
    match = re.search(rf'{ep_prefix}(\d+)_', basename)
    if match is None:
        # Fail with an explicit message instead of returning an unbound local.
        raise ValueError(
            f"Cannot find an episode number in '{basename}' "
            f"(expected '{ep_prefix}<digits>_')."
        )
    return int(match.group(1))

In [None]:
# Manual check: parse the episode number out of a sample file name under tdir.
get_epnumber(os.path.join(tdir, 'ep510_B_whisper.html'))

In [None]:
# Collect the episode HTML files to process. glob returns its matches in
# arbitrary order, so sort them to keep the processing order (and the printed
# counters) reproducible between runs.
html_files = sorted(glob.glob(os.path.join(tdir, 'ep*.html')))
# Preview the first few matches.
print(html_files[:10])

In [None]:
# Date estimator built from the calendar file; put_date() reads it as the global `de`.
de = DateEstimation(cal_file)


In [None]:
# Read the stylesheet text once; the processing loop hands it to put_style().
with open(css, mode='r', encoding='utf-8') as css_handle:
    style_content = css_handle.read()


In [None]:

def put_style(htmlfile, style_content):
    """Replace the <style> blocks in the <head> of an HTML file with a single new one.

    The file is parsed, every <style> element inside <head> is removed, a new
    <style> element holding ``style_content`` is appended to <head>, and the
    file is rewritten in place.

    Args:
        htmlfile: Path of the HTML file to modify.
        style_content: CSS text placed inside the new <style> element.

    Raises:
        ValueError: If the document has neither a <head> nor an <html>
            element that could hold the stylesheet.
    """
    with open(htmlfile, 'r', encoding='utf-8') as f:
        html_content = f.read()
    soup = BeautifulSoup(html_content, 'html.parser')

    head = soup.find('head')
    if head is None:
        # html.parser does not synthesize missing elements, so a document
        # without <head> needs one created before the style can be attached.
        root = soup.find('html')
        if root is None:
            raise ValueError(
                f"'{htmlfile}' has no <head> or <html> element to attach the style to."
            )
        head = soup.new_tag('head')
        root.insert(0, head)

    # Drop the stylesheets already embedded in the head so only the new one remains.
    for style in head.find_all('style'):
        style.decompose()

    style_tag = soup.new_tag('style')
    style_tag.string = style_content
    head.append(style_tag)

    with open(htmlfile, 'w', encoding='utf-8') as f:
        f.write(str(soup))

In [None]:

    
def put_date(html, epname, epnumber):
    """Rewrite an episode's title heading so it starts with the name and estimated date.

    The ``<h2 class="title">`` element is emptied and rebuilt: first a
    ``<span id="epid">`` wrapping a ``<div>`` with ``"<epname> - <YYYY-MM-DD>"``,
    then every <span> the heading previously contained except a former
    ``epid`` span. The file is rewritten in place, pretty-printed.

    The date comes from the notebook-level ``de`` object via
    ``de.estimate_date_from_epnumber(epnumber)``.

    Args:
        html: Path of the HTML file to modify.
        epname: Episode name shown in the title.
        epnumber: Episode number used to estimate the date.

    Raises:
        ValueError: If the document has no ``<h2 class="title">`` element.
    """
    print(f"Processing episode {epnumber} - {epname}")
    with open(html, 'r', encoding='utf-8') as f:
        html_content = f.read()
    soup = BeautifulSoup(html_content, 'html.parser')
    date = de.estimate_date_from_epnumber(epnumber).strftime('%Y-%m-%d')

    title = soup.find('h2', class_='title')
    if title is None:
        # Report the offending file instead of failing later with an AttributeError.
        raise ValueError(f"'{html}' has no <h2 class=\"title\"> element to update.")

    # Remember the existing spans before emptying the heading; they are re-attached below.
    titles = title.find_all('span')
    title.clear()

    # New leading span carrying the episode name and date.
    tid = soup.new_tag('span', id='epid')
    div = soup.new_tag('div')
    div.append(f"{epname} - {date}")
    tid.append(div)
    title.append(tid)

    for t in titles:
        # A previous 'epid' span is superseded by the one just created.
        if t.get('id') == 'epid':
            continue
        title.append(t)

    # Save the modified document; prettify() already returns the formatted HTML as a string.
    with open(html, 'w', encoding='utf-8') as f:
        f.write(soup.prettify())
    


In [None]:
# The notebook can insert the date and/or replace the styles.
# The following variables control which of those steps are performed.
put_date_enabled = False
put_style_enabled = True

In [None]:
# Accepted file names look like "<episode name>_<whisper|vosk>..."; group 1 is
# the episode name, which must start with the configured prefix.
epname_regex = re.compile(rf'({prefix}.*)_(whisper|vosk).*')
# Counts only the files that are actually processed (skipped ones are not numbered).
counter = 0
for html in html_files:
    epname_fname = os.path.basename(html)
    match = epname_regex.match(epname_fname)
    if not match:
        print(f"Skipping {epname_fname} as it does not match the expected pattern.")
        continue
    epname = match.group(1)
    print(f"Processing {epname} ({counter})...")
    counter += 1
    if put_date_enabled:
        # The episode number is only needed to estimate the date, so parse it
        # here; a file name without a parsable number then cannot abort a
        # style-only run.
        epnumber = get_epnumber(html)
        put_date(html, epname, epnumber)
    if put_style_enabled:
        put_style(html, style_content)


   