# Freecodecamp to Epub

## Importa Bibliotecas

In [None]:
import mimetypes
import os
import urllib
import urllib.parse

import requests
from bs4 import BeautifulSoup
from ebooklib import epub

## Constantes

In [None]:
# Current working directory; downloaded images are stored beneath it.
PROJECT_DIR = os.getcwd()
# Article page that gets converted into an EPUB.
URL = 'https://www.freecodecamp.org/news/gitting-things-done-book/'
# Folder the generated .epub file is written to.
OUTPUT_DIR = './output/epub/'
# HTML tag that marks the beginning of a new chapter.
CHAPTER_TAG = 'h1'

## Faz scraping do HTML

In [None]:
# Fetch the article page. The timeout keeps a stalled connection from hanging
# the notebook, and raise_for_status() stops an HTTP error page from being
# parsed as if it were the article.
response = requests.get(URL, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'lxml')
# Empty EPUB container that the following cells fill in.
ebook = epub.EpubBook()

In [None]:
# Book title, taken from the page <title>.
title = soup.title.text.strip()

# Article body. Everything downstream depends on it, so fail with a clear
# message instead of an AttributeError on None later on.
content = soup.find('section', {'class': 'post-content'})
if content is None:
    raise ValueError(f"Seção 'post-content' não encontrada em {URL}")

# Author name from the profile link; a missing link should not block the book.
author_link = soup.find('a', {'data-test-label': 'profile-link'})
author = author_link.text.strip() if author_link is not None else 'Autor desconhecido'

## Baixa as imagens

In [None]:
def get_images_url(html):
    """Collect the ``src`` of every ``<img>`` tag found in *html*.

    Args:
        html: Parsed HTML node exposing ``find_all`` (for example a
            BeautifulSoup tag).

    Returns:
        list: The non-empty ``src`` values, in document order.
    """
    # Search the node that was passed in, not the module-level ``content``.
    sources = (img_tag.get('src') for img_tag in html.find_all('img'))
    # Tags with a missing or empty ``src`` are skipped.
    return [src for src in sources if src]

# Source URLs of the images referenced by the article body.
images_url = get_images_url(content)

In [None]:
def get_images_local_path(images_url, base_dir=None):
    """Map each image URL to the local file path it is downloaded to.

    The path has the form ``<base_dir>/<sanitized host>/<URL path>``; the
    query string and fragment of the URL are ignored.

    Args:
        images_url: Iterable of image URLs.
        base_dir: Root folder for the downloads. Defaults to
            ``PROJECT_DIR + '/downloads'``.

    Returns:
        list: One local path per URL, in the same order.
    """
    if base_dir is None:
        base_dir = PROJECT_DIR + '/downloads'

    images_local_path = []

    for url in images_url:
        parsed_url = urllib.parse.urlparse(url)
        # Keep only characters that are safe in a directory name
        # (e.g. the ":" of "host:port" becomes "_").
        domain_cleaned = ''.join(
            c if c.isalnum() or c in '.-' else '_' for c in parsed_url.netloc
        )
        # URLs come from scraped HTML: drop empty, "." and ".." segments so a
        # crafted path cannot point outside base_dir.
        segments = [
            segment
            for segment in parsed_url.path.split('/')
            if segment not in ('', '.', '..')
        ]
        images_local_path.append(
            os.path.join(base_dir, domain_cleaned, '/'.join(segments))
        )

    return images_local_path

# Local destination path for each image URL, in the same order as images_url.
images_local_path = get_images_local_path(images_url)

In [None]:
def download_images(urls, paths, timeout=30):
    """Download each URL in *urls* to the matching entry of *paths*.

    Failures (network errors or a non-200 status) are reported on stdout and
    the remaining images are still attempted.

    Args:
        urls: Sequence of image URLs.
        paths: Sequence of destination file paths, parallel to *urls*.
        timeout: Seconds to wait for each request before giving up.

    Raises:
        ValueError: If *urls* and *paths* differ in length.
    """
    if len(urls) != len(paths):
        raise ValueError("As listas de URLs e paths devem ter o mesmo tamanho.")

    for url, path in zip(urls, paths):
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException:
            # One unreachable image must not abort the rest of the batch.
            print(f"Falha ao baixar a imagem de {url}")
            continue

        if response.status_code != 200:
            print(f"Falha ao baixar a imagem de {url}")
            continue

        # Create the parent folder only for successful downloads; a bare
        # filename has no parent and makedirs('') would raise.
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        with open(path, 'wb') as f:
            f.write(response.content)
        print(f"Imagem baixada e salva em {path}")

# Download every image to its local path (network I/O; progress is printed).
download_images(images_url, images_local_path)

## Aponta as "src" para as imagens baixadas

In [None]:
def remove_currrent_dir(path_list, project_dir=None):
    """Make each path relative to the project directory.

    Only a leading ``<project_dir>/`` is stripped; a path that does not start
    with it is returned unchanged.

    Args:
        path_list: Iterable of file paths.
        project_dir: Directory prefix to strip. Defaults to ``PROJECT_DIR``.

    Returns:
        list: The paths with the prefix removed, in the same order.
    """
    if project_dir is None:
        project_dir = PROJECT_DIR

    prefix = project_dir + '/'
    # Slice off the prefix instead of str.replace, which would also delete
    # later occurrences of the same text inside the path.
    return [
        path[len(prefix):] if path.startswith(prefix) else path
        for path in path_list
    ]

# Image paths relative to PROJECT_DIR; used as the new <img> src values and
# as the image file names inside the EPUB.
epub_images_path = remove_currrent_dir(images_local_path)

In [None]:
def replace_img_srcs(html, new_srcs):
    """Return a copy of *html* whose ``<img>`` tags point at *new_srcs*.

    Args:
        html: HTML node or string; it is re-parsed, so the input object is
            not modified.
        new_srcs: Replacement ``src`` values, one per ``<img>`` tag that has
            a non-empty ``src``, in document order.

    Returns:
        BeautifulSoup: The re-parsed document with the ``src`` attributes
        replaced.
    """
    replaced_html = BeautifulSoup(str(html), 'html.parser')

    # Only tags with a non-empty src contributed a URL when the images were
    # collected, so only those are paired with a replacement; counting every
    # <img> would shift the pairing after the first tag without src.
    img_tags = [tag for tag in replaced_html.find_all('img') if tag.get('src')]

    # zip stops at the shorter sequence, so surplus tags keep their src.
    for img_tag, new_src in zip(img_tags, new_srcs):
        img_tag['src'] = new_src

    return replaced_html

# Rebind content to a copy whose <img> tags reference the downloaded files.
content = replace_img_srcs(content, epub_images_path)

## Gera o eBook

In [None]:
# Book metadata scraped from the article page.
ebook.add_author(author)
ebook.set_title(title)

### Adiciona imagens no ebook

In [None]:
def add_images(book, images_path):
    """Embed the downloaded image files in *book*.

    Args:
        book: ``epub.EpubBook`` that receives the images.
        images_path: Image paths relative to ``PROJECT_DIR``; each one is
            also used as the file name of the image inside the EPUB.
    """
    for index, image in enumerate(images_path):
        full_path = PROJECT_DIR + '/' + image

        # A failed download leaves no file behind; skip it instead of
        # aborting the whole book.
        if not os.path.isfile(full_path):
            print(f"Imagem não encontrada, ignorada: {image}")
            continue

        # Declare the real image type rather than labelling everything as
        # GIF. 'image/gif' stays as the fallback when the extension is
        # unknown or does not map to an image type.
        guessed_type = mimetypes.guess_type(image)[0]
        if guessed_type and guessed_type.startswith('image/'):
            media_type = guessed_type
        else:
            media_type = 'image/gif'

        with open(full_path, 'rb') as image_file:
            data = image_file.read()

        book.add_item(
            epub.EpubImage(
                uid=f"image{index}",
                file_name=image,
                media_type=media_type,
                content=data,
            )
        )

# Embed the downloaded images in the book.
add_images(ebook, epub_images_path)

### Gera os capítulos

In [None]:
def add_chapters(book, html, chapter_tag):
    """Split *html* into chapters at each *chapter_tag* and add them to *book*.

    The direct children of the first ``<section>`` (or of the whole document
    when there is none) are grouped: a new chapter starts at every element
    named *chapter_tag*, and anything before the first one becomes a leading
    chapter without a title. The function also sets the book's spine and
    table of contents and adds the NCX/nav documents that the ``'nav'`` spine
    entry refers to.

    NOTE(review): headings are only detected as direct children of the
    section — confirm this matches the scraped markup.

    Args:
        book: ``epub.EpubBook`` that receives the chapters.
        html: HTML node or string holding the article body.
        chapter_tag: Tag name that starts a chapter (e.g. ``'h1'``).
    """
    parsed = BeautifulSoup(str(html), 'html.parser')
    root = parsed.find('section')
    if root is None:
        root = parsed

    groups = []
    current = []
    # recursive=False: str(element) already contains the element's
    # descendants, so visiting them as well would duplicate their markup.
    for element in root.find_all(recursive=False):
        if element.name == chapter_tag and current:
            groups.append(current)
            current = []
        current.append(element)
    # Flush the chapter still being collected when the loop ends; otherwise
    # the last chapter of the article would be lost.
    if current:
        groups.append(current)

    spine = ['nav']
    toc = []

    for index, elements in enumerate(groups):
        first = elements[0]
        title = first.text if first.name == chapter_tag else ''
        chapter = epub.EpubHtml(
            title=title,
            file_name=('chap_' + str(index) + '.xhtml'),
        )
        chapter.content = ''.join(str(element) for element in elements)

        book.add_item(chapter)
        spine.append(chapter)
        # Untitled leading content is kept in the book but left out of the
        # table of contents, where it would show up as a blank entry.
        if title:
            toc.append(chapter)

    # The spine references 'nav', so the navigation documents must exist.
    book.toc = toc
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Assign to the book that was passed in, not the module-level ``ebook``.
    book.spine = spine
            

# Split the article body into chapters and register them in the book.
add_chapters(ebook, content, CHAPTER_TAG)

### Gera o documento

In [None]:
# The output folder is not created anywhere else, so make sure it exists
# before writing.
os.makedirs(OUTPUT_DIR, exist_ok=True)
# A "/" in the page title would otherwise be read as a sub-directory.
epub.write_epub(os.path.join(OUTPUT_DIR, title.replace('/', '_') + '.epub'), ebook, {})