# Search links in docs

Requires the `nbformat` package to parse the notebooks: `pip install nbformat`

In [7]:
import os
import nbformat

def search_text_in_files(directory, search_text_base):
    """
    Find the notebooks and markdown files under `directory` that contain a text.

    The directory tree is walked recursively. For `.ipynb` files only the
    markdown cells are inspected; for `.md` files the whole content is
    inspected. The match is a plain, case-sensitive substring test.
    `.ipynb_checkpoints` folders are not visited, so Jupyter autosave copies
    are not reported as additional hits.

    Parameters
    ----------
    directory : str
        Root folder to search.
    search_text_base : str
        Text to look for.

    Returns
    -------
    notebooks_with_text : list
        Paths of the notebooks with at least one markdown cell containing
        the text. Each notebook is listed once.
    markdown_files_with_text : list
        Paths of the markdown files containing the text.
    """
    notebooks_with_text = []
    markdown_files_with_text = []

    # Walk every file below the root folder
    for root, dirs, files in os.walk(directory):
        # Prune checkpoint folders in place so os.walk does not descend into
        # them (this relies on the default top-down traversal).
        dirs[:] = [d for d in dirs if d != ".ipynb_checkpoints"]

        for file in files:
            file_path = os.path.join(root, file)
            if file.endswith(".ipynb"):
                with open(file_path, 'r', encoding='utf-8') as f:
                    nb = nbformat.read(f, as_version=4)

                # One matching markdown cell is enough to report the notebook
                if any(
                    cell.cell_type == 'markdown' and search_text_base in cell.source
                    for cell in nb.cells
                ):
                    notebooks_with_text.append(file_path)
            elif file.endswith(".md"):
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                if search_text_base in content:
                    markdown_files_with_text.append(file_path)

    return notebooks_with_text, markdown_files_with_text

# Folder with the documentation sources and the texts to look for
directory_path = '../docs/'

# NOTE: this first list is replaced by the assignment right below it, so only
# the second list is actually searched.
search_texts = [
    "https://skforecast.org/latest/introduction-forecasting/forecaster-parameters",
    "https://skforecast.org/latest/introduction-forecasting/forecaster-attributes",
    "https://skforecast.org/latest/user_guides/quick-start-skforecast",
    "https://skforecast.org/latest/user_guides/input-data",
    "https://skforecast.org/0.",
    "https://skforecast.org/0.12.1/user_guides/plot-forecaster-residuals",
    "https://skforecast.org/latest/user_guides/plot-forecaster-residuals"
]
search_texts = [
    "https://skforecast.org",
    "https://skforecast.org/latest/user_guides/user_guides",
    "../user_guides/user_guides",
    "https://skforecast.org/latest/faq/faq",
    "../faq/faq",
    "joaquin"
]

for search_text_base in search_texts:

    # Ruler as wide as the searched text, printed before and after the report
    separator = "=" * len(search_text_base)
    print(search_text_base)
    print(separator)

    # Notebooks and markdown files that contain the current text
    notebooks, markdown_files = search_text_in_files(directory_path, search_text_base)

    if not notebooks:
        print("No notebooks contain the specified text.")
    else:
        print("The following notebooks contain the specified text:")
        print("\n".join(notebooks))

    if not markdown_files:
        print("No markdown files contain the specified text.")
    else:
        print("\nThe following markdown files contain the specified text:")
        print("\n".join(markdown_files))

    print(separator)
    print("\n")


https://skforecast.org
The following notebooks contain the specified text:
../docs/user_guides\forecasting-sarimax-arima.ipynb
../docs/user_guides\probabilistic-forecasting.ipynb
../docs/user_guides\weighted-time-series-forecasting.ipynb

The following markdown files contain the specified text:
../docs/README.md
../docs/releases\releases.md


https://skforecast.org/latest/user_guides/user_guides


  validate(nb)


No notebooks contain the specified text.
No markdown files contain the specified text.


../user_guides/user_guides
No notebooks contain the specified text.
No markdown files contain the specified text.


https://skforecast.org/latest/faq/faq
No notebooks contain the specified text.
No markdown files contain the specified text.


../faq/faq
No notebooks contain the specified text.
No markdown files contain the specified text.


joaquin
No notebooks contain the specified text.

The following markdown files contain the specified text:
../docs/README.md
../docs/releases\releases.md




# Test links docs

In [3]:
# Root URL of the documentation version whose links are tested
base_url = "https://skforecast.org/0.14.0/"

# List of paths extracted from the mkdocs.yml file.
# The empty string stands for the documentation home page.
paths_general = [
    "",

    "examples/examples_english.md",
    "examples/examples_spanish.md",

    "api/ForecasterRecursive.md",
    "api/ForecasterDirect.md",
    "api/ForecasterRecursiveMultiSeries.md",
    "api/ForecasterDirectMultiVariate.md",
    "api/ForecasterRnn.md",
    "api/Sarimax.md",
    "api/ForecasterSarimax.md",
    "api/ForecasterEquivalentDate.md",
    "api/model_selection.md",
    "api/feature_selection.md",
    "api/preprocessing.md",
    "api/metrics.md",
    "api/plot.md",
    "api/utils.md",
    "api/datasets.md",
    "api/exceptions.md",

    "releases/releases.md",

    "authors/authors.md"
]

# Every page is listed exactly once: a repeated entry (or the same page listed
# with two different extensions) would make the link checker fetch and report
# that page twice.
paths_user_guides = [
    "introduction-forecasting/introduction-forecasting.md",

    "user_guides/migration-guide.ipynb",

    "quick-start/quick-start-skforecast.ipynb",
    "quick-start/forecaster-parameters.md",
    "quick-start/forecaster-attributes.ipynb",
    "quick-start/how-to-install.md",

    "user_guides/table-of-contents.md",
    "user_guides/input-data.ipynb",
    "user_guides/autoregresive-forecaster.ipynb",
    "user_guides/direct-multi-step-forecasting.ipynb",
    "user_guides/forecasting-sarimax-arima.ipynb",
    "user_guides/forecasting-baseline.ipynb",
    "user_guides/independent-multi-time-series-forecasting.ipynb",
    "user_guides/multi-series-with-different-length-and-different_exog.ipynb",
    "user_guides/dependent-multi-series-multivariate-forecasting.ipynb",
    "user_guides/forecasting-with-deep-learning-rnn-lstm.ipynb",
    "user_guides/exogenous-variables.ipynb",
    "user_guides/window-features-and-custom-features.ipynb",
    "user_guides/categorical-features.ipynb",
    "user_guides/calendar-features.ipynb",
    "user_guides/sklearn-transformers-and-pipeline.ipynb",
    "user_guides/time-series-differentiation.ipynb",
    "user_guides/feature-selection.ipynb",
    "user_guides/metrics.ipynb",
    "user_guides/backtesting.ipynb",
    "user_guides/hyperparameter-tuning-and-lags-selection.ipynb",
    "user_guides/probabilistic-forecasting.ipynb",
    "user_guides/explainability.ipynb",
    "user_guides/save-load-forecaster.ipynb",
    "user_guides/forecaster-in-production.ipynb",
    "user_guides/plotting.ipynb",
    "user_guides/datasets.ipynb",
    "user_guides/training-and-prediction-matrices.ipynb",
    "user_guides/weighted-time-series-forecasting.ipynb",
    "user_guides/stacking-ensemble-models-forecasting.ipynb",
    "user_guides/forecasting-xgboost-lightgbm.ipynb",
    "user_guides/skforecast-in-GPU.ipynb",

    "faq/table-of-contents.md",
    "faq/non-negative-predictions.ipynb",
    "faq/forecasting-time-series-with-missing-values.ipynb",
    "faq/forecasting-with-delayed-historical-data.ipynb",
    "faq/parameters-search-backtesting-vs-one-step-ahead.ipynb",
    "faq/cyclical-features-time-series.ipynb",
    "faq/time-series-aggregation.ipynb",
    "faq/parallelization-skforecast.ipynb",
    "faq/profiling-skforecast.ipynb",
]


# Map a documentation source path to the path of its rendered .html page
def change_extension_to_html(path):
    """
    Replace a trailing `.md` or `.ipynb` extension with `.html`.

    Paths with any other ending (including the empty string) are returned
    unchanged.
    """
    for source_suffix in (".md", ".ipynb"):
        if path.endswith(source_suffix):
            stem = path[:-len(source_suffix)]
            return stem + ".html"
    return path


# Build the full links: base URL + path with its extension switched to .html
links_general = [
    base_url + html_path
    for html_path in map(change_extension_to_html, paths_general)
]
links_user_guides = [
    base_url + html_path
    for html_path in map(change_extension_to_html, paths_user_guides)
]

In [4]:
from colorama import Fore, Style

# Visual check of the colorama styles: each styled line ends with RESET_ALL so
# the following line is rendered with the default style again.
print(
    "Esto es un texto normal",
    Fore.RED + "Este texto es rojo" + Style.RESET_ALL,
    Fore.GREEN + "Este texto es verde" + Style.RESET_ALL,
    Fore.BLUE + "Este texto es azul" + Style.RESET_ALL,
    Style.BRIGHT + Fore.RED + "Este texto es rojo en negrita" + Style.RESET_ALL,
    "Esto es un texto normal",
    sep="\n",
)

Esto es un texto normal
[31mEste texto es rojo[0m
[32mEste texto es verde[0m
[34mEste texto es azul[0m
[1m[31mEste texto es rojo en negrita[0m
Esto es un texto normal


In [5]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from colorama import Fore, Style


def check_links_ignoring_nav_links(url, timeout=10):
    """
    Check that a page is reachable and that the links it contains are too.

    The page is downloaded and every `<a href=...>` found in it is requested,
    except:

    - anchors carrying any of the CSS classes in `classes_no_to_visit`
      (presumably the theme's header/navigation/footer links; verify against
      the rendered site),
    - empty hrefs and same-page fragments (`#...`),
    - links that do not resolve to an http(s) URL (e.g. `mailto:`), which
      `requests` cannot fetch and would otherwise be reported as broken.

    A link that appears several times in the page is requested only once, but
    it is still printed and reported once per occurrence.

    Parameters
    ----------
    url : str
        Address of the page to check.
    timeout : int, float, default 10
        Seconds to wait for each request before treating it as failed.

    Returns
    -------
    broken_links : list
        `[url]` if the page itself cannot be fetched; otherwise one message
        per broken link occurrence (empty list if every link is accessible).
    """
    try:
        # Request the page; a timeout prevents a stalled server from hanging
        # the whole check.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise if the request was not successful
        print(f"[OK] Página accesible: {url}")
    except requests.exceptions.RequestException as e:
        print(
            Style.BRIGHT + Fore.RED + 
            f"[ERROR] No se pudo acceder a {url}: {e}"
            + Style.RESET_ALL
        )
        return [url]

    # Parse the HTML content and collect every anchor that has an href
    soup = BeautifulSoup(response.content, "html.parser")
    links = soup.find_all("a", href=True)

    # Anchors with any of these classes are not visited
    classes_no_to_visit = [
        'md-header__button', 'md-logo', "md-nav__link", "md-tabs__link",
        'md-source', 'md-social__link', 'autorefs-external'
    ]

    broken_links = []
    link_is_ok = {}  # full_url -> bool, so repeated links are requested once
    for link in links:
        class_link = link.get("class", [])

        # Skip anchors that carry any of the classes in "classes_no_to_visit"
        if any(class_no_to_visit in class_link for class_no_to_visit in classes_no_to_visit):
            continue

        # Skip empty hrefs (indexing them would fail) and same-page fragments
        href = link['href'].strip()
        if not href or href.startswith("#"):
            continue

        # Turn relative links into absolute ones
        full_url = urljoin(url, href)

        # Only http(s) targets can be requested; mailto:, tel:, javascript:...
        # are not web pages and must not be counted as broken.
        if not full_url.startswith(("http://", "https://")):
            continue

        if full_url not in link_is_ok:
            try:
                link_response = requests.get(full_url, timeout=timeout)
                link_response.raise_for_status()  # Raise if the link is not accessible
                link_is_ok[full_url] = True
            except requests.exceptions.RequestException:
                link_is_ok[full_url] = False

        if link_is_ok[full_url]:
            print(f"  [OK] Enlace accesible: {class_link} - {full_url}")
        else:
            print(
                Style.BRIGHT + Fore.RED + 
                f"  [ERROR] Enlace roto o inaccesible: {full_url}" 
                + Style.RESET_ALL
            )
            broken_links.append(
                f"[ERROR] Enlace roto o inaccesible en {url}: {full_url}"
            )

    return broken_links

# Usage example: a single page to check (the call below is left commented out)
url_to_check = "https://skforecast.org/0.14.0/user_guides/table-of-contents.html"
# check_links_ignoring_nav_links(url_to_check)

In [6]:
# Check all links in user guides list
# ==============================================================================
broken_links_user_guides = []
for link in links_user_guides:
    broken_links = check_links_ignoring_nav_links(link)
    broken_links_user_guides += broken_links

# Two blank lines, then the summary. The bright red style opened on the header
# line stays active until RESET_ALL is emitted after the last message.
print("\n")
print(
    Style.BRIGHT + Fore.RED + "Resumen de enlaces rotos:",
    "-" * 80,
    "\n".join(broken_links_user_guides) + Style.RESET_ALL,
    sep="\n",
)

[OK] Página accesible: https://skforecast.org/0.14.0/introduction-forecasting/introduction-forecasting.html
  [OK] Enlace accesible: [] - https://skforecast.org/
  [OK] Enlace accesible: [] - https://skforecast.org/0.14.0/user_guides/autoregresive-forecaster
  [OK] Enlace accesible: [] - https://skforecast.org/0.14.0/user_guides/direct-multi-step-forecasting
  [OK] Enlace accesible: [] - https://skforecast.org/0.14.0/introduction-forecasting/introduction-forecasting#recursive-multi-step-forecasting
  [OK] Enlace accesible: [] - https://skforecast.org/0.14.0/user_guides/independent-multi-time-series-forecasting
  [OK] Enlace accesible: [] - https://skforecast.org/0.14.0/user_guides/dependent-multi-series-multivariate-forecasting
  [OK] Enlace accesible: [] - https://skforecast.org/0.14.0/user_guides/autoregresive-forecaster
  [OK] Enlace accesible: [] - https://skforecast.org/0.14.0/user_guides/direct-multi-step-forecasting
  [OK] Enlace accesible: [] - https://skforecast.org/0.14.0/use

In [7]:
# Check all links in general list
# ==============================================================================
broken_links_general = []
for link in links_general:
    broken_links = check_links_ignoring_nav_links(link)
    broken_links_general += broken_links

# Two blank lines, then the summary. The bright red style opened on the header
# line stays active until RESET_ALL is emitted after the last message.
print("\n")
print(
    Style.BRIGHT + Fore.RED + "Resumen de enlaces rotos:",
    "-" * 80,
    "\n".join(broken_links_general) + Style.RESET_ALL,
    sep="\n",
)

[OK] Página accesible: https://skforecast.org/0.14.0/
  [OK] Enlace accesible: [] - https://skforecast.org/
  [OK] Enlace accesible: [] - https://pypi.org/project/skforecast/
  [OK] Enlace accesible: [] - https://codecov.io/gh/skforecast/skforecast
  [OK] Enlace accesible: [] - https://github.com/skforecast/skforecast/actions/workflows/unit-tests.yml/badge.svg
  [OK] Enlace accesible: [] - https://www.repostatus.org/#active
  [OK] Enlace accesible: [] - https://github.com/skforecast/skforecast/graphs/commit-activity
  [OK] Enlace accesible: [] - https://pepy.tech/project/skforecast
  [OK] Enlace accesible: [] - https://pepy.tech/project/skforecast
  [OK] Enlace accesible: [] - https://github.com/skforecast/skforecast/blob/master/LICENSE
  [OK] Enlace accesible: [] - https://zenodo.org/doi/10.5281/zenodo.8382787
  [OK] Enlace accesible: [] - https://www.paypal.com/donate/?hosted_button_id=D2JZSWRLTZDL6
  [OK] Enlace accesible: [] - https://www.buymeacoffee.com/skforecast
  [OK] Enlace a