# actions

> Predefined actions you can use in your project

In [None]:
#| default_exp actions

In [None]:
#| export
from tiny_recipe.core import list_actions, recipe_transform, Recipe, Recipes

from fastcore.test import *
import httpx

In [None]:
#| export

@recipe_transform()
def download(url, timeout=30):
    """Download content from URL. Returns text content by default.

    Args:
        url: Address to fetch with an HTTP GET request.
        timeout: Request timeout in seconds. It is passed through ``float()``,
            so a numeric string such as ``"10"`` is accepted as well.

    Returns:
        The response body as text.

    Raises:
        ValueError: If httpx reports an error for the request or the response
            status; the original httpx exception is chained as the cause.
    """
    import httpx
    try:
        response = httpx.get(url, timeout=float(timeout))
        response.raise_for_status()  # Raise error for bad status codes
        return response.text
    except httpx.HTTPError as e:
        # Chain explicitly so the underlying transport/status error stays
        # attached to the ValueError that callers see.
        raise ValueError(f"Failed to download from {url}: {e}") from e

In [None]:
# Test the download action (this performs a real HTTP request)
test_url = "https://example.com"
result = download(test_url)

# The downloaded text is expected to contain the word 'example'
test_eq('example' in result, True)

# Test error handling: downloading from this host is expected to raise an
# error whose message contains "Failed to download"
test_fail(lambda: download("https://thisdoesnotexist.example.com"), 
          contains="Failed to download")

In [None]:
#| export

@recipe_transform()
def select(html, css, first=True, text_only=False):
    """Select elements from HTML using CSS selector. Returns first match by default, or all matches if first=False. Use text_only=True to get only text content.

    Args:
        html: HTML markup to search.
        css: CSS selector handed to BeautifulSoup's ``select``.
        first: Return only the first match when true, otherwise a list of all
            matches. Accepts a bool or a string such as ``"True"``/``"False"``.
        text_only: Return the stripped text of each match instead of its
            markup. Accepts a bool or a string like ``first``.

    Returns:
        ``None`` when nothing matches; otherwise a single string (``first``
        true) or a list of strings (``first`` false).
    """
    from bs4 import BeautifulSoup

    def _flag(value):
        # Other actions in this module receive their options as text
        # (e.g. timestamps="False"); presumably flags set from an action
        # string such as "select|css=p|first=False" arrive the same way --
        # confirm against recipe_transform. Plain truthiness would treat the
        # string "False" as true, so interpret strings explicitly.
        if isinstance(value, str):
            return value.strip().lower() not in {"", "false", "0", "no", "off"}
        return bool(value)

    first, text_only = _flag(first), _flag(text_only)

    matches = BeautifulSoup(html, 'html.parser').select(css)
    if not matches:
        return None

    # One renderer for both output modes: stripped text or the element markup.
    render = (lambda el: el.get_text().strip()) if text_only else str
    if first:
        return render(matches[0])
    return [render(el) for el in matches]

# NOTE(review): these statements appear to sit in the same `#| export` cell as
# select(); if so, nbdev would export them into the module and run them
# (including the network request below) on import -- confirm and consider
# moving them to their own non-exported cell.

# Test basic selection
# Fixture: one <h1> and two <p class="text"> elements inside a wrapper div
html = """
<div class="content">
    <h1>Title</h1>
    <p class="text">First paragraph</p>
    <p class="text">Second paragraph</p>
</div>
"""

# Test single element selection (default first=True returns the markup as a string)
test_eq(select(html, "h1"), "<h1>Title</h1>")

# Test multiple elements: first=True keeps only the first of the two matches
test_eq(select(html, "p.text", first=True),
        '<p class="text">First paragraph</p>')

# first=False returns every match as a list of markup strings
test_eq(select(html, "p.text", first=False), 
        ['<p class="text">First paragraph</p>', 
         '<p class="text">Second paragraph</p>'])

# Test multiple elements with text_only: stripped text instead of markup
test_eq(select(html, "p.text", first=False, text_only=True), 
        ['First paragraph', 'Second paragraph'])

# Test no matches: select returns None rather than an empty list
test_eq(select(html, "span"), None)

# Test in pipeline with download (performs a real HTTP request).
# The action string presumably chains download into select with css=h1.
test_url = "https://example.com"
recipe = Recipe(
    input=test_url,
    actions="download.select|css=h1"
)
result = recipe.run()
test_eq("Example Domain" in result, True)

In [None]:
#| export
@recipe_transform()
def youtube_captions(video_id, timestamps="True"):
    """Get captions from a YouTube video.
    Args:
        video_id: YouTube video ID (e.g., 'abc123xyz' from 'youtube.com/watch?v=abc123xyz')
        timestamps: "True" or "False" string (case-insensitive), or a real bool,
            to include timestamps in output (default: "True")
    Returns:
        String with video captions, one transcript entry per line, each
        prefixed with [M:SS] when timestamps are enabled
    Raises:
        ValueError: If fetching or formatting the transcript fails; the
            original exception is chained as the cause
    """
    from youtube_transcript_api import YouTubeTranscriptApi

    # Go through str() so a real bool works as well as the "True"/"False"
    # text form; calling .lower() on a bool would raise and be reported as a
    # misleading "Failed to get captions" error.
    include_times = str(timestamps).strip().lower() == "true"

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)

        if include_times:
            # entry['start'] is split into whole minutes and zero-padded seconds
            return '\n'.join(
                f"[{int(entry['start']//60)}:{int(entry['start']%60):02d}] {entry['text']}"
                for entry in transcript
            )
        return '\n'.join(entry['text'] for entry in transcript)

    except Exception as e:
        # Deliberately broad: any failure is surfaced as a ValueError, with
        # the root cause kept via explicit chaining.
        raise ValueError(f"Failed to get captions for video {video_id}: {e}") from e

# Test cases
# NOTE(review): these statements appear to share the `#| export` cell with
# youtube_captions(); if so they would be exported and executed on import --
# confirm and consider moving them to a separate cell.
test_video = "FcugVUydSBY"  # MKBHD's 100 subscriber milestone video

# Test basic functionality: a non-empty caption string is returned.
# `result` and `test_video` are reused by the cells that follow.
result = youtube_captions(test_video)
test_eq(len(result) > 0, True)

In [None]:
# `result` was produced by the default youtube_captions(test_video) call in the previous cell
test_eq('[' in result, True)  # Should have timestamps by default

# Test without timestamps
result_no_time = youtube_captions(test_video, timestamps="False")
# '[Music]' markers are removed first because they contain '[' without being timestamps
test_eq('[' not in result_no_time.replace('[Music]', ''), True)

In [None]:
# Test error handling: an invalid ID is expected to raise an error whose
# message contains "Failed to get captions"
test_fail(lambda: youtube_captions("not_a_real_video_id"), 
          contains="Failed to get captions")

# Test in a recipe: the timestamps option is given as text inside the action string
recipe = Recipe(
    input=test_video,
    actions="youtube_captions|timestamps=True"
)
result = recipe.run()
test_eq(len(result) > 0, True)

In [None]:
#| export
@recipe_transform()
def extract_video_ids(html):
    """Extract YouTube video IDs from a playlist page.

    Every ``a#video-title`` link in the HTML is inspected and the value of the
    ``v`` query parameter is taken from its ``href``. Links without a ``v=``
    parameter (or without an ``href``) are skipped.
    Returns the list of video IDs in the order the links were matched.
    """
    import re
    from bs4 import BeautifulSoup

    # Capture everything after "v=" up to the next "&" (or the end of the href)
    v_param = re.compile(r'[?&]v=([^&]+)')

    anchors = BeautifulSoup(html, 'html.parser').select('a#video-title')
    # Lazily search each href; a missing href is treated as an empty string
    found = (v_param.search(anchor.get('href', '')) for anchor in anchors)
    return [hit.group(1) for hit in found if hit]

# NOTE(review): these statements appear to share the `#| export` cell with
# extract_video_ids(); if so they would be exported with the module --
# confirm and consider moving them to a separate cell.

# Test with sample HTML: two video-title links whose hrefs carry v=, list= and index=
sample_html = """
<div id="meta">
    <h3>
        <a id="video-title" href="/watch?v=QqZUzkPcU7A&list=123&index=1">Video 1</a>
    </h3>
</div>
<div id="meta">
    <h3>
        <a id="video-title" href="/watch?v=ABC123xyz&list=123&index=2">Video 2</a>
    </h3>
</div>
"""

# Test basic extraction: only the v= value is kept, trailing parameters are dropped
test_eq(extract_video_ids(sample_html), 
        ['QqZUzkPcU7A', 'ABC123xyz'])

# Test empty page: no video-title links yields an empty list
test_eq(extract_video_ids('<div></div>'), [])

# Example usage in a recipe pipeline.
# The input is a placeholder and the recipe is only constructed here, never run.
recipe = Recipe(
    input="<playlist page html>",
    actions="extract_video_ids.youtube_captions"
)

In [None]:
# Download the playlist page itself (performs a real HTTP request).
# Only the "download" action runs here, so `out` holds the downloaded page
# text; no captions are fetched in this cell.
recipe = Recipe(
    input="https://www.youtube.com/playlist?list=PLfYUBJiXbdtS5Qc3IGnMql2-QZLHHD0g0",
    actions="download"
)
out = recipe.run()

In [None]:
#| export
@recipe_transform()
def playlist_video_ids(playlist_url):
    """Extract video IDs from a YouTube playlist using pytube and create recipes for each video.
    Args:
        playlist_url: Full YouTube playlist URL
    Returns:
        Recipes object containing a recipe for each video, where each recipe's
        input is the text following 'watch?v=' in the video URL
    Raises:
        ValueError: If the playlist cannot be read or a video URL has no
            'watch?v=' part; the original exception is chained as the cause
    """
    from pytube import Playlist
    try:
        playlist = Playlist(playlist_url)
        # Keep only what follows 'watch?v=' in each video URL
        video_ids = [url.split('watch?v=')[1] for url in playlist.video_urls]
        # Create a recipe for each video ID
        return Recipes([Recipe(input=vid) for vid in video_ids])
    except Exception as e:
        # Deliberately broad: any failure is surfaced as a ValueError, with
        # the root cause kept via explicit chaining.
        raise ValueError(f"Failed to get video IDs from playlist {playlist_url}: {e}") from e

In [None]:
# Test basic functionality (queries a real playlist, so network access is needed)
test_playlist = 'https://www.youtube.com/playlist?list=PLfYUBJiXbdtS5Qc3IGnMql2-QZLHHD0g0'
result = playlist_video_ids(test_playlist)
# The result is a Recipes container holding at least one Recipe
test_eq(isinstance(result, Recipes), True)
test_eq(len(result.recipes) > 0, True)
test_eq(all(isinstance(r, Recipe) for r in result.recipes), True)
# Each recipe's input should be a bare video ID
test_eq(all(len(r.input) == 11 for r in result.recipes), True)  # YouTube IDs are 11 chars

# Test error handling: a non-playlist URL is expected to raise an error whose
# message contains "Failed to get video IDs"
test_fail(lambda: playlist_video_ids("https://youtube.com/not_a_playlist"), 
          contains="Failed to get video IDs")

In [None]:
# Test in a recipe getting captions for all videos.
# `test_playlist` is defined in the previous cell; timestamps are switched off
# through the text option in the action string.
recipe = Recipe(
    input=test_playlist,
    actions="playlist_video_ids.youtube_captions|timestamps=False"
)
concatenated_captions = recipe.run()
# Show only the first 500 characters as the cell output
concatenated_captions[:500]

"hi I'm Jeremy Howard hi I'm johnf hi I'm\nNate we wanted to give you a quick\nintroduction here to fast HTML have a\nlook at some of these things that we\nmade so this first example is a chatbot\nthat I made to interact with chbt uh may\nneed to try out a styling library for\nthese beautiful looking chat Bubbles and\nthis is the entire source code there's\nno other components to\nit and this is an app that I made based\noff of con's gamer life allows multiple\nuh users to be able to play the game of\nlife an"

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| hide
# Run nbdev's export step for this project
import nbdev
nbdev.nbdev_export()