# Master Python for LLMs - Part 3

## Functions and Lambdas: The Foundation of Text Processing for LLMs with Python

### The Importance of Well-Structured Functions

In [1]:
from typing import List, Dict

def preprocess_text(
    text: str,
    stop_words: set = None,
    min_length: int = 3
) -> List[str]:
    """
    Turn raw text into a list of lowercase tokens for use with LLMs.

    The text is lowercased and split on whitespace. Each token is then
    checked against ``stop_words`` (only when a non-empty collection is
    supplied) and against ``min_length``. Because tokens are lowercased
    before the stop-word check, stop words must be given in lowercase
    to match.

    Args:
        text: Text to tokenize
        stop_words: Tokens to discard; ``None`` or an empty set disables
            this filter
        min_length: Shortest token length that is kept

    Returns:
        The surviving tokens, in their original order
    """
    kept = []
    for token in text.lower().split():
        # Stop-word filtering is skipped entirely when no stop words are given
        if stop_words and token in stop_words:
            continue
        if len(token) < min_length:
            continue
        kept.append(token)
    return kept

### The Power of Lambda Expressions

In [2]:
# Small reusable text transformations, each expressed as a lambda.
# normalize: lowercase the string, then trim surrounding whitespace
normalize = lambda x: x.lower().strip()
# extract_entities: keep the whitespace-separated words that start with an uppercase letter
extract_entities = lambda text: list(filter(lambda word: word[0].isupper(), text.split()))
# calculate_length: count the whitespace-separated words
calculate_length = lambda text: len(text.split())

# Example: apply normalize to a couple of sample strings
texts = ['  Machine Learning is FASCINATING  ', 'Python for NLP   ']
normalized_texts = [normalize(sample) for sample in texts]

### Combining Functions and Functional Operators

In [3]:
from functools import reduce

def create_processing_pipeline(texts: List[str]) -> List[List[str]]:
    """
    Normalize, filter, and tokenize a batch of texts.

    Args:
        texts: Raw input strings

    Returns:
        One token list per input text that is non-empty after
        normalization, in input order. Texts that normalize to an empty
        string are dropped, so the result may be shorter than ``texts``.
        A text whose tokens are all filtered out by ``preprocess_text``
        still contributes an (empty) token list.
    """
    # Step 1: Normalization (lazy; consumed by the filter below)
    normalized = map(normalize, texts)

    # Step 2: Filtering texts that are empty after normalization
    filtered = filter(lambda x: len(x) > 0, normalized)

    # Step 3: Tokenization and cleaning. The comprehension already builds
    # a list, so no extra list() copy is needed.
    return [preprocess_text(text) for text in filtered]

### Best Practices for Functions in LLM Projects

In [4]:
def process_prompt(text: str, max_length: int = 1000) -> str:
    """
    Processes and validates a prompt to send to an LLM.

    Args:
        text: Raw prompt text
        max_length: Maximum allowed length, in characters, of the
            normalized prompt

    Returns:
        The prompt after being passed through ``normalize``

    Raises:
        ValueError: If the prompt is empty (including whitespace-only
            prompts, which are empty once normalized), or if the
            normalized prompt is longer than ``max_length`` characters
    """
    # Reject None/"" up front so normalize() is never called on a missing value
    if not text:
        raise ValueError("The prompt cannot be empty")

    processed_text = normalize(text)

    # A whitespace-only prompt passes the first check but normalizes to "";
    # treat it as empty too instead of returning an empty prompt.
    if not processed_text:
        raise ValueError("The prompt cannot be empty")

    if len(processed_text) > max_length:
        raise ValueError(f"The prompt exceeds the maximum length of {max_length} characters")

    return processed_text