# approach 1: using `pipe` library

In [3]:
# In-memory stand-in for a file system: maps a file name to its full text.
files = {
    "comp1405.txt": (
        "Today's class we will discuss how for loops work.\n"
        "For loops are a looping structure similar to the while loops we discussed earlier."
    ),
}

def load_data(filename: str):
    """Return the text stored under *filename* in the module-level ``files`` dict.

    Raises:
        KeyError: if *filename* is not a key of ``files``.
    """
    return files[filename]

def partition_data(document: str):
    """Split *document* on newline characters and return the pieces as a list."""
    lines = document.split("\n")
    return lines

def invoke_model(text: str):
    """Return ``len(text)``.

    No model is actually called here; the length is the whole result.
    """
    size = len(text)
    return size

In [13]:
# NOTE: this rebinds the name `map` to pipe's version for the rest of the
# notebook, shadowing the builtin.
from pipe import map

file = "comp1405.txt"

# Each stage is applied to every item flowing through the pipe. The source has
# a single item (the file name), and partition_data turns it into ONE list of
# lines, so invoke_model receives that list and returns the number of lines
# rather than the length of each line.
list(
    [file]
    | map(load_data)
    | map(partition_data)
    | map(invoke_model)
)

[2]

# approach 2: custom pipeline lib

In [15]:
import abc
from typing import List

class Task(abc.ABC):
    """Abstract pipeline step: takes some data and returns the processed result."""

    @abc.abstractmethod
    def process(self, data):
        """Transform *data* and return the result; subclasses must override this."""

class LoadDocuments(Task):
    """Task that looks up the document texts registered under a document path.

    ``process`` expects a mapping with a ``"document_path"`` key and returns
    a new list containing the texts stored for that path.

    Raises:
        KeyError: if ``"document_path"`` is missing from the input, or the
            path is not a key of ``documents``.
    """

    # Hard-coded in-memory corpus: document path -> list of document texts.
    documents = {
        "comp1405": [
            "for loop",
            "while loop",
            "sorting",
        ]
    }

    def process(self, data):
        document_path = data["document_path"]
        # Hand back a shallow copy: the list lives on the class and is shared
        # by every instance, so returning it directly would let a downstream
        # task that mutates its input corrupt the corpus for later runs.
        return list(LoadDocuments.documents[document_path])


class SummarizeDocuments(Task):
    """Task that replaces each document text with its length."""

    def process(self, data: List[str]):
        # Explicit loop on purpose: the name `map` is rebound to pipe's
        # version earlier in this notebook, so the builtin is not safe to use.
        lengths = []
        for document in data:
            lengths.append(len(document))
        return lengths


class Pipeline:
    """Runs a fixed sequence of tasks, feeding each task's output to the next."""

    def __init__(self, tasks: List[Task]):
        # Tasks are executed in the order given.
        self.tasks = tasks

    def run(self, data):
        """Pass *data* through every task in order and return the final result.

        With no tasks, *data* is returned unchanged.
        """
        result = data
        for step in self.tasks:
            result = step.process(result)
        return result

In [16]:
# Load the documents registered under the path, then reduce each one to its length.
pipeline = Pipeline(
    tasks=[LoadDocuments(), SummarizeDocuments()],
)
pipeline.run({"document_path": "comp1405"})

[8, 10, 7]