Set up a pipeline that can be made to fail at each step (extract, normalize, or load).

In [57]:
import dlt


# define a resource with an intentional failure
@dlt.resource(name="example_resource")
def example_resource(should_fail_extraction: bool = False):
    """Yield the records ``{"value": 0}`` through ``{"value": 9}``.

    Args:
        should_fail_extraction: When true, the generator raises once it
            reaches the value 5, i.e. after values 0-4 have already been
            yielded.

    Raises:
        Exception: Only when ``should_fail_extraction`` is true, to simulate
            a failure in the middle of extraction.
    """
    value = 0
    while value < 10:
        # Fail part-way through so that some items have already been produced.
        if value == 5 and should_fail_extraction:
            raise Exception("Intentional failure at extraction step (i=5)")
        yield {"value": value}
        value += 1

def run_pipeline(should_fail_extraction: bool, should_fail_normalization: bool, should_fail_load: bool):
    """Run extract, normalize and load as separate steps, optionally failing at one.

    The pipeline is created with the name "failure_simulation_pipeline", the
    "duckdb" destination, the dataset "test_dataset" and "pipeline_jobs" as
    its pipelines directory. Progress and results of each step are printed.

    Args:
        should_fail_extraction: Passed to ``example_resource`` so that it
            raises part-way through extraction.
        should_fail_normalization: When true, an exception is raised in place
            of the normalize step (after extraction has completed).
        should_fail_load: When true, an exception is raised in place of the
            load step (after extraction and normalization have completed).

    Returns:
        The pipeline object. It is returned whether or not a step failed: any
        exception raised inside the steps is caught and printed, not re-raised.
    """
    pipeline = dlt.pipeline(
        pipeline_name="failure_simulation_pipeline",
        destination="duckdb",
        dataset_name="test_dataset",
        pipelines_dir="pipeline_jobs" # directory to store .dlt files
    )

    try:
        # extract
        extraction = pipeline.extract(example_resource(should_fail_extraction=should_fail_extraction))
        print("Extraction completed successfully.")
        print(f"Extracted data: {extraction}")

        # normalize
        if should_fail_normalization:
            # Raise directly instead of overwriting pipeline.normalize: the
            # pipeline object is returned to the caller, and a patched method
            # would make every later normalize() call on it fail as well.
            raise Exception("Intentional failure at normalization step")

        normalization = pipeline.normalize()
        print("Normalization completed successfully.")
        print(f"Normalized data: {normalization}")

        # load
        if should_fail_load:
            # Same reasoning as above: leave pipeline.load untouched so the
            # returned pipeline can still be used to retry the load.
            raise Exception("Intentional failure at load step")

        load_result = pipeline.load()
        print("Load completed successfully.")
        print(f"Load result: {load_result}")

        print("Pipeline completed successfully.")

    except Exception as e:
        # Deliberately broad: this is a failure simulation, so the error is
        # reported and the pipeline is still handed back for inspection.
        print(f"EXCEPTION during pipeline execution: {e}")

    return pipeline

# Baseline run: all failure flags are False, so no step is made to fail.
# Set one of the flags to True to simulate a failure at that step.
my_pipeline = run_pipeline(
    should_fail_extraction=False,
    should_fail_normalization=False,
    should_fail_load=False
)


Extraction completed successfully.
Extracted data: 
Load package 1739263057.7818828 is EXTRACTED and NOT YET LOADED to the destination and contains no failed jobs
Normalization completed successfully.
Normalized data: Normalized data for the following tables:
- _dlt_pipeline_state: 1 row(s)
- example_resource: 10 row(s)

Load package 1739263057.7818828 is NORMALIZED and NOT YET LOADED to the destination and contains no failed jobs
Load completed successfully.
Load result: Pipeline failure_simulation_pipeline load step completed in 1.21 seconds
1 load package(s) were loaded to destination duckdb and into dataset test_dataset
The duckdb destination used duckdb:////Users/guy.wheeler/Projects/moj-dlt-workshop/evaluation/graceful_recovery/failure_simulation_pipeline.duckdb location to store data
Load package 1739263057.7818828 is LOADED and contains no failed jobs
Pipeline completed successfully.


Print the tables that were created and the rows that were loaded.

In [58]:
# List the tables present in the destination dataset.
with my_pipeline.sql_client() as client:
    tables_df = client.execute("SHOW TABLES").df()
print("TABLES:", tables_df, sep="\n")

# Fetch every row of the resource's table; the bare `df` below renders it
# as the cell output.
with my_pipeline.sql_client() as client:
    df = client.execute("SELECT * FROM example_resource").df()
df

TABLES:
                  name
0           _dlt_loads
1  _dlt_pipeline_state
2         _dlt_version
3     example_resource


Unnamed: 0,value,_dlt_load_id,_dlt_id
0,0,1739263057.7818828,7W8YHibojPfGKw
1,1,1739263057.7818828,7PIye9eZDjXwRw
2,2,1739263057.7818828,Dkd0vKFA6H8djg
3,3,1739263057.7818828,ZEu+aN2dpaMCvg
4,4,1739263057.7818828,AbwVauwAu0Y6xA
5,5,1739263057.7818828,pUzZ2vLNU+43qQ
6,6,1739263057.7818828,tA8/4oWFecs0Sw
7,7,1739263057.7818828,CmOpbAtEksH0EQ
8,8,1739263057.7818828,6onPVTQ6bwzkkw
9,9,1739263057.7818828,O549t7TC1c2Z1A
