diff --git a/CHANGELOG.md b/CHANGELOG.md
index 583acc98579..8367a8aff6e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,11 +1,21 @@
 # openpipelines 3.x.x
 
+## BREAKING
+
+* `differential_expression/create_pseudobulks`: Removed functionality to filter psuedobulk samples based on number of aggregated samples threshold, as this functionality is now covered in `filter/delimit_count` (PR #1044).
+
 ## NEW FUNCTIONALITY
 
 * `filter/filter_with_pattern`: Filters a MuData object based on gene names using a regex pattern (PR #1070).
 
 * `filter/delimit_counts`: Turns an .obs column of a MuData file containing count data into a boolean column based on thresholds (PR #1069)
 
+## EXPERIMENTAL
+
+* `differential_expression/deseq2`: Performs differential expression analysis using DESeq2 on bulk or pseudobulk datasets (PR #1044).
+
+* `workflows/differential_expression/pseudobulk_deseq2`: Workflow for generating pseudobulk samples from single-cell data followed by DESeq2 differential expression analysis (PR #1044)
+
 # openpipelines 3.0.0
 
 ## BREAKING CHANGES
diff --git a/src/differential_expression/create_pseudobulk/config.vsh.yaml b/src/differential_expression/create_pseudobulk/config.vsh.yaml
index aa90b260be1..6b0f0aa3588 100644
--- a/src/differential_expression/create_pseudobulk/config.vsh.yaml
+++ b/src/differential_expression/create_pseudobulk/config.vsh.yaml
@@ -1,5 +1,5 @@
-name: differential_expression
-namespace: "create_pseudobulk"
+name: create_pseudobulk
+namespace: differential_expression
 scope: "public"
 description: |
   Generation of pseudobulk samples from single-cell transcriptomics data,
@@ -54,11 +54,6 @@ argument_groups:
         choices: ["sum", "mean"]
         default: "sum"
         description: "Method to aggregate the raw counts for pseudoreplicates. Either sum or mean."
-      - name: "--min_obs_per_sample"
-        type: integer
-        min: 1
-        default: 30
-        description: "Minimum number of cells per pseudobulk sample."
       - name: --random_state
         type: integer
         description: |
diff --git a/src/differential_expression/create_pseudobulk/script.py b/src/differential_expression/create_pseudobulk/script.py
index a9af50f6d34..257893c6e69 100644
--- a/src/differential_expression/create_pseudobulk/script.py
+++ b/src/differential_expression/create_pseudobulk/script.py
@@ -13,7 +13,6 @@
     "obs_label": "cell_type",
     "obs_groups": ["treatment", "donor_id", "disease"],
     "obs_cell_count": "n_cells",
-    "min_obs_per_sample": 5,
     "random_state": 0,
     "output": "test.h5mu",
     "output_compression": "gzip",
@@ -92,7 +91,6 @@ def main():
     # Filter pseudobulk samples based on minimum observation count
     logger.info("Filtering pseudobulk samples based on minimum observation count...")
     adata_pb.obs[par["obs_cell_count"]] = count_obs(adata, adata_pb, pseudobulk_cols)
-    adata_pb = adata_pb[adata_pb.obs[par["obs_cell_count"]] > par["min_obs_per_sample"]]
 
     logger.info(
         f"Final dataset: {adata_pb.n_obs} pseudobulk samples, {adata_pb.n_vars} genes"
diff --git a/src/differential_expression/create_pseudobulk/test.py b/src/differential_expression/create_pseudobulk/test.py
index f40963ee553..859be8874b1 100644
--- a/src/differential_expression/create_pseudobulk/test.py
+++ b/src/differential_expression/create_pseudobulk/test.py
@@ -58,8 +58,6 @@ def test_multiple_factors(run_component, random_h5mu_path):
             "donor_id",
             "--obs_groups",
             "treatment",
-            "--min_obs_per_sample",
-            "5",
             "--output_compression",
             "gzip",
         ]
@@ -78,36 +76,5 @@ def test_multiple_factors(run_component, random_h5mu_path):
     )
 
 
-def test_filtering(run_component, random_h5mu_path):
-    output_path = random_h5mu_path()
-
-    run_component(
-        [
-            "--input",
-            input_path,
-            "--output",
-            output_path,
-            "--obs_label",
-            "cell_type",
-            "--obs_groups",
-            "treatment",
-            "--min_obs_per_sample",
-            "50",
-            "--output_compression",
-            "gzip",
-        ]
-    )
-
-    assert os.path.exists(output_path), "Output query file does not exist"
-    mdata = mu.read_h5mu(output_path)
-    adata = mdata.mod["rna"]
-
-    expected_obs = ["treatment", "cell_type", "n_cells"]
-    assert all(col in adata.obs for col in expected_obs), (
-        f"Expected columns {expected_obs} not found in .obs"
-    )
-    assert adata.shape[0] == 4, "Expected a total of 8 pseudobulk samples in the output"
-
-
 if __name__ == "__main__":
     sys.exit(pytest.main([__file__]))
diff --git a/src/differential_expression/deseq2/config.vsh.yaml b/src/differential_expression/deseq2/config.vsh.yaml
new file mode 100644
index 00000000000..d5385a5f834
--- /dev/null
+++ b/src/differential_expression/deseq2/config.vsh.yaml
@@ -0,0 +1,136 @@
+name: deseq2
+namespace: differential_expression
+description: |
+  Performs differential expression analysis using DESeq2 on bulk samples or pseudobulk samples aggregated from single-cell data.
+  Note that this component only considers factors as explanatory variables, and excludes covariates from the analysis.
+
+authors:
+  - __merge__: /src/authors/jakub_majercik.yaml
+    roles: [ author ]
+  - __merge__: /src/authors/dorien_roosen.yaml
+    roles: [ author ]
+  - __merge__: /src/authors/dries_de_maeyer.yaml
+    roles: [ contributor ]
+  - __merge__: /src/authors/weiwei_schultz.yaml
+    roles: [ contributor ]
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: "--input"
+        alternatives: ["-i"]
+        type: file
+        description: Input h5mu file containing (pseudo-)bulk transcriptomic samples.
+        direction: input
+        required: true
+      - name: "--modality"
+        description: |
+          Which modality from the input MuData file to process.
+        type: string
+        default: "rna"
+        required: false
+      - name: "--input_layer"
+        type: string
+        required: false
+        description: "Input layer to use. If None, X is used. This layer must contain raw counts."
+      - name: "--var_gene_names"
+        type: string
+        description: |
+          Name of the .var field that contains gene symbols. If not provided, .var.index will be used.
+        required: false
+        example: "gene_symbol"
+      - name: "--obs_cell_group"
+        type: string
+        description: |
+          .obs field containing the cell group information, for example per cell type or per cell cluster.
+          If true, performs per-cell-group analysis with cell-group-specific results.
+
+  - name: Arguments
+    arguments:
+      - name: "--design_formula"
+        type: string
+        description: |
+          The formula should be a tilde (~) followed by the variables with plus signs between them. 
+          The design formula is used to estimate the dispersions and to estimate the log2 fold changes of the model.
+        required: true
+        example: "~ disease + treatment"
+      - name: "--contrast_column"
+        type: string
+        description: |
+          Column in the metadata to use for the contrast.
+          This column should contain the conditions to compare.
+        required: true
+        example: "treatment"
+      - name: "--contrast_values"
+        type: string
+        multiple: true
+        description: |
+          Values to compare in the contrast column.
+          First value is the control group, following values are comparison groups. 
+          Values must be present in the fields specified for the design formula.
+        required: true
+        example: ["ctrl", "stim"]
+      - name: "--p_adj_threshold"
+        type: double
+        min: 0
+        max: 1
+        description: |
+          Adjusted p-value threshold for significance, corrected with the Benjamini and Hochberg method.
+          Genes with adjusted p-values below this threshold will be considered significant.
+        default: 0.05
+        required: false
+      - name: "--log2fc_threshold"
+        type: double
+        min: 0
+        description: |
+          Log2 fold change threshold for significance.
+          Genes with absolute log2 fold change above this threshold will be considered significant.
+        default: 0.0
+        required: false
+
+  - name: Outputs
+    arguments:
+      - name: "--output_dir"
+        alternatives: ["-o"]
+        type: file
+        description: |
+          Output directory for DESeq2 results. 
+          If cell groups are defined (using `--obs_cell_group`), the output folder will contain one CSV per cell group, otherwise it will contain a single output file.
+        direction: output
+        required: true
+      - name: "--output_prefix"
+        type: string
+        description: |
+          Prefix for output CSV files. 
+          If no cell groups are specified, the output file will be named "{prefix}.csv".
+          If cell groups are specified, files will be named "{prefix}_{cell_group}.csv".
+        default: "deseq2_analysis"
+        required: false
+
+resources:
+  - type: r_script
+    path: script.R
+test_resources:
+  - type: python_script
+    path: test.py
+  - path: /resources_test/annotation_test_data/TS_Blood_filtered_pseudobulk.h5mu
+
+engines:
+- type: docker
+  image: rocker/r2u:22.04
+  setup:
+    - type: apt
+      packages: [ libhdf5-dev, libgeos-dev, hdf5-tools ]
+    - type: r
+      cran: [ hdf5r ]
+      # Use pinned version to avoid H5 file access issues with latest stable release (0.99.0)
+      github: scverse/anndataR@36f3caad9a7f360165c1510bbe0c62657580415a
+      bioc: [ DESeq2 ]
+  test_setup:
+    - type: apt
+      packages: [ python3, python3-pip, python3-dev, python-is-python3 ]
+  __merge__: [ ., /src/base/requirements/python_test_setup.yaml]
+
+runners:
+- type: executable
+- type: nextflow
\ No newline at end of file
diff --git a/src/differential_expression/deseq2/script.R b/src/differential_expression/deseq2/script.R
new file mode 100644
index 00000000000..a6bde13b473
--- /dev/null
+++ b/src/differential_expression/deseq2/script.R
@@ -0,0 +1,419 @@
+library(DESeq2)
+library(anndataR)
+library(hdf5r)
+
+## VIASH START
+par <- list(
+  input = paste0(
+    "resources_test/annotation_test_data/",
+    "TS_Blood_filtered_pseudobulk.h5mu"
+  ),
+  output_dir = "./test_deseq2_no_cellgroup/",
+  output_prefix = "deseq2_analysis",
+  input_layer = NULL,
+  modality = "rna",
+  obs_cell_group = "cell_type",
+  design_formula = "~ treatment",
+  contrast_column = "treatment",
+  contrast_values = c("ctrl", "stim"),
+  p_adj_threshold = 0.05,
+  log2fc_threshold = 0.0,
+  var_gene_names = "feature_name"
+)
+meta <- list(resources_dir = "src/utils")
+## VIASH END
+
+cat("Starting DESeq2 analysis...\n")
+
+h5mu_to_h5ad <- function(h5mu_path, modality_name) {
+  tmp_path <- tempfile(fileext = ".h5ad")
+  mod_location <- paste("mod", modality_name, sep = "/")
+  h5src <- hdf5r::H5File$new(h5mu_path, "r")
+  h5dest <- hdf5r::H5File$new(tmp_path, "w")
+  children <- hdf5r::list.objects(h5src,
+    path = mod_location,
+    full.names = FALSE, recursive = FALSE
+  )
+  for (child in children) {
+    h5dest$obj_copy_from(
+      h5src, paste(mod_location, child, sep = "/"),
+      paste0("/", child)
+    )
+  }
+  # Also copy the root attributes
+  root_attrs <- hdf5r::h5attr_names(x = h5src)
+  for (attr in root_attrs) {
+    h5a <- h5src$attr_open(attr_name = attr)
+    robj <- h5a$read()
+    h5dest$create_attr_by_name(
+      attr_name = attr,
+      obj_name = ".",
+      robj = robj,
+      space = h5a$get_space(),
+      dtype = h5a$get_type()
+    )
+  }
+  h5src$close()
+  h5dest$close()
+
+  tmp_path
+}
+
+# Check if expression data is normalized (row sums =~ 1)
+is_normalized <- function(layer) {
+  row_sums <- if (is(layer, "sparseMatrix") || is(layer, "dgCMatrix")) {
+    Matrix::rowSums(layer)
+  } else {
+    rowSums(layer)
+  }
+
+  all(abs(row_sums - 1) < 1e-6, na.rm = TRUE)
+}
+
+# Extract design factors from formula
+parse_design_formula <- function(design_formula) {
+  if (!grepl("^~\\s*\\w+(\\s*\\+\\s*\\w+)*$", design_formula)) {
+    stop(
+      sprintf(
+        paste(
+          "Invalid design formula: '%s'.",
+          "Formula must start with '~' and contain factors separated by '+'.",
+          "Example: '~ disease + treatment'",
+          sep = "\n"
+        ),
+        design_formula
+      )
+    )
+  }
+  design_factors <- all.vars(as.formula(design_formula))
+  cat("Design formula:", design_formula, "\n")
+  cat("Extracted factors:", paste(design_factors, collapse = ", "), "\n")
+  design_factors
+}
+
+# Validate and prepare contrast specifications
+prepare_contrast_matrix <- function(
+    design_factors, contrast_column, metadata) {
+  # Validate required columns exist
+  required_columns <- unique(c(design_factors, contrast_column))
+  missing_columns <- setdiff(required_columns, colnames(metadata))
+
+  if (length(missing_columns) > 0) {
+    stop(sprintf(
+      paste(
+        "Missing required columns in metadata: %s\n",
+        "Available metadata columns: %s"
+      ),
+      paste(missing_columns, collapse = ", "),
+      paste(colnames(metadata), collapse = ", ")
+    ))
+  }
+
+  # Check contrast values exist
+  contrast_values <- par$contrast_values
+  available_values <- unique(metadata[[contrast_column]])
+  missing_values <- setdiff(contrast_values, available_values)
+
+  if (length(missing_values) > 0) {
+    stop(sprintf(
+      paste("Contrast values %s not found in %s.",
+        "Available values: %s",
+        sep = "\n"
+      ),
+      paste(missing_values, collapse = ", "),
+      contrast_column,
+      paste(available_values, collapse = ", ")
+    ))
+  }
+
+  # Handle different contrast scenarios
+  if (length(contrast_values) == 2) {
+    # Pairwise comparison
+    comparison_group <- contrast_values[1]
+    control_group <- contrast_values[2]
+    contrast_spec <- c(contrast_column, comparison_group, control_group)
+    cat(
+      "Performing pairwise contrast:", contrast_column,
+      comparison_group, "vs", control_group, "\n"
+    )
+    contrast_spec
+  } else if (length(contrast_values) > 2) {
+    # Multiple comparisons against first value (control_group)
+    control_group <- contrast_values[1]
+    contrast_specs <- list()
+    for (i in 2:length(contrast_values)) {
+      comparison_group <- contrast_values[i]
+      contrast_specs[[length(contrast_specs) + 1]] <-
+        c(contrast_column, comparison_group, control_group)
+    }
+    cat(
+      "Performing multiple contrasts against control_group '",
+      control_group, "':",
+      paste(sapply(contrast_specs, function(x) x[2]), collapse = ", "), "\n"
+    )
+    contrast_specs
+  } else {
+    stop(sprintf(
+      "Need at least 2 values for contrast, got: %s",
+      paste(contrast_values, collapse = ", ")
+    ))
+  }
+}
+
+# Convert expression matrix to counts data frame
+prepare_counts_matrix <- function(layer, var_names, obs_names) {
+  counts <- if (is(layer, "sparseMatrix") || is(layer, "dgCMatrix")) {
+    as.matrix(layer)
+  } else {
+    layer
+  }
+
+  # Create properly named data frame (transpose for DESeq2 format)
+  counts_df <- data.frame(counts)
+  colnames(counts_df) <- var_names
+  rownames(counts_df) <- obs_names
+
+  # Ensure integer counts (required for DESeq2)
+  counts_df[] <- lapply(counts_df, function(x) as.integer(round(x)))
+  counts_df
+}
+
+# Create and configure DESeq2 dataset
+create_deseq2_dataset <- function(counts, metadata, design_formula) {
+  cat("Creating DESeq2 dataset\n")
+
+  # Ensure matching samples between counts and metadata
+  common_samples <- intersect(rownames(counts), rownames(metadata))
+  if (length(common_samples) == 0) {
+    stop("No common samples found between counts and metadata")
+  }
+
+  counts <- counts[common_samples, , drop = FALSE]
+  metadata <- metadata[common_samples, , drop = FALSE]
+
+  # Create DESeqDataSet (transpose for gene x sample format)
+  dds <- DESeq2::DESeqDataSetFromMatrix(
+    countData = t(counts),
+    colData = metadata,
+    design = as.formula(design_formula)
+  )
+  dds
+}
+
+# Perform DESeq2 differential expression analysis
+deseq2_analysis <- function(dds, contrast_specs) {
+  cat("Running DESeq2 analysis\n")
+
+  dds <- DESeq2::DESeq(dds)
+
+  # Ensure contrast_specs is a list
+  if (!is.list(contrast_specs)) {
+    contrast_specs <- list(contrast_specs)
+  }
+
+  all_results <- lapply(seq_along(contrast_specs), function(i) {
+    contrast_spec <- contrast_specs[[i]]
+    cat(
+      "Performing statistical test for contrast:",
+      paste(contrast_spec, collapse = " "), "\n"
+    )
+
+    # Get DESeq2 results for this contrast
+    res <- DESeq2::results(
+      dds,
+      contrast = contrast_spec, alpha = par$p_adj_threshold
+    )
+
+    # Convert to data frame and add metadata
+    results_df <- as.data.frame(res)
+    results_df$gene_id <- rownames(results_df)
+    results_df$contrast <- paste0(contrast_spec[2], "_vs_", contrast_spec[3])
+    results_df$comparison_group <- contrast_spec[2]
+    results_df$control_group <- contrast_spec[3]
+    results_df$abs_log2FoldChange <- abs(results_df$log2FoldChange)
+    results_df$significant <- (
+      results_df$padj < par$p_adj_threshold &
+        !is.na(results_df$padj) &
+        abs(results_df$log2FoldChange) > par$log2fc_threshold
+    )
+
+    # Sort by effect size
+    results_df[order(results_df$log2FoldChange, decreasing = TRUE), ]
+  })
+
+  # Combine all results
+  combined_results <- do.call(rbind, all_results)
+
+  # Log summary statistics
+  for (i in seq_along(contrast_specs)) {
+    contrast_spec <- contrast_specs[[i]]
+    contrast_name <- paste0(contrast_spec[2], "_vs_", contrast_spec[3])
+    contrast_subset <- combined_results[
+      combined_results$contrast == contrast_name,
+    ]
+    n_significant <- sum(contrast_subset$significant, na.rm = TRUE)
+    cat("Contrast", contrast_name, ":", n_significant, "significant genes\n")
+  }
+
+  combined_results
+}
+
+# Save results and print summary statistics
+save_results_and_log_summary <- function(
+    results, output_file, cell_group = NULL) {
+  group_text <- if (!is.null(cell_group)) paste(" for", cell_group) else ""
+  cat("Saving results", group_text, "to", output_file, "\n")
+
+  write.csv(results, output_file, row.names = FALSE)
+
+  # Calculate summary statistics
+  sig_results <- results[results$significant & !is.na(results$significant), ]
+  upregulated <- sig_results[sig_results$log2FoldChange > 0, ]
+  downregulated <- sig_results[sig_results$log2FoldChange < 0, ]
+
+  cat("Summary", group_text, ":\n")
+  cat("  Total genes analyzed:", nrow(results), "\n")
+  cat("  Significant upregulated:", nrow(upregulated), "\n")
+  cat("  Significant downregulated:", nrow(downregulated), "\n")
+}
+
+# Main analysis workflow
+main <- function() {
+  cat("Loading pseudobulk data from", par$input, "\n")
+
+  # Load and prepare data
+  h5ad_path <- h5mu_to_h5ad(par$input, par$modality)
+  mod <- anndataR::read_h5ad(h5ad_path, as = "InMemoryAnnData")
+  metadata <- as.data.frame(mod$obs)
+
+  # Get expression matrix
+  layer <- if (!is.null(par$input_layer)) {
+    mod$layers[[par$input_layer]]
+  } else {
+    mod$X
+  }
+
+  if (is_normalized(layer)) {
+    stop("Input layer must contain raw counts.")
+  }
+
+  # Prepare analysis components
+  cat("Preparing design formula\n")
+  design_factors <- parse_design_formula(par$design_formula)
+
+  cat("Preparing contrast matrix\n")
+  contrast_specs <- prepare_contrast_matrix(
+    design_factors, par$contrast_column, metadata
+  )
+
+  cat("Preparing counts matrix for DESeq2\n")
+  var_names <- if (!is.null(par$var_gene_names)) {
+    if (par$var_gene_names %in% colnames(mod$var)) {
+      mod$var[[par$var_gene_names]]
+    } else {
+      stop(
+        sprintf(
+          "var_gene_names '%s' not found in mod$var columns: %s",
+          par$var_gene_names,
+          paste(colnames(mod$var), collapse = ", ")
+        )
+      )
+    }
+  } else {
+    mod$var_names
+  }
+  obs_names <- mod$obs_names
+  counts <- prepare_counts_matrix(layer, var_names, obs_names)
+
+  # Ensure output directory exists
+  if (!dir.exists(par$output_dir)) {
+    dir.create(par$output_dir, recursive = TRUE)
+  }
+
+  # Run analysis (per cell group or overall)
+  tryCatch(
+    {
+      if (
+        !is.null(par$obs_cell_group) &&
+          par$obs_cell_group %in% colnames(metadata)
+      ) {
+        run_per_cell_group_analysis(counts, metadata, contrast_specs)
+      } else {
+        run_overall_analysis(counts, metadata, contrast_specs)
+      }
+      cat("DESeq2 analysis completed successfully\n")
+    },
+    error = function(e) {
+      cat("Error in analysis. Check input data and parameters:\n")
+      cat("Contrast column:", par$contrast_column, "\n")
+      cat("Contrast values:", paste(par$contrast_values, collapse = ", "), "\n")
+      cat("Number of samples:", nrow(metadata), "\n")
+      cat("Number of genes:", ncol(counts), "\n")
+      stop(e)
+    }
+  )
+}
+
+# Run analysis per cell group
+run_per_cell_group_analysis <- function(counts, metadata, contrast_specs) {
+  cat("Running DESeq2 analysis per cell group\n")
+
+  # Remove cell group from design formula
+  design_no_celltype <- gsub(
+    paste0("\\+\\s*", par$obs_cell_group), "", par$design_formula
+  )
+  design_no_celltype <- gsub(
+    paste0(par$obs_cell_group, "\\s*\\+"), "", design_no_celltype
+  )
+  design_no_celltype <- gsub("\\s+", " ", design_no_celltype)
+
+  cell_groups <- unique(metadata[[par$obs_cell_group]])
+
+  for (cell_group in cell_groups) {
+    cat("Processing cell group:", cell_group, "\n")
+
+    # Subset data
+    cell_mask <- metadata[[par$obs_cell_group]] == cell_group
+    counts_subset <- counts[cell_mask, , drop = FALSE]
+    metadata_subset <- metadata[cell_mask, , drop = FALSE]
+
+    # Skip if insufficient samples
+    if (nrow(counts_subset) < 2) {
+      cat("Skipping cell group", cell_group, "- too few samples\n")
+      next
+    }
+
+    # Run analysis
+    dds <- create_deseq2_dataset(
+      counts_subset, metadata_subset, design_no_celltype
+    )
+    results <- deseq2_analysis(dds, contrast_specs)
+    results[[par$obs_cell_group]] <- cell_group
+
+    # Save results
+    safe_name <- gsub("[/ \\(\\)]", "_", as.character(cell_group))
+    safe_name <- gsub("_+", "_", safe_name)
+    output_file <- file.path(
+      par$output_dir,
+      paste0(par$output_prefix, "_", safe_name, ".csv")
+    )
+    save_results_and_log_summary(results, output_file, cell_group)
+  }
+}
+
+# Run overall analysis (all samples together)
+run_overall_analysis <- function(counts, metadata, contrast_specs) {
+  dds <- create_deseq2_dataset(counts, metadata, par$design_formula)
+  results <- deseq2_analysis(dds, contrast_specs)
+
+  output_file <- file.path(
+    par$output_dir,
+    paste0(par$output_prefix, ".csv")
+  )
+  save_results_and_log_summary(results, output_file)
+}
+
+# Run main function if script is executed directly
+if (!interactive()) {
+  main()
+}
diff --git a/src/differential_expression/deseq2/test.py b/src/differential_expression/deseq2/test.py
new file mode 100644
index 00000000000..63a05f0f4e6
--- /dev/null
+++ b/src/differential_expression/deseq2/test.py
@@ -0,0 +1,405 @@
+import subprocess
+import sys
+import pytest
+import pandas as pd
+import re
+
+
+## VIASH START
+meta = {"resources_dir": "resources_test/"}
+## VIASH END
+
+sys.path.append(meta["resources_dir"])
+
+
+@pytest.fixture
+def pseudobulk_test_data_path():
+    """Path to the pseudobulk test data"""
+    return f"{meta['resources_dir']}/TS_Blood_filtered_pseudobulk.h5mu"
+
+
+def test_simple_deseq2_execution(run_component, tmp_path, pseudobulk_test_data_path):
+    """Test basic DESeq2 execution with minimal parameters"""
+    output_dir = tmp_path / "deseq2_output"
+
+    run_component(
+        [
+            "--input",
+            pseudobulk_test_data_path,
+            "--output_dir",
+            str(output_dir),
+            "--design_formula",
+            "~ treatment",
+            "--contrast_column",
+            "treatment",
+            "--contrast_values",
+            "stim",
+            "--contrast_values",
+            "ctrl",
+        ]
+    )
+
+    assert output_dir.exists(), "Output directory does not exist"
+
+    # Assert one CSV file was found
+    csv_files = list(output_dir.glob("deseq2_analysis*.csv"))  # Default prefix
+    assert len(csv_files) == 1, "Should find exactly one CSV file in output folder"
+    output_file = output_dir / "deseq2_analysis.csv"  # Default prefix
+    assert output_file.exists(), "Output CSV file does not exist"
+
+    # Check the output file
+    results = pd.read_csv(output_file)
+    expected_columns = [
+        "baseMean",
+        "log2FoldChange",
+        "lfcSE",
+        "stat",
+        "pvalue",
+        "padj",
+        "significant",
+        "gene_id",
+        "contrast",
+        "comparison_group",
+        "control_group",
+        "abs_log2FoldChange",
+    ]
+    assert all(col in results.columns for col in expected_columns), (
+        f"Expected columns {expected_columns} not found"
+    )
+
+    expected_float_cols = [
+        "baseMean",
+        "log2FoldChange",
+        "lfcSE",
+        "stat",
+        "pvalue",
+        "padj",
+        "abs_log2FoldChange",
+    ]
+    float_cols = results.select_dtypes(include=["float"]).columns.tolist()
+    assert all(col in float_cols for col in expected_float_cols), (
+        f"Expected float columns {expected_float_cols} not found"
+    )
+
+    expected_obj_cols = ["gene_id", "contrast", "comparison_group", "control_group"]
+    obj_cols = results.select_dtypes(include=["object"]).columns.tolist()
+    assert all(col in obj_cols for col in expected_obj_cols), (
+        f"Expected object columns {expected_obj_cols} not found"
+    )
+
+
+def test_simple_deseq2_with_cell_group(
+    run_component, tmp_path, pseudobulk_test_data_path
+):
+    """Test DESeq2 execution with cell groups - should create separate files"""
+    output_dir = tmp_path / "deseq2_output"
+
+    run_component(
+        [
+            "--input",
+            pseudobulk_test_data_path,
+            "--output_dir",
+            str(output_dir),
+            "--obs_cell_group",
+            "cell_type",
+            "--design_formula",
+            "~ treatment",
+            "--contrast_column",
+            "treatment",
+            "--contrast_values",
+            "stim",
+            "--contrast_values",
+            "ctrl",
+        ]
+    )
+
+    assert output_dir.exists(), "Output directory does not exist"
+
+    # Check that multiple CSV files exist (one per cell group)
+    csv_files = list(output_dir.glob("deseq2_analysis_*.csv"))  # Default prefix
+    assert len(csv_files) > 0, "No cell group-specific CSV files found"
+
+    # Check each file has expected structure
+    for csv_file in csv_files:
+        results = pd.read_csv(csv_file)
+        expected_columns = [
+            "baseMean",
+            "log2FoldChange",
+            "lfcSE",
+            "stat",
+            "pvalue",
+            "padj",
+            "significant",
+            "gene_id",
+            "contrast",
+            "comparison_group",
+            "control_group",
+            "abs_log2FoldChange",
+        ]
+        assert all(col in results.columns for col in expected_columns), (
+            f"Expected columns {expected_columns} not found in {csv_file}"
+        )
+        assert len(results) > 0, f"No results found in {csv_file}"
+
+        # Check that all rows have the same cell_type value
+        assert results["cell_type"].nunique() == 1, (
+            f"Multiple cell types found in {csv_file}"
+        )
+
+        expected_float_cols = [
+            "baseMean",
+            "log2FoldChange",
+            "lfcSE",
+            "stat",
+            "pvalue",
+            "padj",
+            "abs_log2FoldChange",
+        ]
+        float_cols = results.select_dtypes(include=["float"]).columns.tolist()
+        assert all(col in float_cols for col in expected_float_cols), (
+            f"Expected float columns {expected_float_cols} not found"
+        )
+
+        expected_obj_cols = ["gene_id", "contrast", "comparison_group", "control_group"]
+        obj_cols = results.select_dtypes(include=["object"]).columns.tolist()
+        assert all(col in obj_cols for col in expected_obj_cols), (
+            f"Expected object columns {expected_obj_cols} not found"
+        )
+
+
+def test_complex_design_formula(run_component, tmp_path, pseudobulk_test_data_path):
+    """Test DESeq2 with complex design formula accounting for multiple factors"""
+    output_dir = tmp_path / "deseq2_output"
+
+    run_component(
+        [
+            "--input",
+            pseudobulk_test_data_path,
+            "--output_dir",
+            str(output_dir),
+            "--design_formula",
+            "~ disease + treatment",
+            "--contrast_column",
+            "treatment",
+            "--contrast_values",
+            "stim",
+            "--contrast_values",
+            "ctrl",
+            "--padj_threshold",
+            "0.1",
+            "--log2fc_threshold",
+            "0.5",
+        ]
+    )
+
+    assert output_dir.exists(), "Output directory does not exist"
+    output_file = output_dir / "deseq2_analysis.csv"  # Default prefix
+    assert output_file.exists(), "Output CSV file does not exist"
+
+    results = pd.read_csv(output_file)
+    assert "significant" in results.columns, "Significance column not found"
+    assert len(results) > 0, "No results found for complex design"
+
+
+def test_complex_design_formula_with_cell_groups(
+    run_component, tmp_path, pseudobulk_test_data_path
+):
+    """Test that without cell group specified, a single CSV is created in output directory"""
+    output_dir = tmp_path / "deseq2_output"
+
+    run_component(
+        [
+            "--input",
+            pseudobulk_test_data_path,
+            "--output_dir",
+            str(output_dir),
+            "--design_formula",
+            "~ treatment + disease",
+            "--obs_cell_group",
+            "cell_type",
+            "--contrast_column",
+            "treatment",
+            "--contrast_values",
+            "stim",
+            "--contrast_values",
+            "ctrl",
+        ]
+    )
+
+    assert output_dir.exists(), "Output directory does not exist"
+
+    # Check that cell group specific files exist
+    csv_files = list(output_dir.glob("deseq2_analysis_*.csv"))  # Default prefix
+    assert len(csv_files) >= 1, "Could not find cell group-specific files"
+
+    # Check the main file structure
+    for csv_file in csv_files:
+        results = pd.read_csv(csv_file)
+        expected_columns = ["log2FoldChange", "pvalue", "padj", "significant"]
+        assert all(col in results.columns for col in expected_columns), (
+            f"Expected columns {expected_columns} not found in {csv_file}"
+        )
+        assert len(results) > 0, f"No results found in {csv_file}"
+        assert results["cell_type"].nunique() == 1, (
+            f"Multiple cell types found in {csv_file} - should be one per file"
+        )
+
+
+def test_invalid_contrast_column(run_component, tmp_path, pseudobulk_test_data_path):
+    """Test that invalid contrast column raises appropriate error"""
+    output_dir = tmp_path / "deseq2_output"
+
+    with pytest.raises(subprocess.CalledProcessError) as err:
+        run_component(
+            [
+                "--input",
+                pseudobulk_test_data_path,
+                "--output_dir",
+                str(output_dir),
+                "--design_formula",
+                "~ treatment",
+                "--contrast_column",
+                "nonexistent_column",
+                "--contrast_values",
+                "group1",
+                "--contrast_values",
+                "group2",
+            ]
+        )
+
+    # Updated regex to match actual R error format (no square brackets)
+    assert re.search(
+        r"Missing required columns in metadata: nonexistent_column",
+        err.value.stdout.decode("utf-8"),
+    ), f"Expected error message not found: {err.value.stdout.decode('utf-8')}"
+
+
+def test_invalid_design_column(run_component, tmp_path, pseudobulk_test_data_path):
+    """Test that invalid contrast column raises appropriate error"""
+    output_dir = tmp_path / "deseq2_output"
+
+    with pytest.raises(subprocess.CalledProcessError) as err:
+        run_component(
+            [
+                "--input",
+                pseudobulk_test_data_path,
+                "--output_dir",
+                str(output_dir),
+                "--design_formula",
+                "malformed formula",
+                "--contrast_column",
+                "treatment",
+                "--contrast_values",
+                "ctrl",
+                "--contrast_values",
+                "stim",
+            ]
+        )
+
+    # Updated regex to match actual R error format (no square brackets)
+    assert re.search(
+        r"Invalid design formula: 'malformed formula'",
+        err.value.stdout.decode("utf-8"),
+    ), f"Expected error message not found: {err.value.stdout.decode('utf-8')}"
+
+
+def test_custom_output_prefix(run_component, tmp_path, pseudobulk_test_data_path):
+    """Test custom output prefix functionality"""
+    output_dir = tmp_path / "deseq2_output"
+    custom_prefix = "my_custom_analysis"
+
+    run_component(
+        [
+            "--input",
+            pseudobulk_test_data_path,
+            "--output_dir",
+            str(output_dir),
+            "--output_prefix",
+            custom_prefix,
+            "--design_formula",
+            "~ treatment",
+            "--contrast_column",
+            "treatment",
+            "--contrast_values",
+            "stim",
+            "--contrast_values",
+            "ctrl",
+        ]
+    )
+
+    assert output_dir.exists(), "Output directory does not exist"
+
+    # Should have exactly one CSV file with custom prefix
+    output_file = output_dir / f"{custom_prefix}.csv"
+    assert output_file.exists(), (
+        f"Custom prefix output CSV file {output_file} does not exist"
+    )
+
+    # Check the main file structure
+    results = pd.read_csv(output_file)
+    expected_columns = ["log2FoldChange", "pvalue", "padj", "significant"]
+    assert all(col in results.columns for col in expected_columns), (
+        f"Expected columns {expected_columns} not found"
+    )
+    assert len(results) > 0, "No results found in output"
+
+
+def test_custom_output_prefix_with_cell_groups(
+    run_component, tmp_path, pseudobulk_test_data_path
+):
+    """Test custom output prefix with cell groups"""
+    output_dir = tmp_path / "deseq2_output"
+    custom_prefix = "celltype_analysis"
+
+    run_component(
+        [
+            "--input",
+            pseudobulk_test_data_path,
+            "--output_dir",
+            str(output_dir),
+            "--output_prefix",
+            custom_prefix,
+            "--obs_cell_group",
+            "cell_type",
+            "--design_formula",
+            "~ treatment",
+            "--contrast_column",
+            "treatment",
+            "--contrast_values",
+            "stim",
+            "--contrast_values",
+            "ctrl",
+        ]
+    )
+
+    assert output_dir.exists(), "Output directory does not exist"
+
+    # Check that multiple CSV files exist with custom prefix
+    csv_files = list(output_dir.glob(f"{custom_prefix}_*.csv"))
+    assert len(csv_files) > 0, (
+        f"No cell group-specific CSV files found with prefix {custom_prefix}"
+    )
+
+    # Check each file has expected structure
+    for csv_file in csv_files:
+        results = pd.read_csv(csv_file)
+        expected_columns = [
+            "log2FoldChange",
+            "pvalue",
+            "padj",
+            "significant",
+            "cell_type",
+        ]
+        assert all(col in results.columns for col in expected_columns), (
+            f"Expected columns {expected_columns} not found in {csv_file}"
+        )
+        assert len(results) > 0, f"No results found in {csv_file}"
+
+        # Check that all rows have the same cell_type value
+        assert results["cell_type"].nunique() == 1, (
+            f"Multiple cell types found in {csv_file}"
+        )
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__]))
diff --git a/src/workflows/differential_expression/pseudobulk_deseq2/config.vsh.yaml b/src/workflows/differential_expression/pseudobulk_deseq2/config.vsh.yaml
new file mode 100644
index 00000000000..256824bdadd
--- /dev/null
+++ b/src/workflows/differential_expression/pseudobulk_deseq2/config.vsh.yaml
@@ -0,0 +1,157 @@
+name: pseudobulk_deseq2
+namespace: workflows/differential_expression
+scope: public
+description: |
+  Performs pseudobulk generation and subsequent differential expression analysis using DESeq2.
+authors:
+  - __merge__: /src/authors/jakub_majercik.yaml
+    roles: [ author ]
+  - __merge__: /src/authors/weiwei_schultz.yaml
+    roles: [ contributor ]
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --input
+        type: file
+        required: true
+        description: Input h5mu file.
+        example: /path/to/input_file.h5mu
+      - name: --modality
+        type: string
+        default: rna
+        description: Which modality from the input MuData file to process.
+      - name: --input_layer
+        type: string
+        required: false
+        description: Input layer to use. If None, X is used. This layer must contain raw counts.
+      - name: --obs_cell_group
+        type: string
+        required: true
+        description: |
+          .obs field containing the variable to group on. Typically this field contains cell groups such as annotated cell types or clusters.
+          If provided, will generate DESeq2 analysis per cell group.
+        example: cell_type
+      - name: --obs_groups
+        type: string
+        multiple: true
+        description: .obs fields containing the experimental condition(s) to create pseudobulk samples for.
+        example: ["treatment", "disease"]
+      - name: "--var_gene_names"
+        type: string
+        description: |
+          Name of the .var field that contains gene symbols. If not provided, .var.index will be used.
+        required: false
+        example: "gene_symbol"
+  
+  - name: Pseudobulk Options
+    arguments:
+      - name: --aggregation_method
+        type: string
+        choices: ["sum", "mean"]
+        default: sum
+        description: Method to aggregate the raw counts for pseudoreplicates. Either sum or mean.
+      - name: --random_state
+        type: integer
+        min: 0
+        default: 0
+        description: The random seed for sampling.
+
+  - name: Filtering options
+    arguments:
+      - name: --min_obs_per_sample
+        type: integer
+        min: 1
+        default: 30
+        description: Minimum number of observations per pseudobulk sample.
+      - name: --filter_genes_min_samples
+        type: integer
+        min: 1
+        description: |
+          Minimum number of samples a gene must be expressed in to be included in the analysis.
+          If None, no filtering is applied.
+      - name: --filter_gene_patterns
+        type: string
+        multiple: true
+        example: ["MIR\\d+", "AL\\d+", "LINC\\d+", "AC\\d+", "AP\\d+"]
+        description: |
+          List of regex patterns to filter out genes.
+          Genes matching any of these patterns will be excluded from the analysis.
+        required: false
+
+  - name: DGEA options
+    arguments:
+      - name: --design_formula
+        type: string
+        required: true
+        description: |
+          Design formula for DESeq2 analysis in R-style format.
+          Specifies the statistical model to account for various factors.
+        example: "~ cell_type + disease + treatment"
+      - name: --contrast_column
+        type: string
+        required: true
+        description: |
+          Column in the metadata to use for the contrast.
+          This column should contain the conditions to compare.
+        example: "treatment"
+      - name: --contrast_values
+        type: string
+        multiple: true
+        required: true
+        description: |
+          Values to compare in the contrast column.
+          First value is baseline, following values are different treatments.
+        example: ["ctrl", "stim"]
+      - name: --p_adj_threshold
+        type: double
+        default: 0.05
+        description: |
+          Adjusted p-value threshold for significance.
+          Genes with adjusted p-values below this threshold will be considered significant.
+        required: false
+      - name: --log2fc_threshold
+        type: double
+        default: 0.0
+        description: |
+          Log2 fold change threshold for significance.
+          Genes with absolute log2 fold change above this threshold will be considered significant.
+        required: false
+
+  - name: Outputs
+    arguments:
+      - name: "--output"
+        alternatives: ["-o"]
+        type: file
+        description: |
+          Output directory for DESeq2 results, containing one CSV file per cell group (as defined by `--obs_cell_group`) 
+        direction: output
+        required: true
+      - name: "--output_prefix"
+        type: string
+        description: |
+          Prefix for output CSV files: files will be named "{prefix}_{cell_group}.csv"
+        default: "deseq2_analysis"
+        required: false
+
+dependencies:
+  - name: differential_expression/create_pseudobulk
+  - name: differential_expression/deseq2
+  - name: filter/filter_with_counts
+  - name: filter/filter_with_pattern
+  - name: filter/delimit_counts
+  - name: filter/do_filter
+
+resources:
+  - type: nextflow_script
+    path: main.nf
+    entrypoint: run_wf
+
+test_resources:
+  - type: nextflow_script
+    path: test.nf
+    entrypoint: test_wf
+  - path: /resources_test/annotation_test_data/TS_Blood_filtered.h5mu
+
+runners:
+  - type: nextflow
\ No newline at end of file
diff --git a/src/workflows/differential_expression/pseudobulk_deseq2/integration_test.sh b/src/workflows/differential_expression/pseudobulk_deseq2/integration_test.sh
new file mode 100755
index 00000000000..c712b294512
--- /dev/null
+++ b/src/workflows/differential_expression/pseudobulk_deseq2/integration_test.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# get the root of the directory
+REPO_ROOT=$(git rev-parse --show-toplevel)
+
+# ensure that the command below is run from the root of the repository
+cd "$REPO_ROOT"
+
+nextflow \
+  run . \
+  -main-script src/workflows/differential_expression/pseudobulk_deseq2/test.nf \
+  -entry test_wf \
+  -profile docker,no_publish \
+  -c src/workflows/utils/labels_ci.config \
+  -c src/workflows/utils/integration_tests.config \
diff --git a/src/workflows/differential_expression/pseudobulk_deseq2/main.nf b/src/workflows/differential_expression/pseudobulk_deseq2/main.nf
new file mode 100644
index 00000000000..a9fec8dc6d9
--- /dev/null
+++ b/src/workflows/differential_expression/pseudobulk_deseq2/main.nf
@@ -0,0 +1,94 @@
+workflow run_wf {
+  take:
+    input_ch
+
+  main:
+
+    output_ch = input_ch
+      | create_pseudobulk.run(
+        fromState: [
+          input: "input",
+          modality: "modality",
+          input_layer: "input_layer",
+          obs_label: "obs_cell_group",
+          obs_groups: "obs_groups",
+          aggregation_method: "aggregation_method",
+          random_state: "random_state"
+        ],
+        args:[
+          obs_cell_count: "n_cells"
+        ],
+        toState: [ "input": "output" ]
+      )
+
+      | delimit_counts.run(
+          fromState: [
+            input: "input",
+            modality: "modality",
+            min_counts: "min_obs_per_sample",
+
+          ],
+          args: [
+            do_subset: "True",
+            obs_count_column: "n_cells",
+            obs_name_filter: "delimit_samples_per_pseudobulk",
+          ],
+          toState: [ "input": "output" ]
+      )
+
+      | filter_with_pattern.run(
+          runIf: { id, state ->
+            state.filter_gene_patterns
+          },
+          fromState: [
+            input: "input",
+            modality: "modality",
+            var_gene_names: "var_gene_names",
+            pattern: "filter_gene_patterns",
+            min_counts: "min_obs_per_sample",
+
+          ],
+          args: [
+            do_subset: "True",
+            var_name_filter: "filter_with_gene_pattern"
+          ],
+          toState: [ "input": "output" ]
+      )
+
+      | filter_with_counts.run(
+          fromState: [
+            input: "input",
+            modality: "modality",
+            layer: "input_layer",
+            min_cells_per_gene: "filter_genes_min_samples",
+          ],
+          args: [
+            do_subset: "True",
+            var_name_filter: "filter_with_counts"
+          ],
+          toState: [ "input": "output" ]
+      )
+
+      | deseq2.run(
+        fromState: [
+          input: "input",
+          modality: "modality",
+          input_layer: "input_layer",
+          var_gene_names: "var_gene_names",
+          obs_cell_group: "obs_cell_group",
+          design_formula: "design_formula",
+          contrast_column: "contrast_column",
+          contrast_values: "contrast_values",
+          p_adj_threshold: "p_adj_threshold",
+          log2fc_threshold: "log2fc_threshold",
+          output_dir: "output",
+          output_prefix: "output_prefix"
+        ],
+        toState: [ "output": "output_dir" ]
+      )
+
+      | setState([ "output" ])
+
+  emit:
+    output_ch
+}
\ No newline at end of file
diff --git a/src/workflows/differential_expression/pseudobulk_deseq2/nextflow.config b/src/workflows/differential_expression/pseudobulk_deseq2/nextflow.config
new file mode 100644
index 00000000000..059100c489c
--- /dev/null
+++ b/src/workflows/differential_expression/pseudobulk_deseq2/nextflow.config
@@ -0,0 +1,10 @@
+manifest {
+  nextflowVersion = '!>=20.12.1-edge'
+}
+
+params {
+  rootDir = java.nio.file.Paths.get("$projectDir/../../../../").toAbsolutePath().normalize().toString()
+}
+
+// include common settings
+includeConfig("${params.rootDir}/src/workflows/utils/labels.config")
diff --git a/src/workflows/differential_expression/pseudobulk_deseq2/test.nf b/src/workflows/differential_expression/pseudobulk_deseq2/test.nf
new file mode 100644
index 00000000000..565682f58fd
--- /dev/null
+++ b/src/workflows/differential_expression/pseudobulk_deseq2/test.nf
@@ -0,0 +1,86 @@
+nextflow.enable.dsl=2
+
+include { pseudobulk_deseq2 } from params.rootDir + "/target/nextflow/workflows/differential_expression/pseudobulk_deseq2/main.nf"
+include { pseudobulk_deseq2_test } from params.rootDir + "/target/_test/nextflow/test_workflows/differential_expression/pseudobulk_deseq2_test/main.nf"
+
+params.resources_test = params.rootDir + "/resources_test"
+
+workflow test_wf {
+  // allow changing the resources_test dir
+  resources_test = file(params.resources_test)
+
+  output_ch = Channel.fromList(
+    [
+      [
+        id: "simple_execution_test",
+        input: resources_test.resolve("annotation_test_data/TS_Blood_filtered.h5mu"),
+        obs_cell_group: "cell_type",
+        obs_groups: ["treatment", "disease"],
+        min_obs_per_sample: 5,
+        design_formula: "~ treatment",
+        contrast_column: "treatment",
+        contrast_values: ["ctrl", "stim"],
+        output: "simple_execution_test_output"
+      ]
+    ])
+    | map{ state -> [state.id, state] }
+    | pseudobulk_deseq2
+
+    | view { output ->
+      assert output.size() == 2 : "Outputs should contain two elements; [id, state]"
+
+      // check id
+      def id = output[0]
+      assert id.endsWith("_test") : "Output ID should be same as input ID"
+
+      // check output
+      def state = output[1]
+      assert state instanceof Map : "State should be a map. Found: ${state}"
+      assert state.containsKey("output") : "Output should contain key 'output'. found: ${state}"
+      assert state.output.isDirectory() : "'output' should be a directory."
+      def deseqFiles = state.output.listFiles().findAll { file ->
+        file.name.matches(/deseq2_analysis_.*\.csv/)
+      }
+      assert deseqFiles.size() == 4, "Output directory should contain exactly 4 deseq2_analysis_*.csv files. Found: ${deseqFiles.size()} files: ${deseqFiles.collect { it.name }}"
+    
+    "Output: $output"
+    }
+
+    | flatMap { output->
+        def id = output[0]
+        def state = output[1]
+        
+        // Find all CSV files and create separate entries
+        def deseqFiles = state.output.listFiles().findAll { file ->
+          file.name.matches(/deseq2_analysis_.*\.csv/)
+        }
+        deseqFiles.collect { csvFile ->
+          def cellType = csvFile.name.replaceAll(/deseq2_analysis_(.*)\.csv/, '$1')
+          def newId = "${id}_${cellType}"
+          [newId, ["input": csvFile]]
+        }
+    }
+
+    | view { output -> "After flatMap: $output" }
+    | pseudobulk_deseq2_test.run(
+        fromState: [
+          "input": "input"
+        ]
+    )
+
+  output_ch
+    | toSortedList({a, b -> a[0] <=> b[0]})
+    | map { output_list ->
+      assert output_list.size() == 4 : "output channel should contain 4 events"
+
+      def sortedIds = output_list.collect{it[0]}.sort()
+      def expectedIds = [
+        "simple_execution_test_classical_monocyte",
+        "simple_execution_test_erythrocyte",
+        "simple_execution_test_neutrophil",
+        "simple_execution_test_plasma_cell"
+      ]
+
+      assert sortedIds == expectedIds : "IDs don't match. Expected: ${expectedIds}, found: ${sortedIds}"
+    }
+}
\ No newline at end of file
diff --git a/src/workflows/test_workflows/differential_expression/pseudobulk_deseq2/config.vsh.yaml b/src/workflows/test_workflows/differential_expression/pseudobulk_deseq2/config.vsh.yaml
new file mode 100644
index 00000000000..55be62a1379
--- /dev/null
+++ b/src/workflows/test_workflows/differential_expression/pseudobulk_deseq2/config.vsh.yaml
@@ -0,0 +1,25 @@
+name: "pseudobulk_deseq2_test"
+namespace: "test_workflows/differential_expression"
+scope: "test"
+description: "This component tests the output of the pseudobulk_deseq2 differential expression analysis workflow."
+authors:
+  - __merge__: /src/authors/dorien_roosen.yaml
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: "--input"
+        type: file
+        required: true
+        description: Path to h5mu output.
+        example: foo.final.h5mu
+
+resources:
+  - type: python_script
+    path: script.py
+engines:
+  - type: docker
+    image: python:3.13-slim
+    __merge__: /src/base/requirements/testworkflows_setup.yaml
+runners:
+  - type: executable
+  - type: nextflow
\ No newline at end of file
diff --git a/src/workflows/test_workflows/differential_expression/pseudobulk_deseq2/script.py b/src/workflows/test_workflows/differential_expression/pseudobulk_deseq2/script.py
new file mode 100644
index 00000000000..297bac204c7
--- /dev/null
+++ b/src/workflows/test_workflows/differential_expression/pseudobulk_deseq2/script.py
@@ -0,0 +1,59 @@
+import pandas as pd
+import sys
+import pytest
+
+## VIASH START
+par = {"input": "test/deseq_analysis_classical_monocyte.csv"}
+## VIASH END
+
+
+def test_run():
+    results = pd.read_csv(par["input"])
+
+    # Make sure expected columns are present
+    expected_columns = [
+        "baseMean",
+        "log2FoldChange",
+        "lfcSE",
+        "stat",
+        "pvalue",
+        "padj",
+        "significant",
+        "gene_id",
+        "contrast",
+        "comparison_group",
+        "control_group",
+        "abs_log2FoldChange",
+    ]
+    assert all(col in results.columns for col in expected_columns), (
+        f"Expected columns {expected_columns} not found"
+    )
+
+    # Make sure columns have expected dtypes
+    assert results["significant"].dtypes == "bool", (
+        "Expected 'significant' column to be of type bool"
+    )
+
+    expected_float_cols = [
+        "baseMean",
+        "log2FoldChange",
+        "lfcSE",
+        "stat",
+        "pvalue",
+        "padj",
+        "abs_log2FoldChange",
+    ]
+    float_cols = results.select_dtypes(include=["float"]).columns.tolist()
+    assert all(col in float_cols for col in expected_float_cols), (
+        f"Expected float columns {expected_float_cols} not found"
+    )
+
+    expected_obj_cols = ["gene_id", "contrast", "comparison_group", "control_group"]
+    obj_cols = results.select_dtypes(include=["object"]).columns.tolist()
+    assert all(col in obj_cols for col in expected_obj_cols), (
+        f"Expected object columns {expected_obj_cols} not found"
+    )
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__, "--import-mode=importlib"]))