Loading Data in Parallel

In [4]:
import gc  # For garbage collection
import json
import os
import time
from collections import defaultdict
from pathlib import Path
from typing import Tuple, List, Dict, Iterator

import numpy as np
import pandas as pd
import scanpy as sc
from joblib import Parallel, delayed
from scipy.sparse import csc_matrix
from tqdm import tqdm


class OptimizedGeneDataProcessor:
    """Builds per-target-gene training matrices from an expression matrix and a gene network.

    Typical use: call ``load_data`` once, then either ``process_in_batches`` (all
    targets, optionally written to disk) or ``prepare_training_data_for_gene``
    (a single target). ``load_gene_data`` reads back what ``process_in_batches``
    wrote to disk.
    """

    def __init__(self):
        self.gene_data = {}
        self.sample_names = None
        self.related_genes = {}
        # Expression matrix stored column-compressed so per-gene column slices are cheap.
        self.expr_matrix = None
        self.gene_names = None
        # gene name -> column index in ``expr_matrix``
        self.gene_name_to_idx = {}
        # target gene -> list of source genes taken from the network file
        self.gene_network = defaultdict(list)
        self.valid_targets = []
        self.start_time = None

    def load_data(self, expression_file: Path, network_file: Path):
        """
        Loads expression and network data once for all target genes.

        Only network edges whose source AND target are both present in the
        expression data are kept. Calling this method again replaces the
        previously loaded network instead of appending to it.

        Args:
            expression_file: Path to h5ad expression data file
            network_file: Path to network TSV file; the first column is read as
                the source gene and the second as the target gene
        """
        self.start_time = time.time()
        print("Loading expression data...")
        adata = sc.read_h5ad(expression_file)
        # Dense input is wrapped in a CSC matrix; sparse input is converted to CSC.
        self.expr_matrix = (
            csc_matrix(adata.X) if not hasattr(adata.X, "tocsc") else adata.X.tocsc()
        )
        self.gene_names = np.array(adata.var_names.tolist())
        self.sample_names = np.array(adata.obs_names.tolist())
        self.gene_name_to_idx = {gene: idx for idx, gene in enumerate(self.gene_names)}

        print("Loading network data...")
        network_df = pd.read_csv(network_file, sep="\t")
        source_col, target_col = network_df.columns[:2]

        # Start from an empty network so a repeated load does not duplicate edges.
        self.gene_network = defaultdict(list)
        known_genes = self.gene_name_to_idx
        # Walk the two columns directly; iterrows() builds a Series per row and is
        # far slower on large edge lists.
        for source, target in zip(network_df[source_col], network_df[target_col]):
            if target in known_genes and source in known_genes:
                self.gene_network[target].append(source)

        self.valid_targets = list(self.gene_network.keys())
        print(f"Found {len(self.valid_targets)} valid target genes in the network.")

    def get_all_target_genes(self) -> List[str]:
        """Returns list of all valid target genes in the network."""
        return self.valid_targets

    def prepare_training_data_for_gene(
        self, target_gene: str
    ) -> Tuple[np.ndarray, np.ndarray, Dict, List[str]]:
        """
        Prepares training data for a specific target gene.

        Args:
            target_gene: Name of target gene

        Returns:
            Tuple of (input matrix, target values, model config, related genes list).
            The input matrix holds one column per related gene and the target
            values are the target gene's column, both as float32.
            ``(None, None, None, [])`` is returned when the gene has no related
            genes in the network or when densifying the data raises MemoryError.
        """
        related_genes = self.gene_network.get(target_gene, [])
        if not related_genes:
            print(f"Warning: No related genes found for {target_gene}")
            return None, None, None, []

        target_idx = self.gene_name_to_idx[target_gene]
        related_indices = [self.gene_name_to_idx[gene] for gene in related_genes]

        try:
            X = self.expr_matrix[:, related_indices].toarray()
            y = self.expr_matrix[:, target_idx].toarray().ravel()

            # The first width entry follows the number of input genes.
            config = {"width": [X.shape[1], 2, 1], "grid": 5, "k": 4, "seed": 42}

            # Hand back a copy of the gene list so callers cannot mutate the
            # network stored on this instance.
            return (
                X.astype(np.float32),
                y.astype(np.float32),
                config,
                list(related_genes),
            )
        except MemoryError:
            print(
                f"Memory error processing gene {target_gene} with {len(related_genes)} related genes"
            )
            return None, None, None, []

    def prepare_all_training_data(
        self,
    ) -> Iterator[Tuple[str, np.ndarray, np.ndarray, Dict, List[str]]]:
        """
        Generator that yields prepared training data for all target genes.

        Genes for which no data could be prepared are skipped.
        """
        for target_gene in tqdm(self.valid_targets, desc="Processing genes"):
            X, y, config, related_genes = self.prepare_training_data_for_gene(
                target_gene
            )
            if X is not None:
                yield target_gene, X, y, config, related_genes

    def _save_gene_to_disk(self, gene, data_dir, X, y, config, related_genes) -> str:
        """Writes X.npy, y.npy and metadata.json for one gene; returns the gene's directory."""
        # NOTE(review): the gene name is used verbatim as a directory name; a name
        # containing a path separator would create nested directories.
        gene_path = os.path.join(data_dir, f"{gene}")
        os.makedirs(gene_path, exist_ok=True)

        np.save(os.path.join(gene_path, "X.npy"), X)
        np.save(os.path.join(gene_path, "y.npy"), y)

        with open(os.path.join(gene_path, "metadata.json"), "w") as f:
            json.dump(
                {
                    "config": config,
                    "related_genes": related_genes,
                    "X_shape": X.shape,
                    "y_shape": y.shape,
                },
                f,
            )
        return gene_path

    def process_in_batches(
        self, batch_size: int = 100, output_dir: str = None
    ) -> Dict[str, Dict]:
        """
        Process genes in memory-efficient batches and optionally save to disk.

        Args:
            batch_size: Number of genes to process in each batch (must be >= 1)
            output_dir: If provided, each gene's arrays and metadata are written
                under ``<output_dir>/gene_data/<gene>/`` and only path/shape
                info is kept in memory; a processing summary and a gene
                manifest are written to ``output_dir`` as JSON

        Returns:
            ``{"genes": ..., "summary": ...}``. ``genes`` maps each processed
            gene either to its in-memory data (``X``, ``y``, ``config``,
            ``related_genes``) or, when ``output_dir`` is given, to its saved
            path and shapes. ``summary`` holds counts, elapsed time and the
            list of ``(gene, error message)`` pairs.

        Raises:
            ValueError: If no data has been loaded or ``batch_size`` is < 1.
        """
        if not self.valid_targets:
            raise ValueError("No data loaded. Call load_data() first.")
        if batch_size < 1:
            # range() rejects a zero step and silently yields nothing for a
            # negative one, so fail early with a clear message instead.
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        expected_count = len(self.valid_targets)
        processed_count = 0
        error_genes = []
        processed_genes = {}

        # Create output directory if needed
        data_dir = None
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
            data_dir = os.path.join(output_dir, "gene_data")
            os.makedirs(data_dir, exist_ok=True)

        self.start_time = time.time()
        print(f"Processing {expected_count} genes in batches of {batch_size}")

        for batch_start in range(0, expected_count, batch_size):
            batch_end = min(batch_start + batch_size, expected_count)
            print(
                f"\nBatch {batch_start//batch_size + 1}: Processing genes {batch_start+1}-{batch_end} of {expected_count}"
            )

            batch_genes = self.valid_targets[batch_start:batch_end]

            for gene in tqdm(batch_genes, desc="Processing batch"):
                try:
                    X, y, config, related_genes = self.prepare_training_data_for_gene(
                        gene
                    )
                    if X is None:
                        # Nothing could be prepared for this gene; move on.
                        continue

                    processed_count += 1

                    if output_dir:
                        # Persist the arrays and keep only lightweight info in memory.
                        gene_path = self._save_gene_to_disk(
                            gene, data_dir, X, y, config, related_genes
                        )
                        processed_genes[gene] = {
                            "path": gene_path,
                            "X_shape": X.shape,
                            "y_shape": y.shape,
                            "num_related_genes": len(related_genes),
                        }
                    else:
                        processed_genes[gene] = {
                            "X": X,
                            "y": y,
                            "config": config,
                            "related_genes": related_genes,
                        }
                except Exception as e:
                    # Best effort: record the failure and continue with the next gene.
                    print(f"Error processing gene {gene}: {e}")
                    error_genes.append((gene, f"Processing error: {str(e)}"))

            # Print progress
            elapsed = time.time() - self.start_time
            print(f"Progress: {processed_count}/{expected_count} genes processed")
            print(f"Elapsed time: {elapsed:.1f} seconds ({elapsed/60:.1f} minutes)")
            # Base the estimate on genes attempted so far (always >= 1 here), not on
            # genes that succeeded: skipped or failed genes also consumed time, and
            # a batch with zero successes must not divide by zero.
            attempted = batch_end
            remaining_minutes = (elapsed / attempted) * (expected_count - attempted) / 60
            print(f"Estimated remaining time: {remaining_minutes:.1f} minutes")

            # Force garbage collection between batches
            gc.collect()

        # Print final summary
        elapsed_time = time.time() - self.start_time
        summary = {
            "processed_count": processed_count,
            "expected_count": expected_count,
            "error_count": len(error_genes),
            "total_time_seconds": elapsed_time,
            "errors": error_genes,
        }

        print(f"\nProcessing complete")
        print(f"Total genes processed: {processed_count}/{expected_count}")
        print(f"Genes with errors: {len(error_genes)}")
        print(f"Total time: {elapsed_time:.2f} seconds ({elapsed_time/60:.2f} minutes)")

        if error_genes:
            print("\nFirst 10 genes with errors:")
            for gene, error in error_genes[:10]:
                print(f"  {gene}: {error}")
            if len(error_genes) > 10:
                print(f"  ... and {len(error_genes) - 10} more")

        if output_dir:
            # Save summary to the output directory
            with open(os.path.join(output_dir, "processing_summary.json"), "w") as f:
                json.dump(summary, f, indent=2)
            print(f"Data saved to {output_dir}")

            # Create a manifest file for easy loading
            with open(os.path.join(output_dir, "gene_manifest.json"), "w") as f:
                json.dump(processed_genes, f)

        return {"genes": processed_genes, "summary": summary}

    def load_gene_data(self, gene, data_dir):
        """
        Load a single gene's data from disk.

        Args:
            gene: Name of the gene
            data_dir: Directory where data is stored (the ``output_dir`` that was
                passed to ``process_in_batches``)

        Returns:
            Tuple of (X, y, config, related_genes)
        """
        gene_path = os.path.join(data_dir, "gene_data", gene)

        # Load arrays
        X = np.load(os.path.join(gene_path, "X.npy"))
        y = np.load(os.path.join(gene_path, "y.npy"))

        # Load metadata
        with open(os.path.join(gene_path, "metadata.json"), "r") as f:
            metadata = json.load(f)

        return X, y, metadata["config"], metadata["related_genes"]

Loading and Processing the Data

In [5]:
from pathlib import Path

# Inputs and outputs for this run, gathered in one place so they are easy to change.
EXPRESSION_FILE = Path("Data/expression_data1.h5ad")
NETWORK_FILE = Path("Data/net_grn.tsv")
OUTPUT_DIR = "processed_gene_data"
BATCH_SIZE = 100

# Read the expression matrix and the gene network once, up front.
processor = OptimizedGeneDataProcessor()
processor.load_data(EXPRESSION_FILE, NETWORK_FILE)

# Alternative for smaller datasets (everything stays in memory):
#   results = processor.process_in_batches(batch_size=BATCH_SIZE)
#   gene_data = results["genes"]  # dictionary with all processed gene data

# Larger datasets: write each gene's arrays to disk and keep only path info in memory.
results = processor.process_in_batches(batch_size=BATCH_SIZE, output_dir=OUTPUT_DIR)

Loading expression data...
Loading network data...
Found 23427 valid target genes in the network.
Processing 23427 genes in batches of 100

Batch 1: Processing genes 1-100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 49.71it/s]


Progress: 100/23427 genes processed
Elapsed time: 2.1 seconds (0.0 minutes)
Estimated remaining time: 8.0 minutes

Batch 2: Processing genes 101-200 of 23427


Processing batch: 100%|██████████| 100/100 [00:01<00:00, 50.50it/s]


Progress: 200/23427 genes processed
Elapsed time: 4.3 seconds (0.1 minutes)
Estimated remaining time: 8.3 minutes

Batch 3: Processing genes 201-300 of 23427


Processing batch: 100%|██████████| 100/100 [00:01<00:00, 50.02it/s]


Progress: 300/23427 genes processed
Elapsed time: 6.5 seconds (0.1 minutes)
Estimated remaining time: 8.3 minutes

Batch 4: Processing genes 301-400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 44.09it/s]


Progress: 400/23427 genes processed
Elapsed time: 8.9 seconds (0.1 minutes)
Estimated remaining time: 8.5 minutes

Batch 5: Processing genes 401-500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.33it/s]


Progress: 500/23427 genes processed
Elapsed time: 11.6 seconds (0.2 minutes)
Estimated remaining time: 8.8 minutes

Batch 6: Processing genes 501-600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.09it/s]


Progress: 600/23427 genes processed
Elapsed time: 14.3 seconds (0.2 minutes)
Estimated remaining time: 9.1 minutes

Batch 7: Processing genes 601-700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 37.67it/s]


Progress: 700/23427 genes processed
Elapsed time: 17.1 seconds (0.3 minutes)
Estimated remaining time: 9.3 minutes

Batch 8: Processing genes 701-800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.22it/s]


Progress: 800/23427 genes processed
Elapsed time: 19.8 seconds (0.3 minutes)
Estimated remaining time: 9.3 minutes

Batch 9: Processing genes 801-900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 43.01it/s]


Progress: 900/23427 genes processed
Elapsed time: 22.3 seconds (0.4 minutes)
Estimated remaining time: 9.3 minutes

Batch 10: Processing genes 901-1000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.05it/s]


Progress: 1000/23427 genes processed
Elapsed time: 24.8 seconds (0.4 minutes)
Estimated remaining time: 9.3 minutes

Batch 11: Processing genes 1001-1100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.34it/s]


Progress: 1100/23427 genes processed
Elapsed time: 27.4 seconds (0.5 minutes)
Estimated remaining time: 9.3 minutes

Batch 12: Processing genes 1101-1200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.70it/s]


Progress: 1200/23427 genes processed
Elapsed time: 30.0 seconds (0.5 minutes)
Estimated remaining time: 9.3 minutes

Batch 13: Processing genes 1201-1300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.78it/s]


Progress: 1300/23427 genes processed
Elapsed time: 32.7 seconds (0.5 minutes)
Estimated remaining time: 9.3 minutes

Batch 14: Processing genes 1301-1400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.22it/s]


Progress: 1400/23427 genes processed
Elapsed time: 35.3 seconds (0.6 minutes)
Estimated remaining time: 9.2 minutes

Batch 15: Processing genes 1401-1500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.51it/s]


Progress: 1500/23427 genes processed
Elapsed time: 37.9 seconds (0.6 minutes)
Estimated remaining time: 9.2 minutes

Batch 16: Processing genes 1501-1600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.18it/s]


Progress: 1600/23427 genes processed
Elapsed time: 40.5 seconds (0.7 minutes)
Estimated remaining time: 9.2 minutes

Batch 17: Processing genes 1601-1700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.65it/s]


Progress: 1700/23427 genes processed
Elapsed time: 43.2 seconds (0.7 minutes)
Estimated remaining time: 9.2 minutes

Batch 18: Processing genes 1701-1800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.18it/s]


Progress: 1800/23427 genes processed
Elapsed time: 45.9 seconds (0.8 minutes)
Estimated remaining time: 9.2 minutes

Batch 19: Processing genes 1801-1900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.07it/s]


Progress: 1900/23427 genes processed
Elapsed time: 48.5 seconds (0.8 minutes)
Estimated remaining time: 9.2 minutes

Batch 20: Processing genes 1901-2000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.56it/s]


Progress: 2000/23427 genes processed
Elapsed time: 51.2 seconds (0.9 minutes)
Estimated remaining time: 9.1 minutes

Batch 21: Processing genes 2001-2100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 42.37it/s]


Progress: 2100/23427 genes processed
Elapsed time: 53.7 seconds (0.9 minutes)
Estimated remaining time: 9.1 minutes

Batch 22: Processing genes 2101-2200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.08it/s]


Progress: 2200/23427 genes processed
Elapsed time: 56.2 seconds (0.9 minutes)
Estimated remaining time: 9.0 minutes

Batch 23: Processing genes 2201-2300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.09it/s]


Progress: 2300/23427 genes processed
Elapsed time: 58.9 seconds (1.0 minutes)
Estimated remaining time: 9.0 minutes

Batch 24: Processing genes 2301-2400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.58it/s]


Progress: 2400/23427 genes processed
Elapsed time: 61.5 seconds (1.0 minutes)
Estimated remaining time: 9.0 minutes

Batch 25: Processing genes 2401-2500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.51it/s]


Progress: 2500/23427 genes processed
Elapsed time: 64.1 seconds (1.1 minutes)
Estimated remaining time: 8.9 minutes

Batch 26: Processing genes 2501-2600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 43.56it/s]


Progress: 2600/23427 genes processed
Elapsed time: 66.5 seconds (1.1 minutes)
Estimated remaining time: 8.9 minutes

Batch 27: Processing genes 2601-2700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.95it/s]


Progress: 2700/23427 genes processed
Elapsed time: 69.1 seconds (1.2 minutes)
Estimated remaining time: 8.8 minutes

Batch 28: Processing genes 2701-2800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.78it/s]


Progress: 2800/23427 genes processed
Elapsed time: 71.7 seconds (1.2 minutes)
Estimated remaining time: 8.8 minutes

Batch 29: Processing genes 2801-2900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.71it/s]


Progress: 2900/23427 genes processed
Elapsed time: 74.3 seconds (1.2 minutes)
Estimated remaining time: 8.8 minutes

Batch 30: Processing genes 2901-3000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.00it/s]


Progress: 3000/23427 genes processed
Elapsed time: 76.9 seconds (1.3 minutes)
Estimated remaining time: 8.7 minutes

Batch 31: Processing genes 3001-3100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.83it/s]


Progress: 3100/23427 genes processed
Elapsed time: 79.4 seconds (1.3 minutes)
Estimated remaining time: 8.7 minutes

Batch 32: Processing genes 3101-3200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.79it/s]


Progress: 3200/23427 genes processed
Elapsed time: 82.0 seconds (1.4 minutes)
Estimated remaining time: 8.6 minutes

Batch 33: Processing genes 3201-3300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 42.26it/s]


Progress: 3300/23427 genes processed
Elapsed time: 84.5 seconds (1.4 minutes)
Estimated remaining time: 8.6 minutes

Batch 34: Processing genes 3301-3400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.11it/s]


Progress: 3400/23427 genes processed
Elapsed time: 87.1 seconds (1.5 minutes)
Estimated remaining time: 8.6 minutes

Batch 35: Processing genes 3401-3500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.57it/s]


Progress: 3500/23427 genes processed
Elapsed time: 89.8 seconds (1.5 minutes)
Estimated remaining time: 8.5 minutes

Batch 36: Processing genes 3501-3600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.68it/s]


Progress: 3600/23427 genes processed
Elapsed time: 92.3 seconds (1.5 minutes)
Estimated remaining time: 8.5 minutes

Batch 37: Processing genes 3601-3700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 42.29it/s]


Progress: 3700/23427 genes processed
Elapsed time: 94.8 seconds (1.6 minutes)
Estimated remaining time: 8.4 minutes

Batch 38: Processing genes 3701-3800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.02it/s]


Progress: 3800/23427 genes processed
Elapsed time: 97.4 seconds (1.6 minutes)
Estimated remaining time: 8.4 minutes

Batch 39: Processing genes 3801-3900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 42.26it/s]


Progress: 3900/23427 genes processed
Elapsed time: 99.9 seconds (1.7 minutes)
Estimated remaining time: 8.3 minutes

Batch 40: Processing genes 3901-4000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 42.23it/s]


Progress: 4000/23427 genes processed
Elapsed time: 102.5 seconds (1.7 minutes)
Estimated remaining time: 8.3 minutes

Batch 41: Processing genes 4001-4100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.97it/s]


Progress: 4100/23427 genes processed
Elapsed time: 105.2 seconds (1.8 minutes)
Estimated remaining time: 8.3 minutes

Batch 42: Processing genes 4101-4200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.21it/s]


Progress: 4200/23427 genes processed
Elapsed time: 107.7 seconds (1.8 minutes)
Estimated remaining time: 8.2 minutes

Batch 43: Processing genes 4201-4300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.13it/s]


Progress: 4300/23427 genes processed
Elapsed time: 110.4 seconds (1.8 minutes)
Estimated remaining time: 8.2 minutes

Batch 44: Processing genes 4301-4400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 46.32it/s]


Progress: 4400/23427 genes processed
Elapsed time: 112.7 seconds (1.9 minutes)
Estimated remaining time: 8.1 minutes

Batch 45: Processing genes 4401-4500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 44.61it/s]


Progress: 4500/23427 genes processed
Elapsed time: 115.1 seconds (1.9 minutes)
Estimated remaining time: 8.1 minutes

Batch 46: Processing genes 4501-4600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 46.32it/s]


Progress: 4600/23427 genes processed
Elapsed time: 117.4 seconds (2.0 minutes)
Estimated remaining time: 8.0 minutes

Batch 47: Processing genes 4601-4700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 43.94it/s]


Progress: 4700/23427 genes processed
Elapsed time: 119.8 seconds (2.0 minutes)
Estimated remaining time: 8.0 minutes

Batch 48: Processing genes 4701-4800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.95it/s]


Progress: 4800/23427 genes processed
Elapsed time: 122.1 seconds (2.0 minutes)
Estimated remaining time: 7.9 minutes

Batch 49: Processing genes 4801-4900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 46.66it/s]


Progress: 4900/23427 genes processed
Elapsed time: 124.4 seconds (2.1 minutes)
Estimated remaining time: 7.8 minutes

Batch 50: Processing genes 4901-5000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 42.20it/s]


Progress: 5000/23427 genes processed
Elapsed time: 126.9 seconds (2.1 minutes)
Estimated remaining time: 7.8 minutes

Batch 51: Processing genes 5001-5100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 42.81it/s]


Progress: 5100/23427 genes processed
Elapsed time: 129.4 seconds (2.2 minutes)
Estimated remaining time: 7.7 minutes

Batch 52: Processing genes 5101-5200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 46.81it/s]


Progress: 5200/23427 genes processed
Elapsed time: 131.7 seconds (2.2 minutes)
Estimated remaining time: 7.7 minutes

Batch 53: Processing genes 5201-5300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 47.58it/s]


Progress: 5300/23427 genes processed
Elapsed time: 133.9 seconds (2.2 minutes)
Estimated remaining time: 7.6 minutes

Batch 54: Processing genes 5301-5400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.43it/s]


Progress: 5400/23427 genes processed
Elapsed time: 136.2 seconds (2.3 minutes)
Estimated remaining time: 7.6 minutes

Batch 55: Processing genes 5401-5500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.69it/s]


Progress: 5500/23427 genes processed
Elapsed time: 138.5 seconds (2.3 minutes)
Estimated remaining time: 7.5 minutes

Batch 56: Processing genes 5501-5600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 46.00it/s]


Progress: 5600/23427 genes processed
Elapsed time: 140.8 seconds (2.3 minutes)
Estimated remaining time: 7.5 minutes

Batch 57: Processing genes 5601-5700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.62it/s]


Progress: 5700/23427 genes processed
Elapsed time: 143.2 seconds (2.4 minutes)
Estimated remaining time: 7.4 minutes

Batch 58: Processing genes 5701-5800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.63it/s]


Progress: 5800/23427 genes processed
Elapsed time: 145.5 seconds (2.4 minutes)
Estimated remaining time: 7.4 minutes

Batch 59: Processing genes 5801-5900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 42.84it/s]


Progress: 5900/23427 genes processed
Elapsed time: 148.0 seconds (2.5 minutes)
Estimated remaining time: 7.3 minutes

Batch 60: Processing genes 5901-6000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.02it/s]


Progress: 6000/23427 genes processed
Elapsed time: 150.4 seconds (2.5 minutes)
Estimated remaining time: 7.3 minutes

Batch 61: Processing genes 6001-6100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.60it/s]


Progress: 6100/23427 genes processed
Elapsed time: 153.0 seconds (2.6 minutes)
Estimated remaining time: 7.2 minutes

Batch 62: Processing genes 6101-6200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 36.10it/s]


Progress: 6200/23427 genes processed
Elapsed time: 155.9 seconds (2.6 minutes)
Estimated remaining time: 7.2 minutes

Batch 63: Processing genes 6201-6300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 35.73it/s]


Progress: 6300/23427 genes processed
Elapsed time: 158.9 seconds (2.6 minutes)
Estimated remaining time: 7.2 minutes

Batch 64: Processing genes 6301-6400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 37.27it/s]


Progress: 6400/23427 genes processed
Elapsed time: 161.7 seconds (2.7 minutes)
Estimated remaining time: 7.2 minutes

Batch 65: Processing genes 6401-6500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.17it/s]


Progress: 6500/23427 genes processed
Elapsed time: 164.4 seconds (2.7 minutes)
Estimated remaining time: 7.1 minutes

Batch 66: Processing genes 6501-6600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 37.72it/s]


Progress: 6600/23427 genes processed
Elapsed time: 167.2 seconds (2.8 minutes)
Estimated remaining time: 7.1 minutes

Batch 67: Processing genes 6601-6700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.66it/s]


Progress: 6700/23427 genes processed
Elapsed time: 170.0 seconds (2.8 minutes)
Estimated remaining time: 7.1 minutes

Batch 68: Processing genes 6701-6800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.11it/s]


Progress: 6800/23427 genes processed
Elapsed time: 172.7 seconds (2.9 minutes)
Estimated remaining time: 7.0 minutes

Batch 69: Processing genes 6801-6900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 37.47it/s]


Progress: 6900/23427 genes processed
Elapsed time: 175.5 seconds (2.9 minutes)
Estimated remaining time: 7.0 minutes

Batch 70: Processing genes 6901-7000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 35.11it/s]


Progress: 7000/23427 genes processed
Elapsed time: 178.5 seconds (3.0 minutes)
Estimated remaining time: 7.0 minutes

Batch 71: Processing genes 7001-7100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 33.59it/s]


Progress: 7100/23427 genes processed
Elapsed time: 181.7 seconds (3.0 minutes)
Estimated remaining time: 7.0 minutes

Batch 72: Processing genes 7101-7200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 36.62it/s]


Progress: 7200/23427 genes processed
Elapsed time: 184.6 seconds (3.1 minutes)
Estimated remaining time: 6.9 minutes

Batch 73: Processing genes 7201-7300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 34.44it/s]


Progress: 7300/23427 genes processed
Elapsed time: 187.6 seconds (3.1 minutes)
Estimated remaining time: 6.9 minutes

Batch 74: Processing genes 7301-7400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 33.62it/s]


Progress: 7400/23427 genes processed
Elapsed time: 190.8 seconds (3.2 minutes)
Estimated remaining time: 6.9 minutes

Batch 75: Processing genes 7401-7500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 36.33it/s]


Progress: 7500/23427 genes processed
Elapsed time: 193.7 seconds (3.2 minutes)
Estimated remaining time: 6.9 minutes

Batch 76: Processing genes 7501-7600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.15it/s]


Progress: 7600/23427 genes processed
Elapsed time: 196.4 seconds (3.3 minutes)
Estimated remaining time: 6.8 minutes

Batch 77: Processing genes 7601-7700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 36.70it/s]


Progress: 7700/23427 genes processed
Elapsed time: 199.3 seconds (3.3 minutes)
Estimated remaining time: 6.8 minutes

Batch 78: Processing genes 7701-7800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 33.52it/s]


Progress: 7800/23427 genes processed
Elapsed time: 202.4 seconds (3.4 minutes)
Estimated remaining time: 6.8 minutes

Batch 79: Processing genes 7801-7900 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 29.15it/s]


Progress: 7900/23427 genes processed
Elapsed time: 206.0 seconds (3.4 minutes)
Estimated remaining time: 6.7 minutes

Batch 80: Processing genes 7901-8000 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 27.55it/s]


Progress: 8000/23427 genes processed
Elapsed time: 209.8 seconds (3.5 minutes)
Estimated remaining time: 6.7 minutes

Batch 81: Processing genes 8001-8100 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 28.46it/s]


Progress: 8100/23427 genes processed
Elapsed time: 213.5 seconds (3.6 minutes)
Estimated remaining time: 6.7 minutes

Batch 82: Processing genes 8101-8200 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 25.20it/s]


Progress: 8200/23427 genes processed
Elapsed time: 217.7 seconds (3.6 minutes)
Estimated remaining time: 6.7 minutes

Batch 83: Processing genes 8201-8300 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 28.51it/s]


Progress: 8300/23427 genes processed
Elapsed time: 221.4 seconds (3.7 minutes)
Estimated remaining time: 6.7 minutes

Batch 84: Processing genes 8301-8400 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 28.03it/s]


Progress: 8400/23427 genes processed
Elapsed time: 225.1 seconds (3.8 minutes)
Estimated remaining time: 6.7 minutes

Batch 85: Processing genes 8401-8500 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 26.08it/s]


Progress: 8500/23427 genes processed
Elapsed time: 229.2 seconds (3.8 minutes)
Estimated remaining time: 6.7 minutes

Batch 86: Processing genes 8501-8600 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 27.36it/s]


Progress: 8600/23427 genes processed
Elapsed time: 233.0 seconds (3.9 minutes)
Estimated remaining time: 6.7 minutes

Batch 87: Processing genes 8601-8700 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 26.75it/s]


Progress: 8700/23427 genes processed
Elapsed time: 236.9 seconds (3.9 minutes)
Estimated remaining time: 6.7 minutes

Batch 88: Processing genes 8701-8800 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 26.34it/s]


Progress: 8800/23427 genes processed
Elapsed time: 240.9 seconds (4.0 minutes)
Estimated remaining time: 6.7 minutes

Batch 89: Processing genes 8801-8900 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 26.05it/s]


Progress: 8900/23427 genes processed
Elapsed time: 244.9 seconds (4.1 minutes)
Estimated remaining time: 6.7 minutes

Batch 90: Processing genes 8901-9000 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 26.24it/s]


Progress: 9000/23427 genes processed
Elapsed time: 248.9 seconds (4.1 minutes)
Estimated remaining time: 6.7 minutes

Batch 91: Processing genes 9001-9100 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 25.97it/s]


Progress: 9100/23427 genes processed
Elapsed time: 253.0 seconds (4.2 minutes)
Estimated remaining time: 6.6 minutes

Batch 92: Processing genes 9101-9200 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 26.18it/s]


Progress: 9200/23427 genes processed
Elapsed time: 257.0 seconds (4.3 minutes)
Estimated remaining time: 6.6 minutes

Batch 93: Processing genes 9201-9300 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 27.01it/s]


Progress: 9300/23427 genes processed
Elapsed time: 260.9 seconds (4.3 minutes)
Estimated remaining time: 6.6 minutes

Batch 94: Processing genes 9301-9400 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 27.26it/s]


Progress: 9400/23427 genes processed
Elapsed time: 264.7 seconds (4.4 minutes)
Estimated remaining time: 6.6 minutes

Batch 95: Processing genes 9401-9500 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 26.53it/s]


Progress: 9500/23427 genes processed
Elapsed time: 268.6 seconds (4.5 minutes)
Estimated remaining time: 6.6 minutes

Batch 96: Processing genes 9501-9600 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 26.21it/s]


Progress: 9600/23427 genes processed
Elapsed time: 272.7 seconds (4.5 minutes)
Estimated remaining time: 6.5 minutes

Batch 97: Processing genes 9601-9700 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 27.29it/s]


Progress: 9700/23427 genes processed
Elapsed time: 276.5 seconds (4.6 minutes)
Estimated remaining time: 6.5 minutes

Batch 98: Processing genes 9701-9800 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 27.91it/s]


Progress: 9800/23427 genes processed
Elapsed time: 280.3 seconds (4.7 minutes)
Estimated remaining time: 6.5 minutes

Batch 99: Processing genes 9801-9900 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 27.54it/s]


Progress: 9900/23427 genes processed
Elapsed time: 284.1 seconds (4.7 minutes)
Estimated remaining time: 6.5 minutes

Batch 100: Processing genes 9901-10000 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 27.90it/s]


Progress: 10000/23427 genes processed
Elapsed time: 287.9 seconds (4.8 minutes)
Estimated remaining time: 6.4 minutes

Batch 101: Processing genes 10001-10100 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 28.20it/s]


Progress: 10100/23427 genes processed
Elapsed time: 291.6 seconds (4.9 minutes)
Estimated remaining time: 6.4 minutes

Batch 102: Processing genes 10101-10200 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 27.81it/s]


Progress: 10200/23427 genes processed
Elapsed time: 295.3 seconds (4.9 minutes)
Estimated remaining time: 6.4 minutes

Batch 103: Processing genes 10201-10300 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 28.27it/s]


Progress: 10300/23427 genes processed
Elapsed time: 299.1 seconds (5.0 minutes)
Estimated remaining time: 6.4 minutes

Batch 104: Processing genes 10301-10400 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 28.41it/s]


Progress: 10400/23427 genes processed
Elapsed time: 302.8 seconds (5.0 minutes)
Estimated remaining time: 6.3 minutes

Batch 105: Processing genes 10401-10500 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 27.80it/s]


Progress: 10500/23427 genes processed
Elapsed time: 306.5 seconds (5.1 minutes)
Estimated remaining time: 6.3 minutes

Batch 106: Processing genes 10501-10600 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 29.53it/s]


Progress: 10600/23427 genes processed
Elapsed time: 310.1 seconds (5.2 minutes)
Estimated remaining time: 6.3 minutes

Batch 107: Processing genes 10601-10700 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 28.46it/s]


Progress: 10700/23427 genes processed
Elapsed time: 313.8 seconds (5.2 minutes)
Estimated remaining time: 6.2 minutes

Batch 108: Processing genes 10701-10800 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 27.74it/s]


Progress: 10800/23427 genes processed
Elapsed time: 317.6 seconds (5.3 minutes)
Estimated remaining time: 6.2 minutes

Batch 109: Processing genes 10801-10900 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 29.99it/s]


Progress: 10900/23427 genes processed
Elapsed time: 321.1 seconds (5.4 minutes)
Estimated remaining time: 6.2 minutes

Batch 110: Processing genes 10901-11000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 46.37it/s]


Progress: 11000/23427 genes processed
Elapsed time: 323.4 seconds (5.4 minutes)
Estimated remaining time: 6.1 minutes

Batch 111: Processing genes 11001-11100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 44.91it/s]


Progress: 11100/23427 genes processed
Elapsed time: 325.8 seconds (5.4 minutes)
Estimated remaining time: 6.0 minutes

Batch 112: Processing genes 11101-11200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.19it/s]


Progress: 11200/23427 genes processed
Elapsed time: 328.1 seconds (5.5 minutes)
Estimated remaining time: 6.0 minutes

Batch 113: Processing genes 11201-11300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.20it/s]


Progress: 11300/23427 genes processed
Elapsed time: 330.4 seconds (5.5 minutes)
Estimated remaining time: 5.9 minutes

Batch 114: Processing genes 11301-11400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.09it/s]


Progress: 11400/23427 genes processed
Elapsed time: 332.8 seconds (5.5 minutes)
Estimated remaining time: 5.9 minutes

Batch 115: Processing genes 11401-11500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.66it/s]


Progress: 11500/23427 genes processed
Elapsed time: 335.1 seconds (5.6 minutes)
Estimated remaining time: 5.8 minutes

Batch 116: Processing genes 11501-11600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 44.48it/s]


Progress: 11600/23427 genes processed
Elapsed time: 337.5 seconds (5.6 minutes)
Estimated remaining time: 5.7 minutes

Batch 117: Processing genes 11601-11700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 46.56it/s]


Progress: 11700/23427 genes processed
Elapsed time: 339.8 seconds (5.7 minutes)
Estimated remaining time: 5.7 minutes

Batch 118: Processing genes 11701-11800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.95it/s]


Progress: 11800/23427 genes processed
Elapsed time: 342.1 seconds (5.7 minutes)
Estimated remaining time: 5.6 minutes

Batch 119: Processing genes 11801-11900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.35it/s]


Progress: 11900/23427 genes processed
Elapsed time: 344.4 seconds (5.7 minutes)
Estimated remaining time: 5.6 minutes

Batch 120: Processing genes 11901-12000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.04it/s]


Progress: 12000/23427 genes processed
Elapsed time: 346.8 seconds (5.8 minutes)
Estimated remaining time: 5.5 minutes

Batch 121: Processing genes 12001-12100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 44.50it/s]


Progress: 12100/23427 genes processed
Elapsed time: 349.2 seconds (5.8 minutes)
Estimated remaining time: 5.4 minutes

Batch 122: Processing genes 12101-12200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 44.13it/s]


Progress: 12200/23427 genes processed
Elapsed time: 351.6 seconds (5.9 minutes)
Estimated remaining time: 5.4 minutes

Batch 123: Processing genes 12201-12300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 46.37it/s]


Progress: 12300/23427 genes processed
Elapsed time: 353.9 seconds (5.9 minutes)
Estimated remaining time: 5.3 minutes

Batch 124: Processing genes 12301-12400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.03it/s]


Progress: 12400/23427 genes processed
Elapsed time: 356.2 seconds (5.9 minutes)
Estimated remaining time: 5.3 minutes

Batch 125: Processing genes 12401-12500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.07it/s]


Progress: 12500/23427 genes processed
Elapsed time: 358.6 seconds (6.0 minutes)
Estimated remaining time: 5.2 minutes

Batch 126: Processing genes 12501-12600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 43.94it/s]


Progress: 12600/23427 genes processed
Elapsed time: 361.0 seconds (6.0 minutes)
Estimated remaining time: 5.2 minutes

Batch 127: Processing genes 12601-12700 of 23427


Processing batch: 100%|██████████| 100/100 [00:01<00:00, 52.95it/s]


Progress: 12700/23427 genes processed
Elapsed time: 363.0 seconds (6.1 minutes)
Estimated remaining time: 5.1 minutes

Batch 128: Processing genes 12701-12800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 47.91it/s]


Progress: 12800/23427 genes processed
Elapsed time: 365.3 seconds (6.1 minutes)
Estimated remaining time: 5.1 minutes

Batch 129: Processing genes 12801-12900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 44.41it/s]


Progress: 12900/23427 genes processed
Elapsed time: 367.7 seconds (6.1 minutes)
Estimated remaining time: 5.0 minutes

Batch 130: Processing genes 12901-13000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.75it/s]


Progress: 13000/23427 genes processed
Elapsed time: 370.0 seconds (6.2 minutes)
Estimated remaining time: 4.9 minutes

Batch 131: Processing genes 13001-13100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 44.80it/s]


Progress: 13100/23427 genes processed
Elapsed time: 372.4 seconds (6.2 minutes)
Estimated remaining time: 4.9 minutes

Batch 132: Processing genes 13101-13200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.43it/s]


Progress: 13200/23427 genes processed
Elapsed time: 374.7 seconds (6.2 minutes)
Estimated remaining time: 4.8 minutes

Batch 133: Processing genes 13201-13300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.01it/s]


Progress: 13300/23427 genes processed
Elapsed time: 377.1 seconds (6.3 minutes)
Estimated remaining time: 4.8 minutes

Batch 134: Processing genes 13301-13400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 44.71it/s]


Progress: 13400/23427 genes processed
Elapsed time: 379.4 seconds (6.3 minutes)
Estimated remaining time: 4.7 minutes

Batch 135: Processing genes 13401-13500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 43.96it/s]


Progress: 13500/23427 genes processed
Elapsed time: 381.8 seconds (6.4 minutes)
Estimated remaining time: 4.7 minutes

Batch 136: Processing genes 13501-13600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 47.32it/s]


Progress: 13600/23427 genes processed
Elapsed time: 384.1 seconds (6.4 minutes)
Estimated remaining time: 4.6 minutes

Batch 137: Processing genes 13601-13700 of 23427


Processing batch: 100%|██████████| 100/100 [00:01<00:00, 53.22it/s]


Progress: 13700/23427 genes processed
Elapsed time: 386.1 seconds (6.4 minutes)
Estimated remaining time: 4.6 minutes

Batch 138: Processing genes 13701-13800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.41it/s]


Progress: 13800/23427 genes processed
Elapsed time: 388.5 seconds (6.5 minutes)
Estimated remaining time: 4.5 minutes

Batch 139: Processing genes 13801-13900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.30it/s]


Progress: 13900/23427 genes processed
Elapsed time: 390.8 seconds (6.5 minutes)
Estimated remaining time: 4.5 minutes

Batch 140: Processing genes 13901-14000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 47.04it/s]


Progress: 14000/23427 genes processed
Elapsed time: 393.1 seconds (6.6 minutes)
Estimated remaining time: 4.4 minutes

Batch 141: Processing genes 14001-14100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.19it/s]


Progress: 14100/23427 genes processed
Elapsed time: 395.4 seconds (6.6 minutes)
Estimated remaining time: 4.4 minutes

Batch 142: Processing genes 14101-14200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 44.34it/s]


Progress: 14200/23427 genes processed
Elapsed time: 397.8 seconds (6.6 minutes)
Estimated remaining time: 4.3 minutes

Batch 143: Processing genes 14201-14300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 44.31it/s]


Progress: 14300/23427 genes processed
Elapsed time: 400.2 seconds (6.7 minutes)
Estimated remaining time: 4.3 minutes

Batch 144: Processing genes 14301-14400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 43.59it/s]


Progress: 14400/23427 genes processed
Elapsed time: 402.7 seconds (6.7 minutes)
Estimated remaining time: 4.2 minutes

Batch 145: Processing genes 14401-14500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.76it/s]


Progress: 14500/23427 genes processed
Elapsed time: 405.2 seconds (6.8 minutes)
Estimated remaining time: 4.2 minutes

Batch 146: Processing genes 14501-14600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 44.78it/s]


Progress: 14600/23427 genes processed
Elapsed time: 407.6 seconds (6.8 minutes)
Estimated remaining time: 4.1 minutes

Batch 147: Processing genes 14601-14700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 46.46it/s]


Progress: 14700/23427 genes processed
Elapsed time: 409.9 seconds (6.8 minutes)
Estimated remaining time: 4.1 minutes

Batch 148: Processing genes 14701-14800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.45it/s]


Progress: 14800/23427 genes processed
Elapsed time: 412.2 seconds (6.9 minutes)
Estimated remaining time: 4.0 minutes

Batch 149: Processing genes 14801-14900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 46.15it/s]


Progress: 14900/23427 genes processed
Elapsed time: 414.5 seconds (6.9 minutes)
Estimated remaining time: 4.0 minutes

Batch 150: Processing genes 14901-15000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.88it/s]


Progress: 15000/23427 genes processed
Elapsed time: 416.8 seconds (6.9 minutes)
Estimated remaining time: 3.9 minutes

Batch 151: Processing genes 15001-15100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.84it/s]


Progress: 15100/23427 genes processed
Elapsed time: 419.2 seconds (7.0 minutes)
Estimated remaining time: 3.9 minutes

Batch 152: Processing genes 15101-15200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 46.34it/s]


Progress: 15200/23427 genes processed
Elapsed time: 421.4 seconds (7.0 minutes)
Estimated remaining time: 3.8 minutes

Batch 153: Processing genes 15201-15300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.97it/s]


Progress: 15300/23427 genes processed
Elapsed time: 423.7 seconds (7.1 minutes)
Estimated remaining time: 3.8 minutes

Batch 154: Processing genes 15301-15400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 45.33it/s]


Progress: 15400/23427 genes processed
Elapsed time: 426.1 seconds (7.1 minutes)
Estimated remaining time: 3.7 minutes

Batch 155: Processing genes 15401-15500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.83it/s]


Progress: 15500/23427 genes processed
Elapsed time: 428.6 seconds (7.1 minutes)
Estimated remaining time: 3.7 minutes

Batch 156: Processing genes 15501-15600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 37.97it/s]


Progress: 15600/23427 genes processed
Elapsed time: 431.5 seconds (7.2 minutes)
Estimated remaining time: 3.6 minutes

Batch 157: Processing genes 15601-15700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.63it/s]


Progress: 15700/23427 genes processed
Elapsed time: 434.2 seconds (7.2 minutes)
Estimated remaining time: 3.6 minutes

Batch 158: Processing genes 15701-15800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 37.49it/s]


Progress: 15800/23427 genes processed
Elapsed time: 437.0 seconds (7.3 minutes)
Estimated remaining time: 3.5 minutes

Batch 159: Processing genes 15801-15900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 37.31it/s]


Progress: 15900/23427 genes processed
Elapsed time: 439.9 seconds (7.3 minutes)
Estimated remaining time: 3.5 minutes

Batch 160: Processing genes 15901-16000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.48it/s]


Progress: 16000/23427 genes processed
Elapsed time: 442.6 seconds (7.4 minutes)
Estimated remaining time: 3.4 minutes

Batch 161: Processing genes 16001-16100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.62it/s]


Progress: 16100/23427 genes processed
Elapsed time: 445.3 seconds (7.4 minutes)
Estimated remaining time: 3.4 minutes

Batch 162: Processing genes 16101-16200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.92it/s]


Progress: 16200/23427 genes processed
Elapsed time: 448.1 seconds (7.5 minutes)
Estimated remaining time: 3.3 minutes

Batch 163: Processing genes 16201-16300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 42.42it/s]


Progress: 16300/23427 genes processed
Elapsed time: 450.6 seconds (7.5 minutes)
Estimated remaining time: 3.3 minutes

Batch 164: Processing genes 16301-16400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.68it/s]


Progress: 16400/23427 genes processed
Elapsed time: 453.2 seconds (7.6 minutes)
Estimated remaining time: 3.2 minutes

Batch 165: Processing genes 16401-16500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.42it/s]


Progress: 16500/23427 genes processed
Elapsed time: 455.8 seconds (7.6 minutes)
Estimated remaining time: 3.2 minutes

Batch 166: Processing genes 16501-16600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 36.84it/s]


Progress: 16600/23427 genes processed
Elapsed time: 458.7 seconds (7.6 minutes)
Estimated remaining time: 3.1 minutes

Batch 167: Processing genes 16601-16700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 37.71it/s]


Progress: 16700/23427 genes processed
Elapsed time: 461.5 seconds (7.7 minutes)
Estimated remaining time: 3.1 minutes

Batch 168: Processing genes 16701-16800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 36.27it/s]


Progress: 16800/23427 genes processed
Elapsed time: 464.4 seconds (7.7 minutes)
Estimated remaining time: 3.1 minutes

Batch 169: Processing genes 16801-16900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.18it/s]


Progress: 16900/23427 genes processed
Elapsed time: 467.1 seconds (7.8 minutes)
Estimated remaining time: 3.0 minutes

Batch 170: Processing genes 16901-17000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.35it/s]


Progress: 17000/23427 genes processed
Elapsed time: 469.8 seconds (7.8 minutes)
Estimated remaining time: 3.0 minutes

Batch 171: Processing genes 17001-17100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 37.92it/s]


Progress: 17100/23427 genes processed
Elapsed time: 472.6 seconds (7.9 minutes)
Estimated remaining time: 2.9 minutes

Batch 172: Processing genes 17101-17200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.74it/s]


Progress: 17200/23427 genes processed
Elapsed time: 475.1 seconds (7.9 minutes)
Estimated remaining time: 2.9 minutes

Batch 173: Processing genes 17201-17300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.51it/s]


Progress: 17300/23427 genes processed
Elapsed time: 477.8 seconds (8.0 minutes)
Estimated remaining time: 2.8 minutes

Batch 174: Processing genes 17301-17400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.77it/s]


Progress: 17400/23427 genes processed
Elapsed time: 480.5 seconds (8.0 minutes)
Estimated remaining time: 2.8 minutes

Batch 175: Processing genes 17401-17500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.01it/s]


Progress: 17500/23427 genes processed
Elapsed time: 483.3 seconds (8.1 minutes)
Estimated remaining time: 2.7 minutes

Batch 176: Processing genes 17501-17600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 41.10it/s]


Progress: 17600/23427 genes processed
Elapsed time: 485.8 seconds (8.1 minutes)
Estimated remaining time: 2.7 minutes

Batch 177: Processing genes 17601-17700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.06it/s]


Progress: 17700/23427 genes processed
Elapsed time: 488.6 seconds (8.1 minutes)
Estimated remaining time: 2.6 minutes

Batch 178: Processing genes 17701-17800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 37.90it/s]


Progress: 17800/23427 genes processed
Elapsed time: 491.4 seconds (8.2 minutes)
Estimated remaining time: 2.6 minutes

Batch 179: Processing genes 17801-17900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.70it/s]


Progress: 17900/23427 genes processed
Elapsed time: 494.1 seconds (8.2 minutes)
Estimated remaining time: 2.5 minutes

Batch 180: Processing genes 17901-18000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 37.92it/s]


Progress: 18000/23427 genes processed
Elapsed time: 496.9 seconds (8.3 minutes)
Estimated remaining time: 2.5 minutes

Batch 181: Processing genes 18001-18100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.33it/s]


Progress: 18100/23427 genes processed
Elapsed time: 499.6 seconds (8.3 minutes)
Estimated remaining time: 2.5 minutes

Batch 182: Processing genes 18101-18200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.52it/s]


Progress: 18200/23427 genes processed
Elapsed time: 502.2 seconds (8.4 minutes)
Estimated remaining time: 2.4 minutes

Batch 183: Processing genes 18201-18300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.45it/s]


Progress: 18300/23427 genes processed
Elapsed time: 504.9 seconds (8.4 minutes)
Estimated remaining time: 2.4 minutes

Batch 184: Processing genes 18301-18400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.02it/s]


Progress: 18400/23427 genes processed
Elapsed time: 507.6 seconds (8.5 minutes)
Estimated remaining time: 2.3 minutes

Batch 185: Processing genes 18401-18500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.65it/s]


Progress: 18500/23427 genes processed
Elapsed time: 510.3 seconds (8.5 minutes)
Estimated remaining time: 2.3 minutes

Batch 186: Processing genes 18501-18600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.93it/s]


Progress: 18600/23427 genes processed
Elapsed time: 513.0 seconds (8.5 minutes)
Estimated remaining time: 2.2 minutes

Batch 187: Processing genes 18601-18700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.54it/s]


Progress: 18700/23427 genes processed
Elapsed time: 515.6 seconds (8.6 minutes)
Estimated remaining time: 2.2 minutes

Batch 188: Processing genes 18701-18800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.82it/s]


Progress: 18800/23427 genes processed
Elapsed time: 518.3 seconds (8.6 minutes)
Estimated remaining time: 2.1 minutes

Batch 189: Processing genes 18801-18900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 39.08it/s]


Progress: 18900/23427 genes processed
Elapsed time: 521.0 seconds (8.7 minutes)
Estimated remaining time: 2.1 minutes

Batch 190: Processing genes 18901-19000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 37.80it/s]


Progress: 19000/23427 genes processed
Elapsed time: 523.8 seconds (8.7 minutes)
Estimated remaining time: 2.0 minutes

Batch 191: Processing genes 19001-19100 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.64it/s]


Progress: 19100/23427 genes processed
Elapsed time: 526.5 seconds (8.8 minutes)
Estimated remaining time: 2.0 minutes

Batch 192: Processing genes 19101-19200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.47it/s]


Progress: 19200/23427 genes processed
Elapsed time: 529.3 seconds (8.8 minutes)
Estimated remaining time: 1.9 minutes

Batch 193: Processing genes 19201-19300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 38.62it/s]


Progress: 19300/23427 genes processed
Elapsed time: 532.0 seconds (8.9 minutes)
Estimated remaining time: 1.9 minutes

Batch 194: Processing genes 19301-19400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 40.39it/s]


Progress: 19400/23427 genes processed
Elapsed time: 534.6 seconds (8.9 minutes)
Estimated remaining time: 1.8 minutes

Batch 195: Processing genes 19401-19500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 34.33it/s]


Progress: 19500/23427 genes processed
Elapsed time: 537.7 seconds (9.0 minutes)
Estimated remaining time: 1.8 minutes

Batch 196: Processing genes 19501-19600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 34.72it/s]


Progress: 19600/23427 genes processed
Elapsed time: 540.7 seconds (9.0 minutes)
Estimated remaining time: 1.8 minutes

Batch 197: Processing genes 19601-19700 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 32.63it/s]


Progress: 19700/23427 genes processed
Elapsed time: 543.9 seconds (9.1 minutes)
Estimated remaining time: 1.7 minutes

Batch 198: Processing genes 19701-19800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 34.33it/s]


Progress: 19800/23427 genes processed
Elapsed time: 546.9 seconds (9.1 minutes)
Estimated remaining time: 1.7 minutes

Batch 199: Processing genes 19801-19900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 34.53it/s]


Progress: 19900/23427 genes processed
Elapsed time: 550.0 seconds (9.2 minutes)
Estimated remaining time: 1.6 minutes

Batch 200: Processing genes 19901-20000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 33.66it/s]


Progress: 20000/23427 genes processed
Elapsed time: 553.1 seconds (9.2 minutes)
Estimated remaining time: 1.6 minutes

Batch 201: Processing genes 20001-20100 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 33.28it/s]


Progress: 20100/23427 genes processed
Elapsed time: 556.2 seconds (9.3 minutes)
Estimated remaining time: 1.5 minutes

Batch 202: Processing genes 20101-20200 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 32.83it/s]


Progress: 20200/23427 genes processed
Elapsed time: 559.4 seconds (9.3 minutes)
Estimated remaining time: 1.5 minutes

Batch 203: Processing genes 20201-20300 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 32.83it/s]


Progress: 20300/23427 genes processed
Elapsed time: 562.6 seconds (9.4 minutes)
Estimated remaining time: 1.4 minutes

Batch 204: Processing genes 20301-20400 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 31.26it/s]


Progress: 20400/23427 genes processed
Elapsed time: 566.0 seconds (9.4 minutes)
Estimated remaining time: 1.4 minutes

Batch 205: Processing genes 20401-20500 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 29.98it/s]


Progress: 20500/23427 genes processed
Elapsed time: 569.4 seconds (9.5 minutes)
Estimated remaining time: 1.4 minutes

Batch 206: Processing genes 20501-20600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 35.28it/s]


Progress: 20600/23427 genes processed
Elapsed time: 572.4 seconds (9.5 minutes)
Estimated remaining time: 1.3 minutes

Batch 207: Processing genes 20601-20700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 33.55it/s]


Progress: 20700/23427 genes processed
Elapsed time: 575.5 seconds (9.6 minutes)
Estimated remaining time: 1.3 minutes

Batch 208: Processing genes 20701-20800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 33.94it/s]


Progress: 20800/23427 genes processed
Elapsed time: 578.6 seconds (9.6 minutes)
Estimated remaining time: 1.2 minutes

Batch 209: Processing genes 20801-20900 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 32.86it/s]


Progress: 20900/23427 genes processed
Elapsed time: 581.8 seconds (9.7 minutes)
Estimated remaining time: 1.2 minutes

Batch 210: Processing genes 20901-21000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 35.87it/s]


Progress: 21000/23427 genes processed
Elapsed time: 584.8 seconds (9.7 minutes)
Estimated remaining time: 1.1 minutes

Batch 211: Processing genes 21001-21100 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 32.61it/s]


Progress: 21100/23427 genes processed
Elapsed time: 588.0 seconds (9.8 minutes)
Estimated remaining time: 1.1 minutes

Batch 212: Processing genes 21101-21200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 34.83it/s]


Progress: 21200/23427 genes processed
Elapsed time: 591.0 seconds (9.8 minutes)
Estimated remaining time: 1.0 minutes

Batch 213: Processing genes 21201-21300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 33.90it/s]


Progress: 21300/23427 genes processed
Elapsed time: 594.1 seconds (9.9 minutes)
Estimated remaining time: 1.0 minutes

Batch 214: Processing genes 21301-21400 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 33.46it/s]


Progress: 21400/23427 genes processed
Elapsed time: 597.2 seconds (10.0 minutes)
Estimated remaining time: 0.9 minutes

Batch 215: Processing genes 21401-21500 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 31.95it/s]


Progress: 21500/23427 genes processed
Elapsed time: 600.5 seconds (10.0 minutes)
Estimated remaining time: 0.9 minutes

Batch 216: Processing genes 21501-21600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 33.51it/s]


Progress: 21600/23427 genes processed
Elapsed time: 603.6 seconds (10.1 minutes)
Estimated remaining time: 0.9 minutes

Batch 217: Processing genes 21601-21700 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 32.57it/s]


Progress: 21700/23427 genes processed
Elapsed time: 606.8 seconds (10.1 minutes)
Estimated remaining time: 0.8 minutes

Batch 218: Processing genes 21701-21800 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 33.23it/s]


Progress: 21800/23427 genes processed
Elapsed time: 610.0 seconds (10.2 minutes)
Estimated remaining time: 0.8 minutes

Batch 219: Processing genes 21801-21900 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 32.09it/s]


Progress: 21900/23427 genes processed
Elapsed time: 613.3 seconds (10.2 minutes)
Estimated remaining time: 0.7 minutes

Batch 220: Processing genes 21901-22000 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 31.49it/s]


Progress: 22000/23427 genes processed
Elapsed time: 616.6 seconds (10.3 minutes)
Estimated remaining time: 0.7 minutes

Batch 221: Processing genes 22001-22100 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 32.08it/s]


Progress: 22100/23427 genes processed
Elapsed time: 619.8 seconds (10.3 minutes)
Estimated remaining time: 0.6 minutes

Batch 222: Processing genes 22101-22200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 34.74it/s]


Progress: 22200/23427 genes processed
Elapsed time: 622.9 seconds (10.4 minutes)
Estimated remaining time: 0.6 minutes

Batch 223: Processing genes 22201-22300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 34.86it/s]


Progress: 22300/23427 genes processed
Elapsed time: 625.9 seconds (10.4 minutes)
Estimated remaining time: 0.5 minutes

Batch 224: Processing genes 22301-22400 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 33.05it/s]


Progress: 22400/23427 genes processed
Elapsed time: 629.0 seconds (10.5 minutes)
Estimated remaining time: 0.5 minutes

Batch 225: Processing genes 22401-22500 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 34.24it/s]


Progress: 22500/23427 genes processed
Elapsed time: 632.1 seconds (10.5 minutes)
Estimated remaining time: 0.4 minutes

Batch 226: Processing genes 22501-22600 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 33.76it/s]


Progress: 22600/23427 genes processed
Elapsed time: 635.2 seconds (10.6 minutes)
Estimated remaining time: 0.4 minutes

Batch 227: Processing genes 22601-22700 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 33.50it/s]


Progress: 22700/23427 genes processed
Elapsed time: 638.4 seconds (10.6 minutes)
Estimated remaining time: 0.3 minutes

Batch 228: Processing genes 22701-22800 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 35.43it/s]


Progress: 22800/23427 genes processed
Elapsed time: 641.3 seconds (10.7 minutes)
Estimated remaining time: 0.3 minutes

Batch 229: Processing genes 22801-22900 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 34.50it/s]


Progress: 22900/23427 genes processed
Elapsed time: 644.4 seconds (10.7 minutes)
Estimated remaining time: 0.2 minutes

Batch 230: Processing genes 22901-23000 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 33.90it/s]


Progress: 23000/23427 genes processed
Elapsed time: 647.5 seconds (10.8 minutes)
Estimated remaining time: 0.2 minutes

Batch 231: Processing genes 23001-23100 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 33.03it/s]


Progress: 23100/23427 genes processed
Elapsed time: 650.7 seconds (10.8 minutes)
Estimated remaining time: 0.2 minutes

Batch 232: Processing genes 23101-23200 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 35.72it/s]


Progress: 23200/23427 genes processed
Elapsed time: 653.6 seconds (10.9 minutes)
Estimated remaining time: 0.1 minutes

Batch 233: Processing genes 23201-23300 of 23427


Processing batch: 100%|██████████| 100/100 [00:02<00:00, 34.67it/s]


Progress: 23300/23427 genes processed
Elapsed time: 656.6 seconds (10.9 minutes)
Estimated remaining time: 0.1 minutes

Batch 234: Processing genes 23301-23400 of 23427


Processing batch: 100%|██████████| 100/100 [00:03<00:00, 32.13it/s]


Progress: 23400/23427 genes processed
Elapsed time: 659.9 seconds (11.0 minutes)
Estimated remaining time: 0.0 minutes

Batch 235: Processing genes 23401-23427 of 23427


Processing batch: 100%|██████████| 27/27 [00:01<00:00, 26.18it/s]


Progress: 23427/23427 genes processed
Elapsed time: 661.1 seconds (11.0 minutes)
Estimated remaining time: 0.0 minutes

Processing complete
Total genes processed: 23427/23427
Genes with errors: 0
Total time: 661.21 seconds (11.02 minutes)
Data saved to processed_gene_data


In [7]:
import json
# Read the gene manifest that was created during processing.
manifest_path = os.path.join("processed_gene_data", "gene_manifest.json")
with open(manifest_path, "r") as f:
    processed_genes = json.load(f)

# Reload each gene listed in the manifest and report array shapes plus its config.
for gene in processed_genes:
    X, y, config, related_genes = processor.load_gene_data(gene, "processed_gene_data")
    summary = f"Loaded data for {gene}: X shape {X.shape}, y shape {y.shape}, config {config}"
    print(summary)

Loaded data for BX293535.1: X shape (17041, 141), y shape (17041,), config {'width': [141, 2, 1], 'grid': 5, 'k': 4, 'seed': 42}
Loaded data for WNT4: X shape (17041, 79), y shape (17041,), config {'width': [79, 2, 1], 'grid': 5, 'k': 4, 'seed': 42}
Loaded data for ARL14: X shape (17041, 151), y shape (17041,), config {'width': [151, 2, 1], 'grid': 5, 'k': 4, 'seed': 42}
Loaded data for IGLV1-47: X shape (17041, 105), y shape (17041,), config {'width': [105, 2, 1], 'grid': 5, 'k': 4, 'seed': 42}
Loaded data for GPRC5D: X shape (17041, 77), y shape (17041,), config {'width': [77, 2, 1], 'grid': 5, 'k': 4, 'seed': 42}
Loaded data for SLC6A20: X shape (17041, 108), y shape (17041,), config {'width': [108, 2, 1], 'grid': 5, 'k': 4, 'seed': 42}
Loaded data for HIST1H3C: X shape (17041, 118), y shape (17041,), config {'width': [118, 2, 1], 'grid': 5, 'k': 4, 'seed': 42}
Loaded data for FGFBP2: X shape (17041, 90), y shape (17041,), config {'width': [90, 2, 1], 'grid': 5, 'k': 4, 'seed': 42}


In [29]:
import os
import json
import time
import numpy as np
import torch
import traceback
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [30]:
# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


def print_gpu_memory():
    """Print total, allocated and reserved memory (in GB) of the current CUDA device.

    Prints nothing when CUDA is not available.

    Returns:
        None.
    """
    if not torch.cuda.is_available():
        return

    # Query all three figures for the same (current) device. The total used to be
    # read from device 0 while allocated/reserved defaulted to the current device,
    # which could describe two different GPUs on a multi-GPU machine.
    current = torch.cuda.current_device()
    total_memory = torch.cuda.get_device_properties(current).total_memory / 1e9
    allocated = torch.cuda.memory_allocated(current) / 1e9
    reserved = torch.cuda.memory_reserved(current) / 1e9
    print(
        f"GPU Memory: Total={total_memory:.2f}GB, Allocated={allocated:.2f}GB, Reserved={reserved:.2f}GB"
    )

Using device: cuda


In [31]:
class GeneDataLoader:
    """Reads the saved arrays and metadata of a single gene from ``data_dir``."""

    def __init__(self, data_dir):
        """Remember the root directory that contains the ``gene_data`` folder."""
        self.data_dir = data_dir

    def load_gene_data(self, gene):
        """Load the files stored under ``<data_dir>/gene_data/<gene>``.

        Args:
            gene: Name of the gene; used as the sub-directory name.

        Returns:
            A 4-tuple ``(X, y, config, related_genes)``: the arrays read from
            ``X.npy`` and ``y.npy``, followed by the ``"config"`` and
            ``"related_genes"`` entries of ``metadata.json``.
        """
        gene_root = os.path.join(self.data_dir, "gene_data", gene)

        def in_gene_dir(filename):
            return os.path.join(gene_root, filename)

        features = np.load(in_gene_dir("X.npy"))
        targets = np.load(in_gene_dir("y.npy"))

        with open(in_gene_dir("metadata.json"), "r") as meta_file:
            meta = json.load(meta_file)

        config = meta["config"]
        related = meta["related_genes"]
        return features, targets, config, related

In [32]:
class GeneDataset(Dataset):
    """Map-style dataset that serves ``(features, target)`` pairs as float tensors."""

    def __init__(self, X, y):
        """Convert the inputs to ``torch.FloatTensor`` once, at construction time."""
        features = torch.FloatTensor(X)
        self.X = features
        targets = torch.FloatTensor(y)
        self.y = targets

    def __len__(self):
        """Number of samples, taken from the length of the feature tensor."""
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        """Return the feature entry and the target entry stored at ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

In [33]:
from kan import *

In [34]:
def r2_score(y_true, y_pred):
    """Coefficient of determination of ``y_pred`` with respect to ``y_true``.

    Computed as ``1 - SS_res / (SS_tot + 1e-8)`` on torch tensors; the small
    constant keeps the denominator non-zero when ``y_true`` has no variance.
    """
    deviations = y_true - y_true.mean()
    residuals = y_true - y_pred
    ss_total = deviations.pow(2).sum()
    ss_residual = residuals.pow(2).sum()
    return 1 - (ss_residual / (ss_total + 1e-8))


def rmse(y_true, y_pred):
    """Root of the mean squared difference between ``y_true`` and ``y_pred``."""
    squared_error = (y_true - y_pred).pow(2)
    return squared_error.mean().sqrt()


def mae(y_true, y_pred):
    """Mean of the absolute differences between ``y_true`` and ``y_pred``."""
    absolute_error = (y_true - y_pred).abs()
    return absolute_error.mean()

In [39]:
def train_kan_models_parallel(
    gene_list,
    data_dir,
    output_dir,
    batch_size=32,
    max_models=4,
    epochs=50,
    patience=10,
    lr=0.001,
):
    """Train one KAN model per gene, handling ``max_models`` genes at a time.

    Genes are taken from ``gene_list`` in consecutive groups of ``max_models``.
    Within a group the models are trained interleaved in this process (each
    epoch visits every still-active model in turn); nothing runs concurrently.
    Each gene's data is loaded through ``GeneDataLoader``, split 80/20 into
    training and validation sets (``random_state=42``) and fit with Adam on an
    MSE loss. Whenever a model's validation loss (sum of the per-batch MSE
    values over its validation loader) improves, its ``state_dict`` is saved to
    ``<output_dir>/<gene>/best_model.pt``.

    Early stopping is tracked per model: once a model has gone ``patience``
    consecutive epochs without improvement it is no longer trained, while the
    other models of the group carry on. A gene whose preparation raises is
    reported and skipped.

    Relies on the module-level ``device`` for model and tensor placement.

    Args:
        gene_list: Sliceable sequence of gene names to train on.
        data_dir: Root directory handed to ``GeneDataLoader``.
        output_dir: Directory that receives one sub-directory per gene.
        batch_size: Mini-batch size of the training and validation loaders.
        max_models: Number of genes whose models are held and trained together.
        epochs: Maximum number of epochs per model.
        patience: Epochs without validation improvement before a model stops.
        lr: Learning rate of the Adam optimizer.

    Returns:
        None.
    """
    os.makedirs(output_dir, exist_ok=True)
    gene_loader = GeneDataLoader(data_dir)

    batch_pbar = tqdm(total=len(gene_list), desc="Overall Progress")
    try:
        for batch_start in range(0, len(gene_list), max_models):
            batch_genes = gene_list[batch_start : batch_start + max_models]

            # One record per gene that was prepared successfully. Keeping the gene
            # name inside the record means a failed gene cannot shift the
            # association between names and models.
            runs = []
            for gene in batch_genes:
                try:
                    print(f"\nPreparing model for gene: {gene}")
                    runs.append(
                        _prepare_gene_run(gene, gene_loader, output_dir, batch_size, lr)
                    )
                except Exception as e:
                    print(f"Error loading gene {gene}: {str(e)}")

            for _ in range(epochs):
                active_runs = [run for run in runs if not run["stopped"]]
                if not active_runs:
                    # Every model of this group has hit its patience limit.
                    break

                for run in active_runs:
                    _train_one_epoch(
                        run["model"], run["train_loader"], run["optimizer"], run["criterion"]
                    )
                    val_loss = _validation_loss(
                        run["model"], run["val_loader"], run["criterion"]
                    )

                    if val_loss < run["best_val_loss"]:
                        run["best_val_loss"] = val_loss
                        run["patience_counter"] = 0
                        torch.save(
                            run["model"].state_dict(),
                            os.path.join(run["gene_dir"], "best_model.pt"),
                        )
                    else:
                        run["patience_counter"] += 1

                    # Stop only this model; a `break` here would also skip the
                    # remaining models of the group for the current epoch.
                    if run["patience_counter"] >= patience:
                        run["stopped"] = True

            # The bar's total counts genes, so advance it by the group size.
            batch_pbar.update(len(batch_genes))
    finally:
        # Release the progress bar even if training is interrupted.
        batch_pbar.close()

    print("Training completed!")


def _prepare_gene_run(gene, gene_loader, output_dir, batch_size, lr):
    """Create the output directory, data loaders, model, optimizer and loss for one gene."""
    gene_dir = os.path.join(output_dir, gene)
    os.makedirs(gene_dir, exist_ok=True)

    X, y, config, _ = gene_loader.load_gene_data(gene)
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    train_loader = DataLoader(
        GeneDataset(X_train, y_train), batch_size=batch_size, shuffle=True
    )
    val_loader = DataLoader(GeneDataset(X_val, y_val), batch_size=batch_size)

    model = KAN(
        width=config["width"],
        grid=config["grid"],
        k=config["k"],
        seed=config["seed"],
        device=device,
    ).to(device)

    return {
        "gene": gene,
        "gene_dir": gene_dir,
        "model": model,
        "optimizer": torch.optim.Adam(model.parameters(), lr=lr),
        "criterion": nn.MSELoss(),
        "train_loader": train_loader,
        "val_loader": val_loader,
        "best_val_loss": float("inf"),
        "patience_counter": 0,
        "stopped": False,
    }


def _train_one_epoch(model, loader, optimizer, criterion):
    """Run one optimization pass of ``model`` over every batch in ``loader``."""
    model.train()
    for X_batch, y_batch in loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        # squeeze(-1) drops only the trailing output axis, so a final batch of
        # size 1 keeps its batch axis and still matches the target's shape.
        output = model(X_batch).squeeze(-1)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()


def _validation_loss(model, loader, criterion):
    """Return the sum of per-batch losses of ``model`` over ``loader``, without gradients."""
    model.eval()
    total = 0.0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            predictions = model(X_batch).squeeze(-1)
            total += criterion(predictions, y_batch).item()
    return total

In [40]:
data_dir = "processed_gene_data"
output_dir = "kan_models"

# The keys of the manifest are used as the list of genes to train on.
manifest_path = os.path.join(data_dir, "gene_manifest.json")
with open(manifest_path, "r") as f:
    manifest = json.load(f)
genes_list = list(manifest.keys())

print(f"Found {len(genes_list)} genes to process")

# Training hyper-parameters, gathered in one place and forwarded as keyword arguments.
training_options = dict(
    batch_size=32,
    max_models=4,
    epochs=50,
    patience=10,
    lr=0.001,
)

train_kan_models_parallel(
    gene_list=genes_list,
    data_dir=data_dir,
    output_dir=output_dir,
    **training_options,
)

Found 23427 genes to process


Overall Progress:   0%|          | 0/23427 [00:00<?, ?it/s]


Preparing model for gene: BX293535.1
checkpoint directory created: ./model
saving model version 0.0

Preparing model for gene: WNT4
checkpoint directory created: ./model
saving model version 0.0

Preparing model for gene: ARL14
checkpoint directory created: ./model
saving model version 0.0

Preparing model for gene: IGLV1-47
checkpoint directory created: ./model
saving model version 0.0


KeyboardInterrupt: 