diff --git a/python/genvarloader/_dataset/_genotypes.py b/python/genvarloader/_dataset/_genotypes.py index 4338c50..a9fe147 100644 --- a/python/genvarloader/_dataset/_genotypes.py +++ b/python/genvarloader/_dataset/_genotypes.py @@ -8,7 +8,7 @@ @nb.njit(parallel=True, nogil=True, cache=True) def get_diffs_sparse( - geno_offset_idxs: NDArray[np.integer], + geno_offset_idx: NDArray[np.integer], geno_v_idxs: NDArray[np.integer], geno_offsets: NDArray[np.integer], ilens: NDArray[np.integer], @@ -24,7 +24,7 @@ def get_diffs_sparse( Parameters ---------- - geno_offset_idxs : NDArray[np.intp] + geno_offset_idx : NDArray[np.intp] Shape = (n_regions, ploidy) Indices for each region into offsets. geno_v_idxs : NDArray[np.int32] Shape = (variants*samples*ploidy) Sparse genotypes i.e. variant indices for ALT genotypes. @@ -43,11 +43,11 @@ def get_diffs_sparse( v_starts : Optional[NDArray[np.int32]] Shape = (total_variants) Positions of unique variants. """ - n_queries, ploidy = geno_offset_idxs.shape + n_queries, ploidy = geno_offset_idx.shape diffs = np.empty((n_queries, ploidy), np.int32) for query in nb.prange(n_queries): for hap in nb.prange(ploidy): - o_idx = geno_offset_idxs[query, hap] + o_idx = geno_offset_idx[query, hap] if geno_offsets.ndim == 1: o_s, o_e = geno_offsets[o_idx], geno_offsets[o_idx + 1] else: @@ -118,7 +118,7 @@ def reconstruct_haplotypes_from_sparse( out_offsets: NDArray[np.integer], regions: NDArray[np.integer], shifts: NDArray[np.integer], - geno_offset_idxs: NDArray[np.integer], + geno_offset_idx: NDArray[np.integer], geno_offsets: NDArray[np.integer], geno_v_idxs: NDArray[np.integer], v_starts: NDArray[np.integer], @@ -135,6 +135,9 @@ def reconstruct_haplotypes_from_sparse( ): """Reconstruct haplotypes from reference sequence and variants. + Batched parallel driver: dispatches to :func:`reconstruct_haplotype_from_sparse` + (singular) for each ``(query, hap)`` pair. + Parameters ---------- out : NDArray[np.uint8] @@ -145,7 +148,7 @@ def reconstruct_haplotypes_from_sparse( Shape = (batch, 3) Regions to reconstruct haplotypes. shifts : NDArray[np.uint32] Shape = (batch, ploidy) Shifts for each region. - geno_offset_idxs: NDArray[np.intp] + geno_offset_idx: NDArray[np.intp] Shape = (batch, ploidy) Indices for each region into offsets. geno_offsets : NDArray[np.uint32] Shape = (batch*ploidy + 1) Offsets into genos. @@ -174,7 +177,7 @@ def reconstruct_haplotypes_from_sparse( annot_ref_pos : NDArray[np.int32] | None Ragged buffer for shape (batch, ploidy, ~length). Reference positions for annotations. """ - batch_size, ploidy = geno_offset_idxs.shape + batch_size, ploidy = geno_offset_idx.shape for query in nb.prange(batch_size): q = regions[query] c_idx: int = q[0] @@ -185,7 +188,7 @@ def reconstruct_haplotypes_from_sparse( for hap in nb.prange(ploidy): # index for full sparse genos - o_idx = geno_offset_idxs[query, hap] + o_idx = geno_offset_idx[query, hap] if geno_offsets.ndim == 1: o_s, o_e = geno_offsets[o_idx], geno_offsets[o_idx + 1] else: @@ -244,7 +247,10 @@ def reconstruct_haplotype_from_sparse( annot_v_idxs: NDArray[np.integer] | None = None, annot_ref_pos: NDArray[np.integer] | None = None, ): - """Reconstruct a haplotype from reference sequence and variants. + """Reconstruct a single haplotype from reference sequence and variants. + + Single-haplotype inner kernel. Use :func:`reconstruct_haplotypes_from_sparse` + (plural) to reconstruct a batch in parallel. Parameters ---------- @@ -419,7 +425,7 @@ def reconstruct_haplotype_from_sparse( def choose_exonic_variants( starts: NDArray[np.integer], ends: NDArray[np.integer], - geno_offset_idxs: NDArray[np.integer], + geno_offset_idx: NDArray[np.integer], geno_v_idxs: NDArray[np.integer], geno_offsets: NDArray[np.integer], v_starts: NDArray[np.integer], @@ -433,7 +439,7 @@ def choose_exonic_variants( Shape = (n_regions) Start positions for each region. ends : NDArray[np.int32] Shape = (n_regions) Ends for each region. - geno_offset_idxs : NDArray[np.intp] + geno_offset_idx : NDArray[np.intp] Shape = (n_regions, ploidy) Indices for each region into offsets. offsets : NDArray[np.int64] Shape = (total_variants + 1) Offsets into sparse genotypes. @@ -446,12 +452,12 @@ def choose_exonic_variants( deterministic : bool Whether to deterministically assign variants to groups """ - n_regions, ploidy = geno_offset_idxs.shape + n_regions, ploidy = geno_offset_idx.shape lengths = np.empty((n_regions, ploidy), np.int64) for query in nb.prange(n_regions): for hap in range(ploidy): - o_idx = geno_offset_idxs[query, hap] + o_idx = geno_offset_idx[query, hap] if geno_offsets.ndim == 1: o_s, o_e = geno_offsets[o_idx], geno_offsets[o_idx + 1] else: @@ -468,7 +474,7 @@ def choose_exonic_variants( ref_start: int = starts[query] ref_end: int = ends[query] for hap in nb.prange(ploidy): - o_idx = geno_offset_idxs[query, hap] + o_idx = geno_offset_idx[query, hap] # Mirror filter_af's (2, n_slices) indexing (sibling kernel below). if geno_offsets.ndim == 1: o_s, o_e = geno_offsets[o_idx], geno_offsets[o_idx + 1] @@ -521,7 +527,7 @@ def _choose_exonic_variants( @nb.njit(parallel=True, nogil=True, cache=True) def filter_af( - geno_offset_idxs: NDArray[np.integer], + geno_offset_idx: NDArray[np.integer], geno_offsets: NDArray[np.integer], geno_v_idxs: NDArray[np.integer], afs: NDArray[np.number], @@ -530,7 +536,7 @@ def filter_af( ) -> tuple[NDArray[np.bool_], NDArray[OFFSET_TYPE]]: """Filter variants based on allele frequency, marking them to keep or not.""" - batch_size, ploidy = geno_offset_idxs.shape + batch_size, ploidy = geno_offset_idx.shape if geno_offsets.ndim == 1: keep_offsets = geno_offsets.astype(OFFSET_TYPE) @@ -549,7 +555,7 @@ def filter_af( for query in nb.prange(batch_size): for hap in range(ploidy): # index for full sparse genos - o_idx = geno_offset_idxs[query, hap] + o_idx = geno_offset_idx[query, hap] if geno_offsets.ndim == 1: o_s, o_e = geno_offsets[o_idx], geno_offsets[o_idx + 1] else: diff --git a/python/genvarloader/_dataset/_haps.py b/python/genvarloader/_dataset/_haps.py index 27672a0..4ab1e55 100644 --- a/python/genvarloader/_dataset/_haps.py +++ b/python/genvarloader/_dataset/_haps.py @@ -302,13 +302,13 @@ def _haplotype_ilens( ) -> NDArray[np.int32]: """`idx` must be 1D.""" # (b p) - geno_offset_idxs = self._get_geno_offset_idx(idx, self.genotypes) + geno_offset_idx = self._get_geno_offset_idx(idx, self.genotypes) if self.filter == "exonic": keep, keep_offsets = choose_exonic_variants( starts=regions[:, 1], ends=regions[:, 2], - geno_offset_idxs=geno_offset_idxs, + geno_offset_idx=geno_offset_idx, geno_v_idxs=self.genotypes.data, geno_offsets=self.genotypes.offsets, v_starts=self.variants.start, @@ -319,7 +319,7 @@ def _haplotype_ilens( # (r s p) hap_ilens = get_diffs_sparse( - geno_offset_idxs=geno_offset_idxs, + geno_offset_idx=geno_offset_idx, geno_v_idxs=self.genotypes.data, geno_offsets=self.genotypes.offsets, ilens=self.variants.ilen, @@ -353,7 +353,7 @@ def haplotype_lengths_for_plan( keep, keep_offsets = choose_exonic_variants( starts=regions[:, 1], ends=regions[:, 2], - geno_offset_idxs=geno_offset_idx, + geno_offset_idx=geno_offset_idx, geno_v_idxs=self.genotypes.data, geno_offsets=self.genotypes.offsets, v_starts=self.variants.start, @@ -453,7 +453,7 @@ def get_haps_and_shifts( assert_never(self.kind) return ( - out, # type: ignore | pylance doesn't like this but it's correct behavior for the signature + out, req.geno_offset_idx, req.shifts, req.diffs, @@ -488,7 +488,7 @@ def _prepare_request( keep, keep_offsets = choose_exonic_variants( starts=regions[:, 1], ends=regions[:, 2], - geno_offset_idxs=geno_offset_idx, + geno_offset_idx=geno_offset_idx, geno_v_idxs=self.genotypes.data, geno_offsets=self.genotypes.offsets, v_starts=self.variants.start, @@ -545,10 +545,13 @@ def _get_geno_offset_idx( idx: NDArray[np.integer], genotypes: Ragged[V_IDX_TYPE], ) -> NDArray[np.intp]: - r_idx, s_idx = np.unravel_index(idx, genotypes.shape[:2]) # type: ignore + r_idx, s_idx = np.unravel_index(idx, genotypes.shape[:2]) # type: ignore[no-matching-overload] # Ragged.shape is tuple[int | None, ...]; numpy overload expects all-int ploid_idx = np.arange(genotypes.shape[-2], dtype=np.intp) - rsp_idx = (r_idx[:, None], s_idx[:, None], ploid_idx) - geno_offset_idx = np.ravel_multi_index(rsp_idx, genotypes.shape[:-1]) # type: ignore + # (region, sample, ploid) index tuple for ravel_multi_index. + region_sample_ploid_idx = (r_idx[:, None], s_idx[:, None], ploid_idx) + geno_offset_idx = np.ravel_multi_index( + region_sample_ploid_idx, genotypes.shape[:-1] + ) # type: ignore[no-matching-overload] # Ragged.shape is tuple[int | None, ...]; numpy overload expects all-int return geno_offset_idx def _get_variants( @@ -560,7 +563,7 @@ def _get_variants( keep_offsets: NDArray[np.integer] | None = None, ) -> RaggedVariants: # TODO: maybe filter variants for region, shifts? - r, s = np.unravel_index(idx, self.genotypes.shape[:2]) # type: ignore + r, s = np.unravel_index(idx, self.genotypes.shape[:2]) # type: ignore[no-matching-overload] # Ragged.shape is tuple[int | None, ...]; numpy overload expects all-int # (b p ~v) genos = cast(Ragged[V_IDX_TYPE], self.genotypes[r, s]) @@ -598,7 +601,7 @@ def _get_variants( # guaranteed to have same shape as genotypes but need to make it contiguous/copy the data dosages = self.dosages[r, s] if _keep is not None: - dosages = ak.to_regular(dosages[_keep], 1) # type: ignore + dosages = ak.to_regular(dosages[_keep], 1) fields["dosage"] = Ragged(ak.to_packed(dosages)) fields.update( @@ -650,7 +653,7 @@ def _reconstruct_haplotypes(self, req: ReconstructionRequest) -> Ragged[np.bytes req.out_offsets, ) reconstruct_haplotypes_from_sparse( - geno_offset_idxs=req.geno_offset_idx, + geno_offset_idx=req.geno_offset_idx, out=haps.data, out_offsets=haps.offsets, regions=req.regions, @@ -681,7 +684,7 @@ def _reconstruct_haplotypes(self, req: ReconstructionRequest) -> Ragged[np.bytes out_buf = np.empty(total, np.uint8) reconstruct_haplotypes_from_sparse( - geno_offset_idxs=flat_geno_idx.reshape(-1, 1), + geno_offset_idx=flat_geno_idx.reshape(-1, 1), out=out_buf, out_offsets=splice_plan.permuted_out_offsets, regions=permuted_regions, @@ -741,7 +744,7 @@ def _reconstruct_annotated_haplotypes( # annot offsets match haps offsets, so we share them. reconstruct_haplotypes_from_sparse( - geno_offset_idxs=req.geno_offset_idx, + geno_offset_idx=req.geno_offset_idx, out=haps.data, out_offsets=haps.offsets, regions=req.regions, @@ -778,7 +781,7 @@ def _reconstruct_annotated_haplotypes( annot_pos_buf = np.empty(total, np.int32) reconstruct_haplotypes_from_sparse( - geno_offset_idxs=flat_geno_idx.reshape(-1, 1), + geno_offset_idx=flat_geno_idx.reshape(-1, 1), out=out_buf, out_offsets=splice_plan.permuted_out_offsets, regions=permuted_regions, @@ -824,7 +827,7 @@ def _permute_request_for_splice( NDArray[np.bool_] | None, NDArray[np.integer] | None, ]: - """Permute the per-element arrays in ``req`` according to ``splice_plan.perm``. + """Permute the per-element arrays in ``req`` according to ``splice_plan.permutation``. ``geno_offset_idx`` and ``shifts`` have shape ``(B, P)``; flatten to ``(B*P,)`` in (query, ploidy) C-order, then permute. The kernel then @@ -833,27 +836,27 @@ def _permute_request_for_splice( assert req.splice_plan is not None splice_plan = req.splice_plan ploidy = req.shifts.shape[1] if req.shifts.ndim > 1 else 1 - perm = splice_plan.perm + permutation = splice_plan.permutation - flat_geno_idx = req.geno_offset_idx.reshape(-1)[perm].astype( + flat_geno_idx = req.geno_offset_idx.reshape(-1)[permutation].astype( np.intp, copy=False ) - flat_shifts = req.shifts.reshape(-1)[perm].astype(np.int32, copy=False) + flat_shifts = req.shifts.reshape(-1)[permutation].astype(np.int32, copy=False) # regions has shape (B, 3). For (B*P, 3), each query repeats P times - # consecutively, then we apply the same perm. + # consecutively, then we apply the same permutation. regions_flat = np.repeat(req.regions, ploidy, axis=0) - permuted_regions = regions_flat[perm] + permuted_regions = regions_flat[permutation] # keep / keep_offsets: per-k granularity (length B*P + 1). if req.keep is not None and req.keep_offsets is not None: keep_lens = np.diff(req.keep_offsets) - keep_lens_perm = keep_lens[perm] + keep_lens_perm = keep_lens[permutation] keep_offsets_perm = lengths_to_offsets( keep_lens_perm.astype(np.int64), dtype=np.int64 ) keep_perm = np.empty(int(keep_lens_perm.sum()), dtype=np.bool_) write_cursor = 0 - for k_old in perm: + for k_old in permutation: s = int(req.keep_offsets[k_old]) e = int(req.keep_offsets[k_old + 1]) width = e - s diff --git a/python/genvarloader/_dataset/_impl.py b/python/genvarloader/_dataset/_impl.py index da67de0..c1e804f 100644 --- a/python/genvarloader/_dataset/_impl.py +++ b/python/genvarloader/_dataset/_impl.py @@ -573,11 +573,11 @@ def with_tracks( new_tracks = self._tracks.with_tracks(None) elif isinstance(tracks, str): new_tracks = self._tracks.with_tracks([tracks]).to_kind( - _kind, # type: ignore + _kind, # type: ignore[bad-argument-type] # _kind is broader union; runtime branch ensures correct subtype ) else: new_tracks = self._tracks.with_tracks(tracks).to_kind( - _kind, # type: ignore + _kind, # type: ignore[bad-argument-type] # _kind is broader union; runtime branch ensures correct subtype ) # Validate: at least one of (seqs, tracks) must remain active. @@ -998,7 +998,7 @@ def haplotype_lengths( if out_reshape is not None: hap_lens = hap_lens.reshape( *out_reshape, - self._seqs.genotypes.shape[-2], # type: ignore + self._seqs.genotypes.shape[-2], ) return hap_lens @@ -1148,7 +1148,7 @@ def write_transformed_track( overwrite=overwrite, ) - return replace(self, _tracks=new_tracks) # type: ignore + return replace(self, _tracks=new_tracks) # type: ignore[bad-return] # dataclasses.replace returns Self but pyrefly widens to base Dataset union def write_annot_tracks( self, tracks: dict[str, str | Path | pl.DataFrame], overwrite: bool = False @@ -1554,7 +1554,7 @@ def __getitem__( def __getitem__( self, idx: StrIdx | tuple[StrIdx] | tuple[StrIdx, StrIdx] ) -> SEQ | TRK | tuple[SEQ, TRK]: - return super().__getitem__(idx) # type: ignore + return super().__getitem__(idx) # type: ignore[bad-return] # base Dataset returns broad union; SEQ/TRK typevars narrow at use sites class RaggedDataset(Dataset, Generic[MaybeRSEQ, MaybeRTRK]): @@ -1709,4 +1709,4 @@ def __getitem__( def __getitem__( self, idx: StrIdx | tuple[StrIdx] | tuple[StrIdx, StrIdx] ) -> RSEQ | RTRK | tuple[RSEQ, RTRK]: - return super().__getitem__(idx) # type: ignore + return super().__getitem__(idx) # type: ignore[bad-return] # base Dataset returns broad union; RSEQ/RTRK typevars narrow at use sites diff --git a/python/genvarloader/_dataset/_indexing.py b/python/genvarloader/_dataset/_indexing.py index ed209d1..3d719e9 100644 --- a/python/genvarloader/_dataset/_indexing.py +++ b/python/genvarloader/_dataset/_indexing.py @@ -74,7 +74,7 @@ def from_region_and_sample_idxs( if regions is not None: _regions = np.array(regions) r2i_map = HashTable( - max=len(_regions) * 2, # type: ignore | 2x size for perf > mem + max=len(_regions) * 2, # type: ignore[bad-argument-type] # hirola HashTable.max typed as numpy.Number but accepts int (2x size for perf > mem) dtype=_regions.dtype, ) r2i_map.add(_regions) @@ -83,7 +83,7 @@ def from_region_and_sample_idxs( _samples = np.array(samples) s2i_map = HashTable( - max=len(_samples) * 2, # type: ignore | 2x size for perf > mem + max=len(_samples) * 2, # type: ignore[bad-argument-type] # hirola HashTable.max typed as numpy.Number but accepts int (2x size for perf > mem) dtype=_samples.dtype, ) s2i_map.add(_samples) @@ -337,7 +337,7 @@ def _init( raise ValueError( "Found indices in the splice map that are out of bounds for the dataset." ) - rows = HashTable(max=len(_names) * 2, dtype=_names.dtype) # type: ignore + rows = HashTable(max=len(_names) * 2, dtype=_names.dtype) # type: ignore[bad-argument-type] # hirola HashTable.max typed as numpy.Number but accepts int rows.add(_names) sm = SpliceMap( names=rows, @@ -377,7 +377,7 @@ def subset_to( new_map = self.map.subset_to(rows) if rows is not None else self.map sub_dsi = self.dsi.subset_to(samples=samples) - region_idxs = ak.flatten(new_map.splice_map, None).to_numpy() # type: ignore + region_idxs = ak.flatten(new_map.splice_map, None).to_numpy() eff_dsi = self.dsi.subset_to(regions=region_idxs, samples=samples) return replace(self, map=new_map, dsi=sub_dsi), eff_dsi diff --git a/python/genvarloader/_dataset/_query.py b/python/genvarloader/_dataset/_query.py index 2350ac0..358303a 100644 --- a/python/genvarloader/_dataset/_query.py +++ b/python/genvarloader/_dataset/_query.py @@ -103,11 +103,11 @@ def getitem( ) if out_reshape is not None: - recon = tuple(o.reshape(out_reshape + o.shape[1:]) for o in recon) # type: ignore + recon = tuple(o.reshape(out_reshape + o.shape[1:]) for o in recon) # type: ignore[bad-argument-type, no-matching-overload] # heterogeneous reshape() across array kinds; shape tuple may contain None for ragged dims if squeeze: # (1 [p] l) -> ([p] l) - recon = tuple(o.squeeze(0) for o in recon) # type: ignore + recon = tuple(o.squeeze(0) for o in recon) # type: ignore[bad-argument-count] # RaggedVariants.squeeze() takes no args; other kinds do — heterogeneous dispatch if len(recon) == 1: recon = recon[0] @@ -152,7 +152,7 @@ def _getitem_unspliced( to_rc: NDArray[np.bool_] = view.full_regions[r_idx, 3] == -1 recon = tuple(reverse_complement_ragged(r, to_rc) for r in recon) - return recon, squeeze, out_reshape # type: ignore + return recon, squeeze, out_reshape def _getitem_spliced( @@ -222,13 +222,13 @@ def _getitem_spliced( # Permute the per-region to_rc mask the same way the plan permuted # the kernel queries. The plan acts on a flattened (B, *inner_fixed) # k-index, so first replicate to_rc across the inner axes, then - # gather via plan.perm. + # gather via plan.permutation. B = regions.shape[0] - n_k = int(plan.perm.shape[0]) + n_k = int(plan.permutation.shape[0]) inner_factor, rem = divmod(n_k, B) if rem != 0: raise AssertionError( - "plan.perm length is not a multiple of len(regions); " + "plan.permutation length is not a multiple of len(regions); " "inner-fixed flatten factor inconsistent." ) to_rc_unperm = regions[:, 3] == -1 @@ -238,7 +238,7 @@ def _getitem_spliced( # (B, E) C-order: same value across the inner axis for a given # query. np.repeat gives (B*E,) in (query, inner) C-order. to_rc_flat = np.repeat(to_rc_unperm, inner_factor) - to_rc_per_elem: NDArray[np.bool_] = to_rc_flat[plan.perm] + to_rc_per_elem: NDArray[np.bool_] = to_rc_flat[plan.permutation] recon = tuple(reverse_complement_ragged(r, to_rc_per_elem) for r in recon) # Rewrap each per-element Ragged with the plan's group_offsets to expose @@ -334,9 +334,9 @@ def reverse_complement_ragged( """Reverse-complement (or reverse) ragged outputs according to a per-row mask.""" if isinstance(rag, Ragged): if is_rag_dtype(rag, np.bytes_): - rag = Ragged(ak.to_packed(ak.where(to_rc, reverse_complement(rag), rag))) # type: ignore + rag = Ragged(ak.to_packed(ak.where(to_rc, reverse_complement(rag), rag))) else: - rag = Ragged(ak.to_packed(ak.where(to_rc, rag[..., ::-1], rag))) # type: ignore + rag = Ragged(ak.to_packed(ak.where(to_rc, rag[..., ::-1], rag))) elif isinstance(rag, RaggedAnnotatedHaps): rag.haps = reverse_complement_ragged(rag.haps, to_rc) rag.var_idxs = reverse_complement_ragged(rag.var_idxs, to_rc) diff --git a/python/genvarloader/_dataset/_rag_variants.py b/python/genvarloader/_dataset/_rag_variants.py index c291e69..96b1c42 100644 --- a/python/genvarloader/_dataset/_rag_variants.py +++ b/python/genvarloader/_dataset/_rag_variants.py @@ -94,7 +94,7 @@ def find_and_convert_to_ragged(content: Content, depth_context: dict, **kwargs): ): return ak.with_parameter(content, "__list__", "Ragged", highlevel=False) - arr = ak.transform( # type: ignore + arr = ak.transform( # type: ignore[bad-assignment] # ak.transform stub returns Array|tuple|None; we know it's Array here find_and_convert_to_ragged, arr, depth_context={"n_varlen": 0} ) @@ -114,7 +114,7 @@ def start(self) -> Ragged[POS_TYPE]: def ilen(self) -> Ragged[np.int32]: """Indel lengths. Infallible.""" if "ilen" not in self.fields: - ilen = ak.str.length(self.alt) - ak.str.length(self.ref) # type: ignore + ilen = ak.str.length(self.alt) - ak.str.length(self.ref) # type: ignore[missing-attribute] # ak.str submodule isn't exposed in awkward's top-level type stubs ilen = Ragged(ilen) return ilen @@ -161,7 +161,7 @@ def reshape(self, shape: tuple[int | None, ...]) -> Self: def squeeze(self, **kwargs) -> Self: """Squeeze first axis.""" - return self[0] # type: ignore + return self[0] def infer_germline_ccfs_( self, ccf_field: str = "dosages", max_ccf: float = 1.0 @@ -210,14 +210,14 @@ def rc_(self, to_rc: NDArray[np.bool_] | None = None) -> Self: The RaggedVariants object with the alleles reverse complemented. """ if to_rc is None: - to_rc = np.ones(self.shape[0], np.bool_) # type: ignore + to_rc = np.ones(self.shape[0], np.bool_) # type: ignore[no-matching-overload] # ak.Array shape may contain None; np.ones overload expects int|Sequence[int] elif not to_rc.any(): return self self["alt"] = ak.to_packed( ak.where( to_rc, - reverse_complement(self["alt"]), # type: ignore + reverse_complement(self["alt"]), self["alt"], ) ) @@ -226,7 +226,7 @@ def rc_(self, to_rc: NDArray[np.bool_] | None = None) -> Self: self["ref"] = ak.to_packed( ak.where( to_rc, - reverse_complement(self["ref"]), # type: ignore + reverse_complement(self["ref"]), self["ref"], ) ) @@ -352,7 +352,7 @@ def _alleles_to_nested_tensor( if isinstance(_alleles, (ListArray, ListOffsetArray)): offsets = _alleles _alleles = cast(Content, _alleles.content) - _alleles = cast(NDArray[np.bytes_], _alleles.data) # type: ignore + _alleles = cast(NDArray[np.bytes_], _alleles.data) if tokenizer == "seqpro": _alleles = sp.tokenize(_alleles, dict(zip(sp.DNA.alphabet, range(4))), 4) @@ -363,13 +363,13 @@ def _alleles_to_nested_tensor( _alleles = torch.from_numpy(_alleles) - offsets = cast(ListArray | ListOffsetArray, offsets) # type: ignore + offsets = cast(ListArray | ListOffsetArray, offsets) # type: ignore[redundant-cast] # cast is documentation here; pyrefly narrows but readers benefit # (N ~V ~L) -> (N ~V) -> (N*~V) if isinstance(offsets, ListArray): - lengths = cast(NDArray, offsets.stops.data - offsets.starts.data) # type: ignore + lengths = cast(NDArray, offsets.stops.data - offsets.starts.data) offsets = lengths_to_offsets(lengths, np.int32) else: - offsets = offsets.offsets.data.astype(np.int32) # type: ignore + offsets = offsets.offsets.data.astype(np.int32) # type: ignore[missing-attribute] # awkward Index.data typed as ArrayLike; numpy ndarray method missing on stub lengths = np.diff(offsets) if len(lengths) == 0: @@ -492,13 +492,13 @@ def _rc_helper( ragv: RaggedVariants, field: str, to_rc: NDArray[np.bool_] | None = None ): # flatten all but last two dimensions & strip params for numba - alleles = ragv[field].layout # type: ignore - while not isinstance(alleles.content, NumpyArray): # type: ignore - alleles = alleles.content # type: ignore + alleles = ragv[field].layout + while not isinstance(alleles.content, NumpyArray): + alleles = alleles.content alleles = ak.without_parameters(alleles) if to_rc is None: - to_rc = np.ones(ragv.shape[:-1], np.bool_) # type: ignore + to_rc = np.ones(ragv.shape[:-1], np.bool_) # type: ignore[no-matching-overload] # ak.Array shape may contain None; np.ones overload expects int|Sequence[int] # broadcast to same shape as variants, and flatten # (batch) -> (batch * ploidy * n_variants) @@ -507,7 +507,7 @@ def _rc_helper( _to_rc = _to_rc.layout while not isinstance(_to_rc, NumpyArray): _to_rc = _to_rc.content - _to_rc = cast(NDArray[np.bool_], _to_rc.data) # type: ignore + _to_rc = cast(NDArray[np.bool_], _to_rc.data) _rc_numba_helper(alleles, _to_rc) diff --git a/python/genvarloader/_dataset/_reconstruct.py b/python/genvarloader/_dataset/_reconstruct.py index 51c784e..4fbd31b 100644 --- a/python/genvarloader/_dataset/_reconstruct.py +++ b/python/genvarloader/_dataset/_reconstruct.py @@ -207,7 +207,7 @@ def __call__( out_offsets=out_ofsts_per_t, # (b*p+1) regions=regions, # (b, 3) shifts=shifts, # (b p) - geno_offset_idxs=geno_idx, # (b p) + geno_offset_idx=geno_idx, # (b p) geno_v_idxs=self.haps.genotypes.data, # (r*s*p*v) geno_offsets=self.haps.genotypes.offsets, # (r*s*p+1) v_starts=self.haps.variants.start, # (tot_v) diff --git a/python/genvarloader/_dataset/_reference.py b/python/genvarloader/_dataset/_reference.py index 3a60e97..3ebfd1a 100644 --- a/python/genvarloader/_dataset/_reference.py +++ b/python/genvarloader/_dataset/_reference.py @@ -232,7 +232,7 @@ def __post_init__(self): if self.region_names is not None: region_names = self.full_bed[self.region_names].to_numpy().astype(np.str_) self._region_map = HashTable( - max=len(region_names) * 2, # type: ignore + max=len(region_names) * 2, # type: ignore[bad-argument-type] # hirola HashTable.max typed as numpy.Number but accepts int dtype=region_names.dtype, ) self._region_map.add(region_names) @@ -404,9 +404,9 @@ def subset_to(self, regions: StrIdx): or is_dtype(regions, np.integer) or (isinstance(regions, Sequence) and isinstance(regions[0], int)) ): - self._subset_bed = self.full_bed[regions] # type: ignore + self._subset_bed = self.full_bed[regions] # type: ignore[bad-index] # polars DataFrame.__getitem__ doesn't accept all our union members but runtime branch ensures valid kinds else: - self._subset_bed = self.full_bed.filter(regions) # type: ignore + self._subset_bed = self.full_bed.filter(regions) # type: ignore[bad-argument-type] # polars filter accepts predicates / bool arrays; our union has equivalent shapes self._subset_regions = bed_to_regions(self._subset_bed, self.reference.c_map) return self @@ -456,7 +456,7 @@ def _getitem_spliced(self, idx: Idx) -> T: if self.rc_neg: to_rc_unperm = regions[:, 3] == -1 if to_rc_unperm.any(): - to_rc_perm = to_rc_unperm[plan.perm] + to_rc_perm = to_rc_unperm[plan.permutation] per_elem = Ragged( ak.to_packed( ak.where( @@ -475,19 +475,19 @@ def _getitem_spliced(self, idx: Idx) -> T: ) if out_reshape is not None: - ref = ref.reshape(out_reshape) # type: ignore + ref = ref.reshape(out_reshape) if self.output_length == "ragged": out = ref elif self.output_length == "variable": - out = to_padded(ref, pad_value=bytes([self.reference.pad_char])) # type: ignore + out = to_padded(ref, pad_value=bytes([self.reference.pad_char])) else: raise AssertionError( "splice + fixed-length output should be blocked earlier" ) if squeeze: - out = out.squeeze(0) # type: ignore + out = out.squeeze(0) return cast(T, out) @@ -546,17 +546,17 @@ def _getitem_unspliced(self, idx: Idx) -> T: ref = ak.to_packed(ak.where(to_rc, reverse_complement(ref), ref)) if out_reshape is not None: - ref = ref.reshape(out_reshape) # type: ignore + ref = ref.reshape(out_reshape) if self.output_length == "ragged": out = ref elif self.output_length == "variable": - out = to_padded(ref, pad_value=bytes([self.reference.pad_char])) # type: ignore + out = to_padded(ref, pad_value=bytes([self.reference.pad_char])) else: - out = ref.to_numpy() # type: ignore + out = ref.to_numpy() if squeeze: - out = out.squeeze(0) # type: ignore + out = out.squeeze(0) return cast(T, out) @@ -701,7 +701,7 @@ def _fetch_spliced_ref( This is the kernel-dispatch core shared by :class:`Ref.__call__`'s splice branch and :meth:`RefDataset._getitem_spliced`. """ - permuted_regions = regions[plan.perm] + permuted_regions = regions[plan.permutation] raw = get_reference( regions=permuted_regions, out_offsets=plan.permuted_out_offsets, @@ -717,8 +717,8 @@ def _fetch_spliced_ref( if TORCH_AVAILABLE: - import torch # type: ignore - import torch.utils.data as td # type: ignore + import torch + import torch.utils.data as td class TorchDataset(td.Dataset): dataset: RefDataset[NDArray[np.bytes_]] @@ -753,4 +753,4 @@ def __getitem__(self, idx: list[int]): return batch else: - TorchDataset = no_torch_error # type: ignore + TorchDataset = no_torch_error diff --git a/python/genvarloader/_dataset/_splice.py b/python/genvarloader/_dataset/_splice.py index 82c9945..b33b63a 100644 --- a/python/genvarloader/_dataset/_splice.py +++ b/python/genvarloader/_dataset/_splice.py @@ -21,7 +21,7 @@ class SplicePlan: """Permutation + offsets that re-target the kernel write into spliced layout. The kernel is called with ``ploidy=1`` and one query per element of the - flattened ``(B, *inner_fixed)`` length array. ``perm`` reorders those + flattened ``(B, *inner_fixed)`` length array. ``permutation`` reorders those flattened k-indices so the global write order becomes ``(splice_row, sample, *inner_fixed, splice_element)`` C-order. After the kernel writes, the data buffer can be exposed as a Ragged with either @@ -29,7 +29,7 @@ class SplicePlan: ``(splice_row, sample, inner)`` cell). """ - perm: NDArray[np.intp] + permutation: NDArray[np.intp] permuted_lengths: NDArray[np.int32] permuted_out_offsets: NDArray[np.int64] group_offsets: NDArray[np.int64] @@ -97,10 +97,10 @@ def build_splice_plan( pair_lengths = np.diff(splice_row_offsets) # length n_pairs if E == 1: # Identity permutation; flat_lengths shape is (B,) already permuted. - perm = np.arange(B, dtype=np.intp) + permutation = np.arange(B, dtype=np.intp) permuted_lengths_flat = flat_lengths.reshape(-1).astype(np.int32, copy=False) else: - # Build perm by iterating (pair, e, element). + # Build permutation by iterating (pair, e, element). # For a pair p with element range [s, s+L): # for e in 0..E: # k-indices = [(s+0)*E + e, (s+1)*E + e, ..., (s+L-1)*E + e] @@ -117,8 +117,12 @@ def build_splice_plan( # (E, L): each row e is q_range*E + e. ke = q_range[None, :] * E + np.arange(E, dtype=np.intp)[:, None] perm_parts.append(ke.reshape(-1)) - perm = np.concatenate(perm_parts) if perm_parts else np.empty(0, dtype=np.intp) - permuted_lengths_flat = flat_2d.reshape(-1)[perm].astype(np.int32, copy=False) + permutation = ( + np.concatenate(perm_parts) if perm_parts else np.empty(0, dtype=np.intp) + ) + permuted_lengths_flat = flat_2d.reshape(-1)[permutation].astype( + np.int32, copy=False + ) permuted_out_offsets = lengths_to_offsets(permuted_lengths_flat, dtype=np.int64) @@ -147,7 +151,7 @@ def build_splice_plan( out_shape = (n_rows, n_samples, None) return SplicePlan( - perm=perm, + permutation=permutation, permuted_lengths=permuted_lengths_flat, permuted_out_offsets=permuted_out_offsets, group_offsets=group_offsets, @@ -206,7 +210,7 @@ def from_bed( ).to_ak() splice_map = cast(ak.Array, splice_map) - rows = HashTable(max=len(names) * 2, dtype=names.dtype) # type: ignore + rows = HashTable(max=len(names) * 2, dtype=names.dtype) # type: ignore[bad-argument-type] # hirola HashTable.max typed as numpy.Number but accepts int rows.add(names) return ( diff --git a/python/genvarloader/_dataset/_tracks.py b/python/genvarloader/_dataset/_tracks.py index 588cb7a..3f07722 100644 --- a/python/genvarloader/_dataset/_tracks.py +++ b/python/genvarloader/_dataset/_tracks.py @@ -144,7 +144,7 @@ def shift_and_realign_tracks_sparse( out_offsets: NDArray[np.integer], regions: NDArray[np.integer], shifts: NDArray[np.integer], - geno_offset_idxs: NDArray[np.integer], + geno_offset_idx: NDArray[np.integer], geno_v_idxs: NDArray[np.integer], geno_offsets: NDArray[np.integer], v_starts: NDArray[np.integer], @@ -169,7 +169,7 @@ def shift_and_realign_tracks_sparse( Shape = (batch, 3) Regions, each is (contig_idx, start, end). shifts : NDArray[np.int32] Shape = (batch, ploidy) Shifts for each haplotype. - geno_offset_idxs : NDArray[np.intp] + geno_offset_idx : NDArray[np.intp] Shape = (batch, ploidy) Indices into offsets for each region. geno_v_idxs : NDArray[np.int32] Shape = (variants) Indices of variants. @@ -188,7 +188,7 @@ def shift_and_realign_tracks_sparse( keep_offsets : Optional[NDArray[np.int64]] Shape = (batch*ploidy + 1) Offsets into keep. """ - n_regions, ploidy = geno_offset_idxs.shape + n_regions, ploidy = geno_offset_idx.shape for query in nb.prange(n_regions): t_s, t_e = track_offsets[query], track_offsets[query + 1] q_track = tracks[t_s:t_e] @@ -196,7 +196,7 @@ def shift_and_realign_tracks_sparse( q_start = regions[query, 1] for hap in nb.prange(ploidy): - o_idx = geno_offset_idxs[query, hap] + o_idx = geno_offset_idx[query, hap] k_idx = query * ploidy + hap if keep is not None and keep_offsets is not None: @@ -627,7 +627,7 @@ def _call_float32( assert not isinstance(output_length, int), ( "splice plan path requires variable/ragged output" ) - # The plan was built with inner_fixed = (n_tracks,) so plan.perm has + # The plan was built with inner_fixed = (n_tracks,) so plan.permutation has # length B*T indexed in (query, track) C-order: k = query * T + track. # Each k_new in the permuted order targets one (query, track) pair; we # need to write its bytes into out_buf at plan.permuted_out_offsets[k_new]. @@ -635,7 +635,7 @@ def _call_float32( total = int(splice_plan.permuted_out_offsets[-1]) out_buf = np.empty(total, np.float32) - k_old = splice_plan.perm # length B*T + k_old = splice_plan.permutation # length B*T track_of_k = k_old % n_tracks query_of_k = k_old // n_tracks @@ -705,7 +705,7 @@ def _call_intervals(self, idx: NDArray[np.integer]) -> RaggedIntervals: starts = ak.concatenate(out_starts, axis=1) ends = ak.concatenate(out_ends, axis=1) values = ak.concatenate(out_values, axis=1) - return RaggedIntervals(starts, ends, values) # type: ignore + return RaggedIntervals(starts, ends, values) def write_transformed_track( self, diff --git a/python/genvarloader/_dataset/_write.py b/python/genvarloader/_dataset/_write.py index ba475fd..56b26bd 100644 --- a/python/genvarloader/_dataset/_write.py +++ b/python/genvarloader/_dataset/_write.py @@ -513,7 +513,7 @@ def _write_phased_chunked( var_idxs = ak.flatten( ak.concatenate(ls_sparse, -1), - None, # type: ignore + None, ).to_numpy() # (s p) lengths = np.stack([a.lengths for a in ls_sparse], 0).sum(0) @@ -804,10 +804,10 @@ def _write_track( out = np.memmap( out_dir / "offsets.npy", - dtype=offsets.dtype, # type: ignore + dtype=offsets.dtype, mode="r+", shape=1, offset=offset_offset, ) - out[-1] = offsets[-1] # type: ignore + out[-1] = offsets[-1] out.flush() diff --git a/python/genvarloader/_dummy.py b/python/genvarloader/_dummy.py index 851c856..497b8c5 100644 --- a/python/genvarloader/_dummy.py +++ b/python/genvarloader/_dummy.py @@ -87,7 +87,7 @@ def get_dummy_dataset(spliced: bool = False): start=repeat(dummy_regions[:, 1].astype(POS_TYPE), "r -> (r s)", s=n_samples), ilen=repeat(np.array([-2, -1, 0, 1], np.int32), "s -> (r s)", r=n_regions), ref=None, - alt=RaggedAlleles.from_offsets( # type: ignore + alt=RaggedAlleles.from_offsets( # type: ignore[bad-argument-type] # RaggedAlleles is a Phantom subclass; from_offsets returns base Ragged[bytes_] data=repeat(sp.cast_seqs("ACGTT"), "a -> (r a)", r=n_regions), shape=(n_regions * n_samples, None), offsets=lengths_to_offsets( diff --git a/python/genvarloader/_ragged.py b/python/genvarloader/_ragged.py index a22f256..b147b94 100644 --- a/python/genvarloader/_ragged.py +++ b/python/genvarloader/_ragged.py @@ -33,7 +33,7 @@ class RaggedIntervals: values: Ragged[np.float32] def __getitem__(self, idx) -> RaggedIntervals: - out = RaggedIntervals(self.starts[idx], self.ends[idx], self.values[idx]) # type: ignore + out = RaggedIntervals(self.starts[idx], self.ends[idx], self.values[idx]) # type: ignore[bad-argument-type] # Ragged.__getitem__ widens to Array per awkward stubs return out @property @@ -75,9 +75,9 @@ def squeeze(self, axis: int | tuple[int, ...] | None = None) -> RaggedIntervals: Axis or axes to squeeze. If None, all axes of length 1 are squeezed. """ return RaggedIntervals( - self.starts.squeeze(axis), # type: ignore - self.ends.squeeze(axis), # type: ignore - self.values.squeeze(axis), # type: ignore + self.starts.squeeze(axis), # type: ignore[bad-argument-type] # seqpro Ragged.squeeze stub returns broader union than Ragged[T] + self.ends.squeeze(axis), # type: ignore[bad-argument-type] # see above + self.values.squeeze(axis), # type: ignore[bad-argument-type] # see above ) def to_fixed_shape( @@ -286,7 +286,7 @@ def ufunc_comp_dna(seq: NDArray[np.uint8]) -> NDArray[np.uint8]: def _ak_comp_dna_helper(layout, **kwargs): if layout.is_numpy: return NumpyArray( - ufunc_comp_dna(layout.data), # type: ignore + ufunc_comp_dna(layout.data), parameters=layout.parameters, ) diff --git a/python/genvarloader/_torch.py b/python/genvarloader/_torch.py index edd191c..b786ec0 100644 --- a/python/genvarloader/_torch.py +++ b/python/genvarloader/_torch.py @@ -18,7 +18,7 @@ TORCH_AVAILABLE = True except ImportError: - TORCH_AVAILABLE = False # type: ignore + TORCH_AVAILABLE = False if TYPE_CHECKING: @@ -28,7 +28,7 @@ from ._dataset._impl import Dataset -def no_torch_error(*args, **kwargs): # type: ignore +def no_torch_error(*args, **kwargs): raise ImportError( "PyTorch is not available. Please install PyTorch to use this function/class." ) @@ -68,7 +68,7 @@ def get_dataloader( if sampler is None: sampler = get_sampler( - len(dataset), # type: ignore + len(dataset), batch_size, shuffle, drop_last, @@ -216,7 +216,7 @@ class StratifiedSampler(td.Sampler[np.intp]): ds_idx: NDArray[np.intp] - def __init__( # type: ignore + def __init__( self, n_regions: int, n_samples: int, @@ -240,5 +240,5 @@ def __len__(self): def __iter__(self): return iter(self.ds_idx) else: - TorchDataset = no_torch_error # type: ignore - StratifiedSampler = no_torch_error # type: ignore + TorchDataset = no_torch_error + StratifiedSampler = no_torch_error diff --git a/python/genvarloader/_variants/_sitesonly.py b/python/genvarloader/_variants/_sitesonly.py index 9847dbc..0e54e79 100644 --- a/python/genvarloader/_variants/_sitesonly.py +++ b/python/genvarloader/_variants/_sitesonly.py @@ -148,7 +148,7 @@ def __init__( raise NotImplementedError("max_variants_per_region > 1 not yet supported") if not isinstance(dataset, ArrayDataset): - raise ValueError( # type: ignore + raise ValueError( 'Dataset output_length must either be "variable" or a fixed length integer.' ) @@ -273,7 +273,7 @@ def __getitem__( wt_haps, mut_haps, flags, - tracks, # type: ignore | guaranteed bound + tracks, # type: ignore[unbound-name] # tracks is bound when isinstance(out, tuple) branch is taken ) else: return wt_haps, mut_haps, flags diff --git a/python/genvarloader/data_registry.py b/python/genvarloader/data_registry.py index 4de227b..802b968 100644 --- a/python/genvarloader/data_registry.py +++ b/python/genvarloader/data_registry.py @@ -28,7 +28,7 @@ def fetch(name: Literal["geuvadis_ebi", "1kgp"]) -> dict[str, Path]: return _geuvadis_ebi() elif name == "1kgp": return _1kgp() - raise ValueError(f"Unknown dataset: {name}") # type: ignore + raise ValueError(f"Unknown dataset: {name}") def _geuvadis_ebi(): diff --git a/tests/dataset/genotypes/test_choose_exonic_variants.py b/tests/dataset/genotypes/test_choose_exonic_variants.py index 8d4d7ff..f5f4c4b 100644 --- a/tests/dataset/genotypes/test_choose_exonic_variants.py +++ b/tests/dataset/genotypes/test_choose_exonic_variants.py @@ -22,7 +22,7 @@ def _common_inputs() -> dict[str, np.ndarray]: return { "starts": np.asarray([0], dtype=np.int32), "ends": np.asarray([100], dtype=np.int32), - "geno_offset_idxs": np.asarray([[0, 1]], dtype=np.intp), + "geno_offset_idx": np.asarray([[0, 1]], dtype=np.intp), # Two variants, indices 0 and 1, both inside [0, 100): "geno_v_idxs": np.asarray([0, 1], dtype=np.int32), "v_starts": np.asarray([10, 50], dtype=np.int32), diff --git a/tests/dataset/test_splice_plan.py b/tests/dataset/test_splice_plan.py index efde304..10f1540 100644 --- a/tests/dataset/test_splice_plan.py +++ b/tests/dataset/test_splice_plan.py @@ -19,7 +19,7 @@ def test_plan_no_inner_axes(): n_rows=2, ) # No inner fixed, so perm is identity. - np.testing.assert_array_equal(plan.perm, [0, 1, 2]) + np.testing.assert_array_equal(plan.permutation, [0, 1, 2]) np.testing.assert_array_equal(plan.permuted_lengths, [3, 4, 5]) np.testing.assert_array_equal(plan.permuted_out_offsets, [0, 3, 7, 12]) # group_offsets at (row, sample) granularity: 2 entries + 1. @@ -65,7 +65,7 @@ def test_plan_ploidy_2(): n_samples=1, n_rows=2, ) - np.testing.assert_array_equal(plan.perm, [0, 2, 1, 3, 4, 5]) + np.testing.assert_array_equal(plan.permutation, [0, 2, 1, 3, 4, 5]) np.testing.assert_array_equal(plan.permuted_lengths, [10, 20, 11, 21, 30, 31]) np.testing.assert_array_equal( plan.permuted_out_offsets, [0, 10, 30, 41, 62, 92, 123] @@ -100,7 +100,7 @@ def test_plan_multi_sample_ploidy_2(): # s=0, p=1: e=0 → k(q=0,p=1)=1; e=1 → k(q=1,p=1)=3 # s=1, p=0: e=0 → k(q=2,p=0)=4; e=1 → k(q=3,p=0)=6 # s=1, p=1: e=0 → k(q=2,p=1)=5; e=1 → k(q=3,p=1)=7 - np.testing.assert_array_equal(plan.perm, [0, 2, 1, 3, 4, 6, 5, 7]) + np.testing.assert_array_equal(plan.permutation, [0, 2, 1, 3, 4, 6, 5, 7]) np.testing.assert_array_equal(plan.permuted_lengths, [1, 3, 2, 4, 5, 7, 6, 8]) # group_offsets at (1, 2, 2) granularity = 4 cells + 1. # cell sums: 1+3=4, 2+4=6, 5+7=12, 6+8=14 @@ -137,7 +137,7 @@ def test_plan_single_element_rows(): n_rows=2, ) # With singleton splice rows the permutation still groups by (r, s, p). - np.testing.assert_array_equal(plan.perm, [0, 1, 2, 3]) + np.testing.assert_array_equal(plan.permutation, [0, 1, 2, 3]) np.testing.assert_array_equal(plan.permuted_lengths, [5, 6, 7, 8]) @@ -157,7 +157,7 @@ def test_plan_inner_fixed_size_3(): # t=0: e=0 → k=0; e=1 → k=3 # t=1: e=0 → k=1; e=1 → k=4 # t=2: e=0 → k=2; e=1 → k=5 - np.testing.assert_array_equal(plan.perm, [0, 3, 1, 4, 2, 5]) + np.testing.assert_array_equal(plan.permutation, [0, 3, 1, 4, 2, 5]) np.testing.assert_array_equal(plan.permuted_lengths, [1, 4, 2, 5, 3, 6]) np.testing.assert_array_equal(plan.group_offsets, [0, 5, 12, 21]) assert plan.out_shape == (1, 1, 3, None) @@ -173,7 +173,7 @@ def test_plan_dtype_invariants(): n_samples=1, n_rows=2, ) - assert plan.perm.dtype == np.intp + assert plan.permutation.dtype == np.intp assert plan.permuted_lengths.dtype == np.int32 # offset arrays use seqpro's OFFSET_TYPE (int64). assert plan.permuted_out_offsets.dtype == np.int64