From 10369153ae5e5b142a22e74f4f1514a1ac9d23b9 Mon Sep 17 00:00:00 2001 From: Alan Davidson Date: Fri, 24 Jan 2025 16:17:57 -0500 Subject: [PATCH 1/2] update types with ndarray, not done yet --- find_duplicates.py | 17 ++++++++++++----- gui.py | 9 +++++---- image_pyramid.py | 9 +++++---- tokenizer.py | 21 ++++++++++++++++++++- 4 files changed, 42 insertions(+), 14 deletions(-) diff --git a/find_duplicates.py b/find_duplicates.py index d7bdaea..35db94d 100644 --- a/find_duplicates.py +++ b/find_duplicates.py @@ -1,4 +1,5 @@ import numpy +import numpy.typing from typing import Iterable, Optional, Self @@ -71,7 +72,7 @@ def __str__(self) -> str: # Used solely for debugging def _initialize_segments( - matrix: numpy.ndarray, is_single_file: bool + matrix: numpy.typing.NDArray[numpy.uint8], is_single_file: bool ) -> tuple[list[_SegmentUnionFind], dict[_Coordinates, _SegmentUnionFind]]: """ Each _SegmentUnionFind we return has size at least 2: these have already @@ -111,7 +112,7 @@ def _initialize_segments( def _get_pixel_to_segment( - matrix: numpy.ndarray, is_single_file: bool + matrix: numpy.typing.NDArray[numpy.uint8], is_single_file: bool ) -> dict[_Coordinates, _SegmentUnionFind]: """ If is_single_file is set, we do not include pixels on the main diagonal, @@ -158,7 +159,9 @@ def key( return pixel_to_segment -def get_lengths(matrix: numpy.ndarray, is_single_file: bool) -> numpy.ndarray: +def get_lengths( + matrix: numpy.typing.NDArray[numpy.uint8], is_single_file: bool +) -> numpy.typing.NDArray[numpy.uint32]: """ We return an image whose pixels indicate how long a chain of nonzero values from the original matrix is. If is_single_file is set, the main diagonal @@ -175,7 +178,7 @@ def get_lengths(matrix: numpy.ndarray, is_single_file: bool) -> numpy.ndarray: def get_segments( - matrix: numpy.ndarray, is_single_file: bool + matrix: numpy.typing.NDArray[numpy.uint8], is_single_file: bool ) -> set[_SegmentUnionFind]: """ We return set of _SegmentUnionFinds describing all the segments we found in @@ -247,7 +250,11 @@ def update_candidate(candidate: _SegmentUnionFind) -> None: return best_candidate -def get_hues(matrix: numpy.ndarray, is_single_file: bool) -> numpy.ndarray: +def get_hues( + matrix: numpy.typing.NDArray[numpy.uint8], is_single_file: bool +) -> numpy.typing.NDArray[numpy.uint8]: + # Scores are going to start out as uint32's, but get turned into floats. + scores: numpy.typing.NDArray scores = get_lengths(matrix, is_single_file) # Cut everything off at the max, then divide by the max to put all values # between 0 and 1. diff --git a/gui.py b/gui.py index 1f9eea7..f1e1d81 100644 --- a/gui.py +++ b/gui.py @@ -1,5 +1,6 @@ from math import ceil import numpy +import numpy.typing import tkinter as tk import tkinter.font as tkfont from typing import Optional @@ -130,8 +131,8 @@ def display(self, pixel: int) -> None: class _Gui(tk.Frame): def __init__( self, - matrix: numpy.ndarray, - hues: Optional[numpy.ndarray], + matrix: numpy.typing.NDArray[numpy.uint8], + hues: Optional[numpy.typing.NDArray[numpy.uint8]], data_a: FileInfo, data_b: FileInfo, map_width: int, @@ -154,8 +155,8 @@ def _on_motion(self, event: tk.Event) -> None: def launch( - matrix: numpy.ndarray, - hues: Optional[numpy.ndarray], + matrix: numpy.typing.NDArray[numpy.uint8], + hues: Optional[numpy.typing.NDArray[numpy.uint8]], data_a: FileInfo, data_b: FileInfo, map_width: int, diff --git a/image_pyramid.py b/image_pyramid.py index 4e1c164..6aebd07 100644 --- a/image_pyramid.py +++ b/image_pyramid.py @@ -1,4 +1,5 @@ import numpy +import numpy.typing from typing import Optional import utils @@ -9,8 +10,8 @@ class ImagePyramid: def __init__( self, - matrix: numpy.ndarray, - hues: Optional[numpy.ndarray], + matrix: numpy.typing.NDArray[numpy.uint8], + hues: Optional[numpy.typing.NDArray[numpy.uint8]], sidelength: int, ) -> None: """ @@ -20,7 +21,7 @@ def __init__( self._pyramid.append(matrix) self._sidelength = sidelength - self._hue_pyramid: Optional[list[numpy.ndarray]] + self._hue_pyramid: Optional[list[numpy.typing.NDArray[numpy.uint8]]] if hues is None: self._hue_pyramid = None else: @@ -85,7 +86,7 @@ def __init__( def get_submatrix( self, top_left_x: int, top_left_y: int - ) -> tuple[numpy.ndarray, int, int]: + ) -> tuple[numpy.typing.NDArray[numpy.uint8], int, int]: """ We return a sidelength-by-sidelength-by-3 ndarray containing an HSV image of the relevant region, and the indices of the top-left corner. diff --git a/tokenizer.py b/tokenizer.py index 529c220..750c175 100755 --- a/tokenizer.py +++ b/tokenizer.py @@ -1,5 +1,6 @@ import code_tokenize import numpy +import numpy.typing from typing import NamedTuple, Optional from code_tokenize.tokens import ASTToken @@ -9,11 +10,29 @@ # Syntactic sugar: a Boundary contains the start and end of a token, where # each position is described by its line number and the column within the line. +# The first line of the file is line 1, but the first column of the line is +# column 0. The end is the first location *after* the end of the token (which +# might be 1 character past the end of the current line, if this is the last +# token on the line). +# Example: This file: +# print("hi") +# print("bye") +# Likely has these boundaries: +# ((1, 0), (1, 5)) print +# ((1, 5), (1, 6)) ( +# ((1, 6), (1, 10)) "hi" +# ((1, 10), (1, 11)) ) +# ((2, 0), (2, 5)) print +# ((2, 5), (2, 6)) ( +# ((2, 6), (2, 11)) "bye" +# ((2, 11), (2, 12)) ) + + Boundary = tuple[tuple[int, int], tuple[int, int]] class FileInfo(NamedTuple): - tokens: numpy.ndarray # Really a list[code_tokenize.tokens.Token] + tokens: numpy.typing.NDArray[str] lines: list[str] boundaries: list[Boundary] filename: str From e13400a075398051a8e51d696e96fb00bd1ca53d Mon Sep 17 00:00:00 2001 From: Alan Davidson Date: Fri, 24 Jan 2025 17:24:23 -0500 Subject: [PATCH 2/2] finish updating type annotations --- tokenizer.py | 4 +--- utils.py | 15 ++++++++------- zoom_map.py | 9 ++++++--- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/tokenizer.py b/tokenizer.py index 750c175..b887b7a 100755 --- a/tokenizer.py +++ b/tokenizer.py @@ -26,13 +26,11 @@ # ((2, 5), (2, 6)) ( # ((2, 6), (2, 11)) "bye" # ((2, 11), (2, 12)) ) - - Boundary = tuple[tuple[int, int], tuple[int, int]] class FileInfo(NamedTuple): - tokens: numpy.typing.NDArray[str] + tokens: numpy.typing.NDArray[numpy.str_] lines: list[str] boundaries: list[Boundary] filename: str diff --git a/utils.py b/utils.py index 05a83dd..461cece 100644 --- a/utils.py +++ b/utils.py @@ -1,12 +1,15 @@ import numpy +import numpy.typing from typing import Optional PIXELS_IN_BIG_FILE = 50 * 1000 * 1000 # 50 megapixels -def to_hsv_matrix(matrix: numpy.ndarray, - hues: Optional[numpy.ndarray]) -> numpy.ndarray: +def to_hsv_matrix( + matrix: numpy.typing.NDArray[numpy.uint8], + hues: Optional[numpy.typing.NDArray[numpy.uint8]], +) -> numpy.typing.NDArray[numpy.uint8]: """ The matrix is a 2D array of uint8's. The hues are either None or another 2D array of the same shape. @@ -22,12 +25,10 @@ def to_hsv_matrix(matrix: numpy.ndarray, return result -# The two arguments to make_matrix both have type -# list[code_tokenize.tokens.ASTToken], but that module does not have type -# annotations and adding them in would be annoying. def make_matrix( - tokens_a: numpy.ndarray, tokens_b: numpy.ndarray -) -> numpy.ndarray: + tokens_a: numpy.typing.NDArray[numpy.str_], + tokens_b: numpy.typing.NDArray[numpy.str_] +) -> numpy.typing.NDArray[numpy.uint8]: matrix = numpy.zeros([len(tokens_a), len(tokens_b)], dtype=numpy.uint8) for i, value in enumerate(tokens_a): matrix[i, :] = (tokens_b == value) diff --git a/zoom_map.py b/zoom_map.py index e1de3c3..1e9cad8 100644 --- a/zoom_map.py +++ b/zoom_map.py @@ -1,5 +1,6 @@ from functools import partial import numpy +import numpy.typing import PIL.Image import PIL.ImageTk import tkinter as tk @@ -11,8 +12,8 @@ class ZoomMap(tk.Canvas): def __init__( self, tk_parent: tk.Widget, - matrix: numpy.ndarray, - hues: Optional[numpy.ndarray], + matrix: numpy.typing.NDArray[numpy.uint8], + hues: Optional[numpy.typing.NDArray[numpy.uint8]], sidelength: int, ) -> None: super().__init__(tk_parent, height=sidelength, width=sidelength, @@ -110,7 +111,9 @@ def _on_unclick(self, event: tk.Event) -> None: self._set_image() @staticmethod - def _to_image(matrix: numpy.ndarray) -> PIL.ImageTk.PhotoImage: + def _to_image( + matrix: numpy.typing.NDArray[numpy.uint8] + ) -> PIL.ImageTk.PhotoImage: image = PIL.Image.fromarray(matrix, mode="HSV") return PIL.ImageTk.PhotoImage(image)