diff --git a/pylib/fields/base_field.py b/pylib/fields/base_field.py index 155d6de..f6e39bb 100644 --- a/pylib/fields/base_field.py +++ b/pylib/fields/base_field.py @@ -11,6 +11,7 @@ class BaseField: field_set: str = "" # All fields in this set get reconciled at the same time suffix: Union[int, float] = 0 # When columns have same name break the tie with this task_id: str = "" + freeze: bool = False def to_dict(self, reconciled=False, add_note=False) -> dict[str, Any]: raise NotImplementedError() diff --git a/pylib/fields/highlighter_field.py b/pylib/fields/highlighter_field.py index 4ec5aa3..753e946 100644 --- a/pylib/fields/highlighter_field.py +++ b/pylib/fields/highlighter_field.py @@ -45,8 +45,7 @@ def unreconciled_list( def to_dict(self, reconciled=False, add_note=False) -> dict[str, Any]: field_dict = { self.header("text"): self.text, - # self.header("start"): self.start, - # self.header("end"): self.end, + self.header("position"): f"({self.start}, {self.end})", } return self.decorate_dict(field_dict, add_note) @@ -201,6 +200,7 @@ def align_json_fields(group) -> dict[str, list["HighlightField"]]: # Update unreconciled suffixes to match the reconciled span for j, part in enumerate(parts): part.suffix = i if j == 0 else float(f"{i}.{j}") + part.freeze = True # Add a reconciled record, one for each set of parts high = HighlightField( @@ -212,6 +212,7 @@ def align_json_fields(group) -> dict[str, list["HighlightField"]]: label=parts[0].label, field_set=parts[0].field_set, suffix=i, + freeze=True, ) aligned[(start, end)].append(high) diff --git a/pylib/formats/common_format.py b/pylib/formats/common_format.py index 9330775..8e68248 100644 --- a/pylib/formats/common_format.py +++ b/pylib/formats/common_format.py @@ -60,10 +60,9 @@ def read_table(args, df): for raw_row in records: row = Row() - row.add_field( - args.group_by, - SameField(value=raw_row[args.group_by]), - ) + row.append(SameField( + name=args.group_by, value=raw_row[args.group_by], + )) for name, value in raw_row.items(): if name == args.group_by: @@ -77,7 +76,8 @@ def read_table(args, df): value = json.loads(value) else: value = {"x": 0, "y": 0, "width": 0, "height": 0} - row.add_field(name, BoxField( + row.append(BoxField( + name=name, left=round(value["x"]), right=round(value["x"] + value["width"]), top=round(value["y"]), @@ -88,7 +88,8 @@ def read_table(args, df): value = json.loads(value) else: value = {"x1": 0, "y1": 0, "x2": 0, "y2": 0} - row.add_field(name, LengthField( + row.append(LengthField( + name=name, x1=round(value["x1"]), y1=round(value["y1"]), x2=round(value["x2"]), @@ -96,22 +97,23 @@ def read_table(args, df): )) case "noop": value = value if value else "" - row.add_field(name, NoOpField(value=value)) + row.append(NoOpField(name=name, value=value)) case "point": value = json.loads(value) if value else {"x": 0, "y": 0} - row.add_field(name, PointField( + row.append(PointField( + name=name, x=round(value["x"]), y=round(value["y"]), )) case "same": value = value if value else "" - row.add_field(name, SameField(value=value)) + row.append(SameField(name=name, value=value)) case "select": value = value if value else "" - row.add_field(name, SelectField(value=value)) + row.append(SelectField(name=name, value=value)) case "text": value = value if value else "" - row.add_field(name, TextField(value=value)) + row.append(TextField(name=name, value=value)) table.rows.append(row) diff --git a/pylib/row.py b/pylib/row.py index e10089d..e231b3f 100644 --- a/pylib/row.py +++ b/pylib/row.py @@ -22,12 +22,18 @@ @dataclass class Row: - # Yes, a list and not a dict because we're renaming things etc. fields: list[AnyField] = field_default(default_factory=list) def __getitem__(self, key): return next(f for f in self.fields if f.field_name == key) + def __iter__(self): + yield from self.fields + + def __iadd__(self, other): + self.fields += other.fields if isinstance(other, Row) else other + return self + def append(self, field: AnyField): self.fields.append(field) @@ -42,7 +48,7 @@ def to_dict(self, add_note=False, reconciled=False) -> dict[str, Any]: for field in self.fields: - if isinstance(field, TaskField): + if isinstance(field, TaskField) and not field.freeze: suffixes[field.name_group] += 1 field.suffix = suffixes[field.name_group] diff --git a/pylib/summary.py b/pylib/summary.py index 2fdf648..ff524a5 100644 --- a/pylib/summary.py +++ b/pylib/summary.py @@ -25,10 +25,6 @@ def report(args, unreconciled: Table, reconciled: Table): reconciled_df = reconciled.to_df(args) flag_df = reconciled.to_flag_df(args) alias_group_by(args, unreconciled_df, reconciled_df, flag_df) - from pprint import pp - # pp(unreconciled_df.columns) - # pp(reconciled_df.columns) - # pp(flag_df.columns) has_users = 1 if args.user_column in unreconciled_df.columns else 0 transcribers_df = get_transcribers_df(args, unreconciled_df) @@ -279,7 +275,7 @@ def merge_dataframes(args, unreconciled_df, reconciled_df, flag_df): keys += [args.row_key] if args.row_key in unreconciled_df.columns else [] keys += [args.user_column] if args.user_column in unreconciled_df.columns else [] - df = pd.concat([reconciled_df, note_df, unreconciled_df]).fillna("") + df = pd.concat([unreconciled_df, note_df, reconciled_df]).fillna("") df = df.reset_index(drop=True) df = df[keys + [c for c in df.columns if c not in keys]] df = df.sort_values(keys) diff --git a/pylib/table.py b/pylib/table.py index a56a352..d786ebc 100644 --- a/pylib/table.py +++ b/pylib/table.py @@ -33,15 +33,15 @@ def to_csv(self, args: Namespace, path, add_note=False) -> None: df.to_csv(path, index=False) def to_df(self, args: Namespace, add_note=False) -> pd.DataFrame: - records = self.to_dict(add_note=add_note) + records = self.to_records(add_note=add_note) df = pd.DataFrame(records) headers = self.field_order(df, args) df = df[headers] return df - def to_dict(self, add_note=False) -> list[dict]: - as_dict = [r.to_dict(add_note, self.reconciled) for r in self.rows] - return as_dict + def to_records(self, add_note=False) -> list[dict]: + as_recs = [r.to_dict(add_note, self.reconciled) for r in self.rows] + return as_recs @staticmethod def field_order(df, args): @@ -64,7 +64,7 @@ def reconcile(self, args) -> "Table": table = Table(reconciled=True) for _, row_group in groups: - row = Row() + new_row = Row() row_group = list(row_group) row_count = len(row_group) @@ -75,8 +75,8 @@ def reconcile(self, args) -> "Table": if field_set and field_set not in used_field_sets: group = [] for row in row_group: - row_set = [f for f in row.fields if f.field_set == field_set] - group.append(row_set) + fields = [f for f in row.fields if f.field_set == field_set] + group.append(fields) used_field_sets.add(field_set) elif field_set in used_field_sets: @@ -86,17 +86,15 @@ def reconcile(self, args) -> "Table": group = [r[field_name] for r in row_group] if not group: - row.append(cls( + new_row.append(cls( note=f"All {row_count} records are blank", flag=Flag.ALL_BLANK )) continue fields = cls.reconcile(group, row_count, args) - fields = fields if isinstance(fields, list) else [fields] - for field in fields: - row.append(field) + new_row += fields if isinstance(fields, list) else [fields] - table.rows.append(row) + table.rows.append(new_row) return table