Skip to content

Commit

Permalink
WIP Simultaneously reconcile some field types #79
Browse files Browse the repository at this point in the history
  • Loading branch information
rafelafrance committed Jun 26, 2023
1 parent cd65a8d commit d957a7d
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 32 deletions.
1 change: 1 addition & 0 deletions pylib/fields/base_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class BaseField:
field_set: str = "" # All fields in this set get reconciled at the same time
suffix: Union[int, float] = 0 # When columns have the same name, break the tie with this
task_id: str = ""
freeze: bool = False

def to_dict(self, reconciled=False, add_note=False) -> dict[str, Any]:
raise NotImplementedError()
Expand Down
5 changes: 3 additions & 2 deletions pylib/fields/highlighter_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ def unreconciled_list(
def to_dict(self, reconciled=False, add_note=False) -> dict[str, Any]:
field_dict = {
self.header("text"): self.text,
# self.header("start"): self.start,
# self.header("end"): self.end,
self.header("position"): f"({self.start}, {self.end})",
}
return self.decorate_dict(field_dict, add_note)

Expand Down Expand Up @@ -201,6 +200,7 @@ def align_json_fields(group) -> dict[str, list["HighlightField"]]:
# Update unreconciled suffixes to match the reconciled span
for j, part in enumerate(parts):
part.suffix = i if j == 0 else float(f"{i}.{j}")
part.freeze = True

# Add a reconciled record, one for each set of parts
high = HighlightField(
Expand All @@ -212,6 +212,7 @@ def align_json_fields(group) -> dict[str, list["HighlightField"]]:
label=parts[0].label,
field_set=parts[0].field_set,
suffix=i,
freeze=True,
)
aligned[(start, end)].append(high)

Expand Down
24 changes: 13 additions & 11 deletions pylib/formats/common_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,9 @@ def read_table(args, df):
for raw_row in records:
row = Row()

row.add_field(
args.group_by,
SameField(value=raw_row[args.group_by]),
)
row.append(SameField(
name=args.group_by, value=raw_row[args.group_by],
))

for name, value in raw_row.items():
if name == args.group_by:
Expand All @@ -77,7 +76,8 @@ def read_table(args, df):
value = json.loads(value)
else:
value = {"x": 0, "y": 0, "width": 0, "height": 0}
row.add_field(name, BoxField(
row.append(BoxField(
name=name,
left=round(value["x"]),
right=round(value["x"] + value["width"]),
top=round(value["y"]),
Expand All @@ -88,30 +88,32 @@ def read_table(args, df):
value = json.loads(value)
else:
value = {"x1": 0, "y1": 0, "x2": 0, "y2": 0}
row.add_field(name, LengthField(
row.append(LengthField(
name=name,
x1=round(value["x1"]),
y1=round(value["y1"]),
x2=round(value["x2"]),
y2=round(value["y2"]),
))
case "noop":
value = value if value else ""
row.add_field(name, NoOpField(value=value))
row.append(NoOpField(name=name, value=value))
case "point":
value = json.loads(value) if value else {"x": 0, "y": 0}
row.add_field(name, PointField(
row.append(PointField(
name=name,
x=round(value["x"]),
y=round(value["y"]),
))
case "same":
value = value if value else ""
row.add_field(name, SameField(value=value))
row.append(SameField(name=name, value=value))
case "select":
value = value if value else ""
row.add_field(name, SelectField(value=value))
row.append(SelectField(name=name, value=value))
case "text":
value = value if value else ""
row.add_field(name, TextField(value=value))
row.append(TextField(name=name, value=value))

table.rows.append(row)

Expand Down
10 changes: 8 additions & 2 deletions pylib/row.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,18 @@

@dataclass
class Row:
# Yes, this is a list and not a dict, because we're renaming things, etc.
fields: list[AnyField] = field_default(default_factory=list)

def __getitem__(self, key):
return next(f for f in self.fields if f.field_name == key)

def __iter__(self):
yield from self.fields

def __iadd__(self, other):
self.fields += other.fields if isinstance(other, Row) else other
return self

def append(self, field: AnyField):
self.fields.append(field)

Expand All @@ -42,7 +48,7 @@ def to_dict(self, add_note=False, reconciled=False) -> dict[str, Any]:

for field in self.fields:

if isinstance(field, TaskField):
if isinstance(field, TaskField) and not field.freeze:
suffixes[field.name_group] += 1
field.suffix = suffixes[field.name_group]

Expand Down
6 changes: 1 addition & 5 deletions pylib/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ def report(args, unreconciled: Table, reconciled: Table):
reconciled_df = reconciled.to_df(args)
flag_df = reconciled.to_flag_df(args)
alias_group_by(args, unreconciled_df, reconciled_df, flag_df)
from pprint import pp
# pp(unreconciled_df.columns)
# pp(reconciled_df.columns)
# pp(flag_df.columns)

has_users = 1 if args.user_column in unreconciled_df.columns else 0
transcribers_df = get_transcribers_df(args, unreconciled_df)
Expand Down Expand Up @@ -279,7 +275,7 @@ def merge_dataframes(args, unreconciled_df, reconciled_df, flag_df):
keys += [args.row_key] if args.row_key in unreconciled_df.columns else []
keys += [args.user_column] if args.user_column in unreconciled_df.columns else []

df = pd.concat([reconciled_df, note_df, unreconciled_df]).fillna("")
df = pd.concat([unreconciled_df, note_df, reconciled_df]).fillna("")
df = df.reset_index(drop=True)
df = df[keys + [c for c in df.columns if c not in keys]]
df = df.sort_values(keys)
Expand Down
22 changes: 10 additions & 12 deletions pylib/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ def to_csv(self, args: Namespace, path, add_note=False) -> None:
df.to_csv(path, index=False)

def to_df(self, args: Namespace, add_note=False) -> pd.DataFrame:
records = self.to_dict(add_note=add_note)
records = self.to_records(add_note=add_note)
df = pd.DataFrame(records)
headers = self.field_order(df, args)
df = df[headers]
return df

def to_dict(self, add_note=False) -> list[dict]:
as_dict = [r.to_dict(add_note, self.reconciled) for r in self.rows]
return as_dict
def to_records(self, add_note=False) -> list[dict]:
as_recs = [r.to_dict(add_note, self.reconciled) for r in self.rows]
return as_recs

@staticmethod
def field_order(df, args):
Expand All @@ -64,7 +64,7 @@ def reconcile(self, args) -> "Table":
table = Table(reconciled=True)

for _, row_group in groups:
row = Row()
new_row = Row()
row_group = list(row_group)
row_count = len(row_group)

Expand All @@ -75,8 +75,8 @@ def reconcile(self, args) -> "Table":
if field_set and field_set not in used_field_sets:
group = []
for row in row_group:
row_set = [f for f in row.fields if f.field_set == field_set]
group.append(row_set)
fields = [f for f in row.fields if f.field_set == field_set]
group.append(fields)
used_field_sets.add(field_set)

elif field_set in used_field_sets:
Expand All @@ -86,17 +86,15 @@ def reconcile(self, args) -> "Table":
group = [r[field_name] for r in row_group]

if not group:
row.append(cls(
new_row.append(cls(
note=f"All {row_count} records are blank", flag=Flag.ALL_BLANK
))
continue

fields = cls.reconcile(group, row_count, args)
fields = fields if isinstance(fields, list) else [fields]
for field in fields:
row.append(field)
new_row += fields if isinstance(fields, list) else [fields]

table.rows.append(row)
table.rows.append(new_row)

return table

Expand Down

0 comments on commit d957a7d

Please sign in to comment.