Skip to content

Commit

Permalink
Add initial parsing tests
Browse files Browse the repository at this point in the history
  • Loading branch information
sco1 committed Aug 7, 2023
1 parent 71100b5 commit 3184211
Show file tree
Hide file tree
Showing 3 changed files with 211 additions and 11 deletions.
40 changes: 29 additions & 11 deletions dropmate_py/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ class Health(str, Enum): # noqa: D101

@dataclass
class FauxSeries:
"""Helper container to support column access by name of a raw log line."""
"""
Helper container to support column access by name of a raw log line.
NOTE: `raw_columns` is assumed to already be split.
"""

raw_columns: abc.Sequence[str]
indices: ColumnIndices
Expand All @@ -84,7 +88,8 @@ def __getitem__(self, key: str) -> str:
raise KeyError(f"Column {key} not present in log file.")

val = self.raw_columns[idx]
if not isinstance(val, str):
if not isinstance(val, str): # pragma: no cover
# Shouldn't ever get here but add the guard just in case
raise ValueError("Provided log data contains non-string value(s).")

return val
Expand Down Expand Up @@ -112,7 +117,9 @@ class DropRecord:

def __eq__(self, other: t.Any) -> bool:
    """
    Compare two records for equality by device UID and drop start time only.

    Raises `NotImplementedError` when compared against a non-`DropRecord`.

    NOTE(review): raising from `__eq__` deviates from Python's `NotImplemented`
    protocol (it will surface as an error from e.g. `record in mixed_list`);
    the test suite pins this raising behavior, so confirm before changing.
    """
    if not isinstance(other, DropRecord):
        raise NotImplementedError(
            f"Can only compare between {type(self).__name__}, received: {type(other).__name__}"
        )

    # Remaining fields (battery, altitudes, etc.) are deliberately excluded
    return (self.uid == other.uid) and (self.start_time_utc == other.start_time_utc)

Expand Down Expand Up @@ -149,10 +156,10 @@ class Dropmate: # noqa: D101
firmware_version: float
last_scanned_time_utc: dt.datetime

def __len__(self) -> int:  # pragma: no cover
    """Return the number of drop records logged by this Dropmate device."""
    return len(self.drops)

def __str__(self) -> str:  # pragma: no cover
    """Render a one-line human-readable summary of this Dropmate's metadata."""
    scanned_pretty = self.last_scanned_time_utc.strftime(r"%Y-%m-%d %H:%M")
    return f"UID: {self.uid}, FW: {self.firmware_version}, {len(self.drops)} drops, Scanned: {scanned_pretty} UTC"  # noqa: E501

Expand All @@ -175,13 +182,24 @@ def _group_by_uid(drop_logs: list[DropRecord]) -> list[Dropmate]:
return dropmates


def _parse_raw_log(log_lines: abc.Sequence[str]) -> list[DropRecord]:
    """
    Parse the provided compiled Dropmate log lines into a list of drop records.

    NOTE: The provided `log_lines` is assumed to include the header line.
    """
    # Column positions are resolved from the header so differing log layouts
    # (e.g. older firmware with fewer columns) can still be parsed
    indices = ColumnIndices.from_header(log_lines[0])

    # Every line after the header is one recorded drop
    return [DropRecord.from_raw(line, indices) for line in log_lines[1:]]


def log_parse_pipeline(log_filepath: Path) -> list[Dropmate]:
    """Parse the provided compiled Dropmate log CSV into a list of drops, grouped by device."""
    raw_lines = log_filepath.read_text().splitlines()

    # Parse individual records first, then collapse them by device UID
    return _group_by_uid(_parse_raw_log(raw_lines))
28 changes: 28 additions & 0 deletions tests/test_faux_series.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pytest

from dropmate_py import parser

# Header from a current-generation compiled Dropmate log, containing every known column
SAMPLE_FULL_HEADER = "serial_number,uid,battery,device_health,firmware_version,log_timestamp,log_altitude,total_flights,flights_over_18kft,recorded_flights,flight_index,start_time_utc,end_time_utc,start_barometric_altitude_msl_ft,end_barometric_altitude_msl_ft,dropmate_internal_time_utc,last_scanned_time_utc,scan_device_type,scan_device_os,dropmate_app_version"
# One raw data row whose columns match SAMPLE_FULL_HEADER's ordering
SAMPLE_DATA_LINE = "0,E002270067A94C18,Good,good,5.1,true,true,3,0,3,1,2023-04-20T14:45:45Z,2023-04-20T14:47:37Z,1591,262,2023-04-20T19:16:38Z,2023-04-20T19:34:04.547Z,SM S901U1,31,1.5.16"

# Column index mapping is built once at import time & shared across tests
SAMPLE_FULL_HEADER_COL_IDX = parser.ColumnIndices.from_header(SAMPLE_FULL_HEADER)


def test_faux_series_getter() -> None:
    """Named column access against a full modern header should return the raw string value."""
    columns = SAMPLE_DATA_LINE.split(",")
    series = parser.FauxSeries(columns, SAMPLE_FULL_HEADER_COL_IDX)

    assert series["uid"] == "E002270067A94C18"


# Abbreviated header as seen in logs from older-generation devices; note it lacks
# several columns present in the full header (e.g. device_health)
SAMPLE_SHORT_HEADER = "serial_number,uid,battery,log_timestamp,log_altitude,total_flights,prior_flights,flights_over_18kft,recorded_flights,flight_index,start_time_utc,end_time_utc,start_barometric_altitude_msl_ft,end_barometric_altitude_msl_ft"
# One raw data row whose columns match SAMPLE_SHORT_HEADER's ordering
SAMPLE_SHORT_DATA_LINE = (
    "5,E00227006796B05F,Good,on,on,7,0,0,7,1,2023-Apr-20T14-48-53Z,2023-Apr-20T14-56-07Z,5364,1444"
)

# Column index mapping is built once at import time & shared across tests
SAMPLE_SHORT_HEADER_COL_IDX = parser.ColumnIndices.from_header(SAMPLE_SHORT_HEADER)


def test_faux_series_getter_old_log_raises() -> None:
    """Accessing a column absent from an older, shorter header should raise a KeyError."""
    series = parser.FauxSeries(SAMPLE_SHORT_DATA_LINE.split(","), SAMPLE_SHORT_HEADER_COL_IDX)

    with pytest.raises(KeyError):
        _ = series["device_health"]
154 changes: 154 additions & 0 deletions tests/test_log_objects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import datetime as dt
from dataclasses import fields
from functools import partial
from pathlib import Path
from textwrap import dedent

import pytest

from dropmate_py import parser

# Partial constructor pre-filling every DropRecord field except `uid` and
# `start_time_utc`, so individual tests only need to specify the fields under test
DROP_RECORD_P = partial(
    parser.DropRecord,
    serial_number="cereal",
    battery=parser.Health.GOOD,
    device_health=parser.Health.GOOD,
    firmware_version=5.1,
    end_time_utc=dt.datetime(
        year=2023, month=4, day=20, hour=11, minute=30, second=0, tzinfo=dt.timezone.utc
    ),
    start_barometric_altitude_msl_ft=1000,
    end_barometric_altitude_msl_ft=0,
    dropmate_internal_time_utc=dt.datetime(
        year=2023, month=4, day=20, hour=12, minute=30, second=0, tzinfo=dt.timezone.utc
    ),
    last_scanned_time_utc=dt.datetime(
        year=2023, month=4, day=20, hour=12, minute=30, second=0, tzinfo=dt.timezone.utc
    ),
)

# Full modern header & a matching data row whose values mirror DROP_RECORD_P's defaults
SAMPLE_FULL_HEADER = "serial_number,uid,battery,device_health,firmware_version,log_timestamp,log_altitude,total_flights,flights_over_18kft,recorded_flights,flight_index,start_time_utc,end_time_utc,start_barometric_altitude_msl_ft,end_barometric_altitude_msl_ft,dropmate_internal_time_utc,last_scanned_time_utc,scan_device_type,scan_device_os,dropmate_app_version"
SAMPLE_DATA_LINE = "cereal,ABC123,Good,good,5.1,true,true,3,0,3,1,2023-04-20T11:00:00Z,2023-04-20T11:30:00Z,1000,0,2023-04-20T12:30:00Z,2023-04-20T12:30:00Z,SM S901U1,31,1.5.16"

# Column index mapping is built once at import time & shared across tests
SAMPLE_FULL_HEADER_COL_IDX = parser.ColumnIndices.from_header(SAMPLE_FULL_HEADER)


def test_droprecord_from_raw() -> None:
    """Parsing a raw CSV line should reproduce the known-good record, field for field."""
    truth_log = DROP_RECORD_P(
        uid="ABC123",
        start_time_utc=dt.datetime(
            year=2023, month=4, day=20, hour=11, minute=0, second=0, tzinfo=dt.timezone.utc
        ),
    )

    parsed_log = parser.DropRecord.from_raw(SAMPLE_DATA_LINE, SAMPLE_FULL_HEADER_COL_IDX)

    # Field-by-field comparison gives a more useful failure message than __eq__,
    # which only considers uid & start time
    for field in fields(truth_log):
        parsed_val = getattr(parsed_log, field.name)
        truth_val = getattr(truth_log, field.name)
        assert parsed_val == truth_val, f"Mismatch for field {field.name}"


# (left, right, expected equality) triples for DropRecord.__eq__, which compares
# on uid & start_time_utc only
LOG_EQUALITY_TEST_CASES = (
    # Same uid, same start time -> equal
    (
        DROP_RECORD_P(
            uid="ABC123",
            start_time_utc=dt.datetime(
                year=2023, month=4, day=20, hour=11, minute=00, second=0, tzinfo=dt.timezone.utc
            ),
        ),
        DROP_RECORD_P(
            uid="ABC123",
            start_time_utc=dt.datetime(
                year=2023, month=4, day=20, hour=11, minute=00, second=0, tzinfo=dt.timezone.utc
            ),
        ),
        True,
    ),
    # Same uid, different start day -> not equal
    (
        DROP_RECORD_P(
            uid="ABC123",
            start_time_utc=dt.datetime(
                year=2023, month=4, day=20, hour=11, minute=00, second=0, tzinfo=dt.timezone.utc
            ),
        ),
        DROP_RECORD_P(
            uid="ABC123",
            start_time_utc=dt.datetime(
                year=2023, month=4, day=21, hour=11, minute=00, second=0, tzinfo=dt.timezone.utc
            ),
        ),
        False,
    ),
    # Different uid, same start time -> not equal
    (
        DROP_RECORD_P(
            uid="ABC123",
            start_time_utc=dt.datetime(
                year=2023, month=4, day=20, hour=11, minute=00, second=0, tzinfo=dt.timezone.utc
            ),
        ),
        DROP_RECORD_P(
            uid="ABC1234",
            start_time_utc=dt.datetime(
                year=2023, month=4, day=20, hour=11, minute=00, second=0, tzinfo=dt.timezone.utc
            ),
        ),
        False,
    ),
)


@pytest.mark.parametrize(("left", "right", "truth_eq"), LOG_EQUALITY_TEST_CASES)
def test_drop_log_equality(
    left: parser.DropRecord, right: parser.DropRecord, truth_eq: bool
) -> None:
    """Record equality should match the expected truth flag for each pairing."""
    observed = left == right
    assert observed == truth_eq


def test_drop_log_equality_non_droplog_raises() -> None:
    """Comparing a DropRecord against any other type should raise NotImplementedError."""
    record = DROP_RECORD_P(
        uid="ABC123",
        start_time_utc=dt.datetime(
            year=2023, month=4, day=20, hour=11, minute=0, second=0, tzinfo=dt.timezone.utc
        ),
    )

    with pytest.raises(NotImplementedError):
        record == "foo"  # noqa: B015


SAMPLE_CONSOLIDATED_LOG = dedent(
"""\
serial_number,uid,battery,device_health,firmware_version,log_timestamp,log_altitude,total_flights,flights_over_18kft,recorded_flights,flight_index,start_time_utc,end_time_utc,start_barometric_altitude_msl_ft,end_barometric_altitude_msl_ft,dropmate_internal_time_utc,last_scanned_time_utc,scan_device_type,scan_device_os,dropmate_app_version
cereal,A1,Good,good,5.1,true,true,3,0,3,1,2023-04-20T11:00:00Z,2023-04-20T11:30:00Z,1000,0,2023-04-20T12:30:00Z,2023-04-20T12:30:00Z,SM S901U1,31,1.5.16
cereal,A1,Good,good,5.1,true,true,3,0,3,3,2023-04-20T11:00:00Z,2023-04-20T11:30:00Z,1000,0,2023-04-20T12:30:00Z,2023-04-20T12:30:00Z,SM S901U1,31,1.5.16
cereal,A2,Good,good,5.1,true,true,3,0,3,1,2023-04-20T11:00:00Z,2023-04-20T11:30:00Z,1000,0,2023-04-20T12:30:00Z,2023-04-20T12:30:00Z,SM S901U1,31,1.5.16
cereal,A2,Good,good,5.1,true,true,3,0,3,3,2023-04-20T11:00:00Z,2023-04-20T11:30:00Z,1000,0,2023-04-20T12:30:00Z,2023-04-20T12:30:00Z,SM S901U1,31,1.5.16
cereal,A3,Good,good,5.1,true,true,2,0,2,1,2023-04-20T11:00:00Z,2023-04-20T11:30:00Z,1000,0,2023-04-20T12:30:00Z,2023-04-20T12:30:00Z,SM S901U1,31,1.5.16
"""
)


def test_log_line_parse() -> None:
    """All five data rows (header excluded) should become individual drop records."""
    records = parser._parse_raw_log(SAMPLE_CONSOLIDATED_LOG.splitlines())

    assert len(records) == 5


def test_group_by_uid() -> None:
    """Records sharing a device UID should be collapsed into one Dropmate each."""
    records = parser._parse_raw_log(SAMPLE_CONSOLIDATED_LOG.splitlines())
    grouped = parser._group_by_uid(records)

    assert len(grouped) == 3
    assert [dropmate.uid for dropmate in grouped] == ["A1", "A2", "A3"]


def test_file_parse_pipeline(tmp_path: Path) -> None:
    """End-to-end: a compiled log file on disk should parse into UID-grouped Dropmates."""
    log_file = tmp_path / "compiled.CSV"
    log_file.write_text(SAMPLE_CONSOLIDATED_LOG)

    grouped = parser.log_parse_pipeline(log_file)

    assert len(grouped) == 3
    assert [dropmate.uid for dropmate in grouped] == ["A1", "A2", "A3"]

0 comments on commit 3184211

Please sign in to comment.