Commit 7f41f80 — [core][fix] move slotted data to previous slot, not next (#2226)
1 parent: e0f8435
File tree: 3 files changed, +16 additions, −16 deletions

fixcore/fixcore/db/arango_query.py (2 additions & 2 deletions)
```diff
@@ -1157,9 +1157,9 @@ def load_time_series(
     time_slot = ctx.next_crs()
     slotter = int(granularity.total_seconds())
     gran = ctx.add_bind_var(slotter)
-    offset = int(start.timestamp() - ((start.timestamp() // slotter) * slotter))
+    offset = ctx.add_bind_var(slotter - int(start.timestamp() - ((start.timestamp() // slotter) * slotter)))
     # slot the time by averaging each single group
-    query += f" LET {time_slot} = (FLOOR(d.at / @{gran}) * @{gran}) + @{ctx.add_bind_var(offset)}"
+    query += f" LET {time_slot} = (FLOOR((d.at + @{offset}) / @{gran}) * @{gran}) - @{offset}"
     query += f" COLLECT group_slot={time_slot}, complete_group=d.group"
     if avg_factor:  # Required as long as https://github.com/arangodb/arangodb/issues/21096 is not fixed
         assert avg_factor > 0, "Given average factor must be greater than 0!"
```

fixcore/tests/fixcore/db/arango_query_test.py (8 additions & 8 deletions)
```diff
@@ -331,29 +331,29 @@ def test_load_time_series() -> None:
     q, bv = load_time_series("ts", "foo", now - (24 * one_hour), now, one_hour, group_by=[])
     assert (
         q == "LET m1 = ( FOR d in `ts` FILTER d.ts==@b0 AND d.at>=@b1 AND d.at<@b2 "
-        "LET m0 = (FLOOR(d.at / @b3) * @b3) + @b4 "
+        "LET m0 = (FLOOR((d.at + @b4) / @b3) * @b3) - @b4 "
         "COLLECT group_slot=m0, complete_group=d.group "
         "AGGREGATE slot_avg = AVG(d.v) "
         "RETURN {at: group_slot, group: complete_group, v: slot_avg} )\n "
         "FOR d in m1 COLLECT group_slot=d.at AGGREGATE agg_val=avg(d.v) "
         "SORT group_slot RETURN {at: group_slot, v: agg_val}"
     )
-    assert bv == {"b0": "foo", "b1": 1699913600, "b2": 1700000000, "b3": 3600, "b4": 800}
+    assert bv == {"b0": "foo", "b1": 1699913600, "b2": 1700000000, "b3": 3600, "b4": 2800}
     # no group by defined --> group by all values
     q, bv = load_time_series("ts", "foo", now - (24 * one_hour), now, one_hour)
     assert (
         q == "FOR d in `ts` FILTER d.ts==@b0 AND d.at>=@b1 AND d.at<@b2 "
-        "LET m0 = (FLOOR(d.at / @b3) * @b3) + @b4 "
+        "LET m0 = (FLOOR((d.at + @b4) / @b3) * @b3) - @b4 "
         "COLLECT group_slot=m0, complete_group=d.group "
         "AGGREGATE slot_avg = AVG(d.v) "
         "RETURN {at: group_slot, group: complete_group, v: slot_avg}"
     )
-    assert bv == {"b0": "foo", "b1": 1699913600, "b2": 1700000000, "b3": 3600, "b4": 800}
+    assert bv == {"b0": "foo", "b1": 1699913600, "b2": 1700000000, "b3": 3600, "b4": 2800}
     # group by specific group variables
     q, bv = load_time_series("ts", "foo", now - (24 * one_hour), now, one_hour, group_by=["a", "b"])
     assert (
         q == "LET m1 = ( FOR d in `ts` FILTER d.ts==@b0 AND d.at>=@b1 AND d.at<@b2 "
-        "LET m0 = (FLOOR(d.at / @b3) * @b3) + @b4 "
+        "LET m0 = (FLOOR((d.at + @b4) / @b3) * @b3) - @b4 "
         "COLLECT group_slot=m0, complete_group=d.group "
         "AGGREGATE slot_avg = AVG(d.v) "
         "RETURN {at: group_slot, group: complete_group, v: slot_avg} )\n "
```
```diff
@@ -362,22 +362,22 @@ def test_load_time_series() -> None:
         "AGGREGATE agg_val=avg(d.v) "
         "SORT group_slot RETURN {at: group_slot,group: { a: group_a, b: group_b }, v: agg_val}"
     )
-    assert bv == {"b0": "foo", "b1": 1699913600, "b2": 1700000000, "b3": 3600, "b4": 800}
+    assert bv == {"b0": "foo", "b1": 1699913600, "b2": 1700000000, "b3": 3600, "b4": 2800}
     # group by specific group variables and filter by group variables
     q, bv = load_time_series(
         "ts", "foo", now - (24 * one_hour), now, one_hour, group_by=["a", "b"], group_filter=[P("a").eq("a")]
     )
     assert (
         q == "LET m1 = ( FOR d in `ts` FILTER d.ts==@b0 AND d.at>=@b1 AND d.at<@b2 FILTER d.group.a == @b3 "
-        "LET m0 = (FLOOR(d.at / @b4) * @b4) + @b5 "
+        "LET m0 = (FLOOR((d.at + @b5) / @b4) * @b4) - @b5 "
         "COLLECT group_slot=m0, complete_group=d.group "
         "AGGREGATE slot_avg = AVG(d.v) RETURN {at: group_slot, group: complete_group, v: slot_avg} )\n "
         "FOR d in m1 "
         "COLLECT group_slot=d.at, group_a=d.group.a, group_b=d.group.b "
         "AGGREGATE agg_val=avg(d.v) "
         "SORT group_slot RETURN {at: group_slot,group: { a: group_a, b: group_b }, v: agg_val}"
     )
-    assert bv == {"b0": "foo", "b1": 1699913600, "b2": 1700000000, "b3": "a", "b4": 3600, "b5": 800}
+    assert bv == {"b0": "foo", "b1": 1699913600, "b2": 1700000000, "b3": "a", "b4": 3600, "b5": 2800}
     # use avg-factor
     q, _ = load_time_series("ts", "foo", now - (24 * one_hour), now, one_hour, avg_factor=1000)
     assert "slot_avg = AVG(d.v / @b" in q  # factor divides average
```

fixcore/tests/fixcore/db/timeseriesdb_test.py (6 additions & 6 deletions)
```diff
@@ -91,13 +91,13 @@ async def create_ts(before_now: timedelta, granularity: timedelta, number_of_ent
                 "bucket": "Bucket(start=5mo25d, end=2yr, resolution=3d)",
                 "start": "2021-12-01T00:00:00Z",
                 "end": "2023-06-04T00:00:00Z",
-                "data_points": 90,  # 24 days (1d->3d) --> 0,3,6,9,12,15,18,21,24 ==> 9 with 10 entries each ==> 90
+                "data_points": 80,  # 24 days (1d->3d) --> 0,3,6,9,12,15,18,21 ==> 8 with 10 entries each ==> 80
             },
         ]
     }
-    assert timeseries_db.db.collection(timeseries_db.collection_name).count() == 450
+    assert timeseries_db.db.collection(timeseries_db.collection_name).count() == 440
     assert await timeseries_db.downsample(now) == "No changes since last downsample run"
-    assert timeseries_db.db.collection(timeseries_db.collection_name).count() == 450
+    assert timeseries_db.db.collection(timeseries_db.collection_name).count() == 440
     assert await timeseries_db.downsample(now=now + timedelta(days=27)) == {
         "test": [
             {
@@ -114,7 +114,7 @@ async def create_ts(before_now: timedelta, granularity: timedelta, number_of_ent
             },
         ]
     }
-    assert timeseries_db.db.collection(timeseries_db.collection_name).count() == 230
+    assert timeseries_db.db.collection(timeseries_db.collection_name).count() == 220
     assert await timeseries_db.downsample(now=now + timedelta(days=200)) == {
         "test": [
             {
@@ -125,9 +125,9 @@ async def create_ts(before_now: timedelta, granularity: timedelta, number_of_ent
             }
         ]
     }
-    assert timeseries_db.db.collection(timeseries_db.collection_name).count() == 140
+    assert timeseries_db.db.collection(timeseries_db.collection_name).count() == 130
     assert await timeseries_db.downsample(now=now + timedelta(days=400)) == {}
-    assert timeseries_db.db.collection(timeseries_db.collection_name).count() == 140
+    assert timeseries_db.db.collection(timeseries_db.collection_name).count() == 130


 async def test_acquire_lock(timeseries_db: TimeSeriesDB) -> None:
```

Comments (0)