Skip to content

Commit

Permalink
Add deque_len parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
mberk committed May 31, 2023
1 parent 1063cb9 commit e0dc8b8
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 3 deletions.
39 changes: 37 additions & 2 deletions betfairutil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2167,10 +2167,45 @@ def get_inplay_publish_time_from_prices_file(


def get_total_volume_traded_from_prices_file(
path_to_prices_file: Union[str, Path]
path_to_prices_file: Union[str, Path], deque_len: Optional[int] = 8
) -> Optional[Union[int, float]]:
"""
Working out the total volume traded on a market is surprisingly tricky given Betfair's shenanigans around the market
closure. Specifically, the last few updates in the price stream result in data getting zeroed out which means if you
were to just look at the last available market book it would look like the total volume traded was zero. In fact, it
appears there is no consistency as to how many price stream updates are involved in this zeroing of data and so it's
not as simple as having a rule such as "look at the second to last market book available".
If memory usage is not an issue then the most robust method would be to read the entire set of market books into
memory then iterate them in reverse order looking for the first market book that has a non-zero total volume traded.
However, in many use cases memory usage is a significant concern. For example, if you want to process tens of
thousands of markets and use parallel processing to speed this up then you end up needing to hold the entire set of
market books for multiple markets in memory simultaneously and, depending on the number of cores your machine has
and the number of parallel processes you use, it is easy to exhaust the total memory available.
The solution presented here is to use a deque
(https://docs.python.org/3/library/collections.html#collections.deque). Think of this as a limited length list
where when a new item is appended to the list, another item is removed for the start of the list. This is used when
reading the prices file to leave us with the X last market books which can then be iterated in reverse order to find
the last one which has a non-zero volume traded. Based on my own analysis of prices files I have settled on a
default maximum length of the deque of 8 - i.e. at most this function will store 8 market books in memory at any one
time. This is a trade-off between giving correct results and drastically cutting down on memory usage versus reading
the entire set of market books into memory. If memory usage is not of concern to you then you can simply set the
deque_len argument to None and the function will read the entire set of market books into memory ensuring
correctness. If you observe any prices files where a deque of length 8 doesn't give correct results then please
create a GitHub issue here: https://github.com/mberk/betfairutil/issues so I can investigate.
:param path_to_prices_file: Where the Betfair prices file to be processed is located. This can be a local file, one
stored in AWS S3, or any of the other options that can be handled by the smart_open package. The file can be
compressed or uncompressed
:param deque_len: The length of the deque used to limit memory usage when working out the total volume traded. See
discussion above. This can be set to None to use the entire set of market books which should ensure correctness
but dramatically increase memory usage
:return: The calculated total volume traded if it is available or None if the market was pulled by Betfair (i.e. all
runners have a status of "REMOVED")
"""
g = create_market_book_generator_from_prices_file(path_to_prices_file)
market_books = deque(g, 8)
market_books = deque(g, deque_len)
for market_book in reversed(market_books):
volume_traded = calculate_total_matched(market_book)
if volume_traded > 0:
Expand Down
62 changes: 61 additions & 1 deletion tests/test_non_prices.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,15 @@
from betfairutil import get_number_of_jumps_remaining
from betfairutil import get_pre_event_volume_traded_from_prices_file
from betfairutil import get_race_change_from_race_file
from betfairutil import get_race_distance_in_metres_from_race_card
from betfairutil import get_race_id_from_string
from betfairutil import get_race_leaders
from betfairutil import get_runner_book_from_market_book
from betfairutil import get_second_best_price
from betfairutil import get_second_best_price_size
from betfairutil import get_seconds_to_market_time
from betfairutil import get_selection_id_to_runner_name_map_from_market_catalogue
from betfairutil import get_race_distance_in_metres_from_race_card
from betfairutil import get_total_volume_traded_from_prices_file
from betfairutil import get_win_market_id_from_race_card
from betfairutil import get_win_market_id_from_race_file
from betfairutil import get_winners_from_market_definition
Expand Down Expand Up @@ -1241,3 +1242,62 @@ def test_get_mid_price(market_book: Dict[str, Any]):

runner_book["ex"]["availableToLay"].append({"price": 1.99, "size": 1})
assert get_mid_price(runner_book) == pytest.approx(1.985)


def test_get_total_volume_traded_from_prices_file(
path_to_prices_file: Path,
market_book: Dict[str, Any],
market_definition: Dict[str, Any],
):
total_volume_traded = get_total_volume_traded_from_prices_file(path_to_prices_file)
assert total_volume_traded == 0

with smart_open.open(path_to_prices_file, "w") as f:
f.write(
json.dumps(
{
"op": "mcm",
"clk": 0,
"pt": market_book["publishTime"],
"mc": [
{
"id": "1.123",
"marketDefinition": market_definition,
"rc": [
{"id": 123, "atb": [[1.98, 1]], "trd": [[1.99, 1]]},
{"id": 456, "atb": [[1.98, 1]]},
],
}
],
}
)
)
f.write("\n")
total_volume_traded = get_total_volume_traded_from_prices_file(path_to_prices_file)
assert total_volume_traded == 1

for runner in market_definition["runners"]:
runner["status"] = "REMOVED"
with smart_open.open(path_to_prices_file, "w") as f:
f.write(
json.dumps(
{
"op": "mcm",
"clk": 0,
"pt": market_book["publishTime"],
"mc": [
{
"id": "1.123",
"marketDefinition": market_definition,
"rc": [
{"id": 123, "atb": [[1.98, 1]]},
{"id": 456, "atb": [[1.98, 1]]},
],
}
],
}
)
)
f.write("\n")
total_volume_traded = get_total_volume_traded_from_prices_file(path_to_prices_file)
assert total_volume_traded is None

0 comments on commit e0dc8b8

Please sign in to comment.