# Retrieve run data from MongoDB

In [96]:
from pymongo import MongoClient
import polars as pl

def _to_order_book_row(message):
    """Reduce one message row to the fields used for order-book analysis.

    Args:
        message: One row of the message frame as produced by
            ``DataFrame.to_dicts()``; must carry ``content`` and ``timestamp``.

    Returns:
        A dict with the ``order_book`` snapshot, the ``incoming`` message
        stripped of its ``trader_id`` key, and the row ``timestamp``.
    """
    # `dict.get(key, default)` only falls back when the key is absent; a field
    # that is present but null comes back as None, so normalise with `or {}`.
    content = message['content'] or {}
    incoming = content.get('incoming_message') or {}
    return {
        'order_book': content.get('order_book'),
        'incoming': {k: v for k, v in incoming.items() if k != 'trader_id'},
        'timestamp': message['timestamp'],
    }


client = MongoClient("localhost", 27017)
db = client["trader"]
documents = list(db["message"].find({}))
if not documents:
    # Fail early with a readable message instead of a missing-column error below.
    raise ValueError('No documents found in collection "message" of database "trader".')
run_data = pl.DataFrame(documents)

# Pick the session of the most recent message explicitly: an unsorted find()
# gives no ordering guarantee, so "last row" is not reliably the latest run.
# Only the two needed columns are sorted; null timestamps are placed first so
# they can never be selected as "latest".
latest_session_id = (
    run_data.select("trading_session_id", "timestamp")
    .sort("timestamp", nulls_last=False)
)["trading_session_id"][-1]

filtered_data = run_data.filter(pl.col("trading_session_id") == latest_session_id)

preprocessed_data = [_to_order_book_row(row) for row in filtered_data.to_dicts()]

# Chronological order with a stable row index for referring to individual updates.
new_order_books = (
    pl.DataFrame(preprocessed_data)
    .sort("timestamp", nulls_last=True)
    .with_row_index("row_number")
)
print(new_order_books)

shape: (13, 4)
┌────────────┬─────────────────────────────────┬──────────────────┬─────────────────────────┐
│ row_number ┆ order_book                      ┆ incoming         ┆ timestamp               │
│ ---        ┆ ---                             ┆ ---              ┆ ---                     │
│ u32        ┆ struct[2]                       ┆ struct[3]        ┆ datetime[μs]            │
╞════════════╪═════════════════════════════════╪══════════════════╪═════════════════════════╡
│ 0          ┆ {[],[]}                         ┆ {null,null,null} ┆ 2024-06-03 23:49:58.871 │
│ 1          ┆ {[{2000.0,1.0}],[]}             ┆ {1,2000.0,1}     ┆ 2024-06-03 23:49:58.965 │
│ 2          ┆ {[],[]}                         ┆ {1,2000.0,-1}    ┆ 2024-06-03 23:49:59.047 │
│ 3          ┆ {[{2001.0,1.0}],[]}             ┆ {1,2001.0,1}     ┆ 2024-06-03 23:49:59.652 │
│ 4          ┆ {[{2001.0,1.0}],[{2011.0,1.0}]} ┆ {1,2011.0,-1}    ┆ 2024-06-03 23:50:01.798 │
│ …          ┆ …                             

In [97]:
# Preview the first ten rows of the timestamp-sorted order-book frame.
new_order_books.head(10)

row_number,order_book,incoming,timestamp
u32,struct[2],struct[3],datetime[μs]
0,"{[],[]}","{null,null,null}",2024-06-03 23:49:58.871
1,"{[{2000.0,1.0}],[]}","{1,2000.0,1}",2024-06-03 23:49:58.965
2,"{[],[]}","{1,2000.0,-1}",2024-06-03 23:49:59.047
3,"{[{2001.0,1.0}],[]}","{1,2001.0,1}",2024-06-03 23:49:59.652
4,"{[{2001.0,1.0}],[{2011.0,1.0}]}","{1,2011.0,-1}",2024-06-03 23:50:01.798
5,"{[],[{2011.0,1.0}]}","{-1,2001.0,1}",2024-06-03 23:50:01.879
6,"{[{2000.0,1.0}],[{2011.0,1.0}]}","{1,2000.0,1}",2024-06-03 23:50:03.048
7,"{[{2000.0,1.0}],[]}","{-1,2011.0,-1}",2024-06-03 23:50:03.131
8,"{[],[]}","{1,2000.0,-1}",2024-06-03 23:50:04.998
9,"{[{2000.0,1.0}],[]}","{1,2000.0,1}",2024-06-03 23:50:06.720


In [75]:
# Preview the first ten raw message rows kept by the trading-session filter.
filtered_data.head(10)

_id,trading_session_id,content,timestamp
object,str,struct[10],datetime[μs]
665e402af71a5defe57a5a80,"""1994acd0-e676-…","{""Market is open"",""BOOK_UPDATED"",{[],[]},[],[],null,null,null,{null,null,null,null},null}",2024-06-03 23:14:02.403
665e402af71a5defe57a5a81,"""1994acd0-e676-…","{null,""ADD_ORDER"",{[{2000.0,1.0}],[]},[{b""\xf4\x0f\xb6\xe0\x88fBV\x9c\xe5M\xfd\x0d\xfe\xd6\xc2"",""NOISE_ee0c9ce2-4a6a-4d3f-8d45-4db1a5e19ef0"",1,1.0,2000.0,2024-06-03 22:14:02.440}],[],null,null,null,{1,2000.0,1,""NOISE_ee0c9ce2-4a6a-4d3f-8d45-4db1a5e19ef0""},""add_order update processed""}",2024-06-03 23:14:02.494
665e402cf71a5defe57a5a82,"""1994acd0-e676-…","{null,""ADD_ORDER"",{[],[]},[],[],null,null,null,{1,2000.0,-1,""NOISE_ee0c9ce2-4a6a-4d3f-8d45-4db1a5e19ef0""},""add_order update processed""}",2024-06-03 23:14:04.299


In [27]:
# NOTE(review): this repeats the earlier `new_order_books.head(10)` preview, and its
# execution count (27) is lower than the cells above it, so the saved output
# presumably comes from an earlier run — re-run top to bottom or drop this cell.
new_order_books.head(10)

row_number,order_book,incoming,timestamp
u32,struct[2],struct[4],datetime[μs]
0,"{[],[]}","{null,null,null,null}",2024-06-03 00:48:40.647
1,"{[{2000.0,1.0}],[]}","{1,2000,1,null}",2024-06-03 00:48:40.766
2,"{[],[]}","{1,2000,-1,null}",2024-06-03 00:48:40.867
3,"{[{2001.0,1.0}],[]}","{1,2001,1,null}",2024-06-03 00:48:40.955
4,"{[{2001.0,1.0}],[{2011.0,1.0}]}","{1,2011,-1,null}",2024-06-03 00:48:41.048
5,"{[{2001.0,1.0}, {2000.0,1.0}],[{2011.0,1.0}]}","{1,2000,1,null}",2024-06-03 00:48:41.154
6,"{[{2000.0,1.0}],[{2011.0,1.0}]}","{1,2000,-1,null}",2024-06-03 00:48:41.261
7,"{[{2000.0,2.0}],[{2011.0,1.0}]}","{1,2000,1,null}",2024-06-03 00:48:41.346
8,"{[{2000.0,1.0}],[{2011.0,1.0}]}","{1,2000,-1,null}",2024-06-03 00:48:41.488
9,"{[{2000.0,2.0}],[{2011.0,1.0}]}","{1,2000,1,null}",2024-06-03 00:48:41.571


In [28]:
# WARNING: destructive cleanup. Per the recorded output, this call deletes the
# DuckDB tables and the MongoDB collection, so the data loaded by the cells
# above cannot be re-read afterwards. Run it deliberately, not as part of
# "Run All".
from analysis.utilities import delete_all_tables

delete_all_tables()

DuckDB tables deleted successfully.
MongoDB collection deleted successfully.
