# Retrieve run data from MongoDB

In [70]:
from pymongo import MongoClient
import polars as pl

# Connect to the local MongoDB instance and load every stored message.
client = MongoClient("localhost", 27017)
db = client["trader"]
# Sort by _id so that "the last document" below is deterministic: without an
# explicit sort MongoDB does not guarantee the order of returned documents.
documents = list(db["message"].find({}).sort("_id", 1))
if not documents:
    # pl.DataFrame([]) has no columns, so the session lookup below would fail
    # with an opaque column-not-found error; fail early with a clear message.
    raise ValueError("No documents found in trader.message; nothing to analyse.")
run_data = pl.DataFrame(documents)

# Keep only the messages belonging to the session of the last document.
latest_session_id = run_data["trading_session_id"][-1]
filtered_data = run_data.filter(pl.col("trading_session_id") == latest_session_id)


def _extract_order_book_row(row):
    """Reduce one message row to its order book, incoming message and timestamp.

    Args:
        row: One element of ``filtered_data.to_dicts()``; must contain the
            ``content`` and ``timestamp`` keys.

    Returns:
        A dict with ``order_book``, ``incoming`` (the incoming message without
        its ``trader_id`` field) and ``timestamp``.
    """
    # polars unifies struct fields across rows, so a field that was missing in
    # the original document shows up as None rather than being absent; a
    # ``.get(key, {})`` default would therefore never apply. ``or {}`` covers
    # both the missing and the None case.
    content = row["content"] or {}
    incoming = content.get("incoming_message") or {}
    return {
        "order_book": content.get("order_book"),
        "incoming": {k: v for k, v in incoming.items() if k != "trader_id"},
        "timestamp": row["timestamp"],
    }


preprocessed_data = [_extract_order_book_row(row) for row in filtered_data.to_dicts()]

# Order chronologically (rows without a timestamp go last) and number the rows.
new_order_books = (
    pl.DataFrame(preprocessed_data)
    .sort("timestamp", nulls_last=True)
    .with_row_index("row_number")
)
print(new_order_books)

shape: (9, 4)
┌────────────┬─────────────────────────────────┬──────────────────┬─────────────────────────┐
│ row_number ┆ order_book                      ┆ incoming         ┆ timestamp               │
│ ---        ┆ ---                             ┆ ---              ┆ ---                     │
│ u32        ┆ struct[2]                       ┆ struct[3]        ┆ datetime[μs]            │
╞════════════╪═════════════════════════════════╪══════════════════╪═════════════════════════╡
│ 0          ┆ {[],[]}                         ┆ {null,null,null} ┆ 2024-06-03 23:08:07.669 │
│ 1          ┆ {[{2000.0,1.0}],[]}             ┆ {1,2000.0,1}     ┆ 2024-06-03 23:08:07.777 │
│ 2          ┆ {[],[]}                         ┆ {1,2000.0,-1}    ┆ 2024-06-03 23:08:09.818 │
│ 3          ┆ {[{2001.0,1.0}],[]}             ┆ {1,2001.0,1}     ┆ 2024-06-03 23:08:12.005 │
│ 4          ┆ {[{2001.0,1.0}],[{2011.0,1.0}]} ┆ {1,2011.0,-1}    ┆ 2024-06-03 23:08:12.664 │
│ 5          ┆ {[],[{2011.0,1.0}]}            

In [71]:
# Preview at most the first 10 rows of the chronologically sorted order-book table.
new_order_books.head(n=10)

row_number,order_book,incoming,timestamp
u32,struct[2],struct[3],datetime[μs]
0,"{[],[]}","{null,null,null}",2024-06-03 23:08:07.669
1,"{[{2000.0,1.0}],[]}","{1,2000.0,1}",2024-06-03 23:08:07.777
2,"{[],[]}","{1,2000.0,-1}",2024-06-03 23:08:09.818
3,"{[{2001.0,1.0}],[]}","{1,2001.0,1}",2024-06-03 23:08:12.005
4,"{[{2001.0,1.0}],[{2011.0,1.0}]}","{1,2011.0,-1}",2024-06-03 23:08:12.664
5,"{[],[{2011.0,1.0}]}","{null,null,null}",2024-06-03 23:08:12.749
6,"{[{2000.0,1.0}],[{2011.0,1.0}]}","{1,2000.0,1}",2024-06-03 23:08:12.899
7,"{[{2000.0,1.0}],[]}","{null,null,null}",2024-06-03 23:08:12.992
8,"{[],[]}","{1,2000.0,-1}",2024-06-03 23:08:14.275


In [69]:
# Preview at most the first 10 raw message rows of the selected trading session.
# NOTE(review): this cell's execution count (69) precedes the cell that defines
# filtered_data (70), and the recorded timestamps differ from that cell's
# output, so the saved output appears to be stale - re-run after a restart.
filtered_data.head(n=10)

_id,trading_session_id,content,timestamp
object,str,struct[10],datetime[μs]
665e3ea73e61cf2a8bcb3ee6,"""c0e450e1-598e-…","{""Market is open"",""BOOK_UPDATED"",{[],[]},[],[],null,null,null,{null,null,null,null},null}",2024-06-03 23:07:35.062
665e3ea73e61cf2a8bcb3ee7,"""c0e450e1-598e-…","{null,""ADD_ORDER"",{[{2000.0,1.0}],[]},[{b""\xd4nD4\xadKI\x96\xbeB\xfa9N\xd3$."",""NOISE_9b9f7b9a-a10b-4480-a576-abbb9ecb7b11"",1,1.0,2000.0,2024-06-03 22:07:35.108}],[],null,null,null,{1,2000.0,1,""NOISE_9b9f7b9a-a10b-4480-a576-abbb9ecb7b11""},""add_order update processed""}",2024-06-03 23:07:35.190
665e3ea83e61cf2a8bcb3ee8,"""c0e450e1-598e-…","{null,""ADD_ORDER"",{[],[]},[],[],null,null,null,{1,2000.0,-1,""NOISE_9b9f7b9a-a10b-4480-a576-abbb9ecb7b11""},""add_order update processed""}",2024-06-03 23:07:36.600


In [27]:
# Preview at most the first 10 rows of the order-book table.
# NOTE(review): duplicate of the earlier head(10) preview; the execution count
# (27) and the recorded output (struct[4] incoming column, different
# timestamps) come from an older run. Consider deleting this cell.
new_order_books.head(n=10)

row_number,order_book,incoming,timestamp
u32,struct[2],struct[4],datetime[μs]
0,"{[],[]}","{null,null,null,null}",2024-06-03 00:48:40.647
1,"{[{2000.0,1.0}],[]}","{1,2000,1,null}",2024-06-03 00:48:40.766
2,"{[],[]}","{1,2000,-1,null}",2024-06-03 00:48:40.867
3,"{[{2001.0,1.0}],[]}","{1,2001,1,null}",2024-06-03 00:48:40.955
4,"{[{2001.0,1.0}],[{2011.0,1.0}]}","{1,2011,-1,null}",2024-06-03 00:48:41.048
5,"{[{2001.0,1.0}, {2000.0,1.0}],[{2011.0,1.0}]}","{1,2000,1,null}",2024-06-03 00:48:41.154
6,"{[{2000.0,1.0}],[{2011.0,1.0}]}","{1,2000,-1,null}",2024-06-03 00:48:41.261
7,"{[{2000.0,2.0}],[{2011.0,1.0}]}","{1,2000,1,null}",2024-06-03 00:48:41.346
8,"{[{2000.0,1.0}],[{2011.0,1.0}]}","{1,2000,-1,null}",2024-06-03 00:48:41.488
9,"{[{2000.0,2.0}],[{2011.0,1.0}]}","{1,2000,1,null}",2024-06-03 00:48:41.571


In [28]:
from analysis.utilities import delete_all_tables

# WARNING: destructive clean-up. Per the recorded output below, this deletes the
# DuckDB tables and a MongoDB collection, so it will also run on "Run All".
# NOTE(review): presumably the loading cell at the top finds no data afterwards
# until a new run has been recorded - confirm which collection is dropped.
delete_all_tables()

DuckDB tables deleted successfully.
MongoDB collection deleted successfully.
