# Retrieve run data from MongoDB

In [140]:
from pymongo import MongoClient
import polars as pl

# Connect to the MongoDB server on localhost and load every document of the
# "message" collection (database "trader") into a polars DataFrame.
client = MongoClient(host="localhost", port=27017)
db = client.get_database("trader")
collection = db.get_collection("message")
documents = list(collection.find())  # no filter: fetch the whole collection
run_data = pl.DataFrame(documents)


import polars as pl

# Keep only the rows that belong to the trading session of the LAST document
# in `run_data` (the session id is read from the data, not hard-coded).
if run_data.is_empty():
    raise ValueError("run_data is empty: no documents were retrieved from MongoDB")
latest_session_id = run_data["trading_session_id"][-1]
filtered_data = run_data.filter(pl.col("trading_session_id") == latest_session_id)

# Flatten each document into (order_book, incoming, timestamp) records.
preprocessed_data = []
for row in filtered_data.to_dicts():
    # A document without content is treated like one with empty content.
    content = row["content"] or {}

    incoming = content.get("incoming_message")
    if incoming is not None:
        # Drop `trader_id` by building a new dict: this does not mutate the
        # source row and tolerates messages that never carried the key.
        incoming = {key: value for key, value in incoming.items() if key != "trader_id"}

    preprocessed_data.append({
        "order_book": content.get("order_book"),
        "incoming": incoming,
        "timestamp": row["timestamp"],
    })

# Build the frame, order it chronologically (null timestamps last) and add a
# 0-based `row_number` column. `with_row_index` replaces the deprecated
# `with_row_count`.
new_order_books = (
    pl.DataFrame(preprocessed_data)
    .sort("timestamp", nulls_last=True)
    .with_row_index("row_number")
)

# Display the DataFrame
print(new_order_books)

shape: (104, 4)
┌────────────┬───────────────────────────────────┬──────────────────┬─────────────────────────┐
│ row_number ┆ order_book                        ┆ incoming         ┆ timestamp               │
│ ---        ┆ ---                               ┆ ---              ┆ ---                     │
│ u32        ┆ struct[2]                         ┆ struct[3]        ┆ datetime[μs]            │
╞════════════╪═══════════════════════════════════╪══════════════════╪═════════════════════════╡
│ 0          ┆ {[],[]}                           ┆ {null,null,null} ┆ 2024-05-28 21:53:22.703 │
│ 1          ┆ {[],[{2002.0,1.0}]}               ┆ {1,2002.0,-1}    ┆ 2024-05-28 21:53:22.793 │
│ 2          ┆ {[],[]}                           ┆ {1,2006.0,1}     ┆ 2024-05-28 21:53:24.336 │
│ 3          ┆ {[{2000.0,1.0}],[]}               ┆ {1,2000.0,1}     ┆ 2024-05-28 21:53:24.427 │
│ 4          ┆ {[{2000.0,2.0}],[]}               ┆ {1,2000.0,1}     ┆ 2024-05-28 21:53:24.508 │
│ …          ┆ …        

  new_order_books = new_order_books.with_row_count("row_number")


In [143]:
# Preview the first 10 rows of the sorted, row-numbered order-book frame.
new_order_books.head(10)

row_number,order_book,incoming,timestamp
u32,struct[2],struct[3],datetime[μs]
0,"{[],[]}","{null,null,null}",2024-05-28 21:53:22.703
1,"{[],[{2002.0,1.0}]}","{1,2002.0,-1}",2024-05-28 21:53:22.793
2,"{[],[]}","{1,2006.0,1}",2024-05-28 21:53:24.336
3,"{[{2000.0,1.0}],[]}","{1,2000.0,1}",2024-05-28 21:53:24.427
4,"{[{2000.0,2.0}],[]}","{1,2000.0,1}",2024-05-28 21:53:24.508
5,"{[],[]}","{1,2000.0,-1}",2024-05-28 21:53:24.792
6,"{[],[]}","{1,2000.0,-1}",2024-05-28 21:53:24.842
7,"{[{1996.0,1.0}],[]}","{1,1996.0,1}",2024-05-28 21:53:27.027
8,"{[{1996.0,1.0}],[{1998.0,1.0}]}","{1,1998.0,-1}",2024-05-28 21:53:28.455
9,"{[{1996.0,1.0}],[{1998.0,2.0}, {2000.0,1.0}]}","{1,1998.0,-1}",2024-05-28 21:53:28.556


In [47]:
# WARNING: destructive cleanup cell. Per its recorded output, this call deletes
# the DuckDB tables and the MongoDB collection, so a later re-run of the
# loading cell may find no data. Run it deliberately, not as part of "Run All".
# NOTE(review): its execution count (47) is lower than the cells above
# (140, 143), so it was last run out of order; confirm it belongs here.
from analysis.utilities import delete_all_tables

delete_all_tables()

DuckDB tables deleted successfully.
MongoDB collection deleted successfully.
