Let's test every operation we need in pandas to check whether it can compete with the sqlite implementation.

Object:
x: float, y: float, s: string

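DataFrame Columns:
x, y, s, obj_id, obj
where obj_id uniquely identifies each object. In the code below, obj_id is Python's `id()` of the object, the object itself is stored in a column named `t`, and `s` is not populated yet.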

Update: Nope, using obj_id as the DataFrame index for lookups works out very badly. Building an indexed df of 10k items took > 1 second. 
Let's just resign ourselves to:
 - O(n) remove, maybe with a mark-and-sweep gc
 - O(n) update, since we have to scan for the obj_id.

The O(n) update is looking good: < 1 ms on a 1M-item set. (The outputs recorded below come from a 10M-item run.)

In [1]:
import random
import time
import pandas as pd
import sys
from pympler.asizeof import asizeof


In [2]:
# Alphabet for the optional random `s` attribute. This has to be a plain
# string (a sequence of single characters): wrapped in a list it is a single
# 26-character element, and random.choice(letters) would always return the
# whole alphabet instead of one letter.
letters = 'qwertyuiopasdfghjklzxcvbnm'


class Thing:
    """Benchmark object carrying two random float attributes, `x` and `y`.

    Both values come from random.random(), so they lie in [0.0, 1.0).
    """

    def __init__(self):
        self.x = random.random()
        self.y = random.random()
        # The string attribute is currently disabled; uncomment to give every
        # object a random 5-letter `s`.
        #self.s = ''.join(random.choice(letters) for _ in range(5))

# Seed the RNG so that re-running the notebook from a fresh kernel builds the
# same x/y values, and the later random.choice() picks repeat as long as the
# cells are executed in the same order.
random.seed(0)

# Number of benchmark objects to create.
n_things = 10**7
things = [Thing() for _ in range(n_things)]

# 10^7: 2.3G
# (presumably process memory after building the list — TODO confirm)

In [3]:
# Build the benchmark DataFrame: one row per Thing, holding its x and y values,
# a reference to the object itself ('t') and its id() as 'obj_id'.
# perf_counter() is the clock intended for measuring elapsed time; time.time()
# follows the wall clock and can be adjusted while the cell is running.
t0 = time.perf_counter()
df = pd.DataFrame({
    'x': [t.x for t in things],
    'y': [t.y for t in things],
    't': things,  # already a list; no need to copy it element by element first
    'obj_id': [id(t) for t in things],
    }
)
t1 = time.perf_counter()
print(t1-t0)  # seconds spent building the frame
# 10^7: 2.6G, so 300MB. Nice!
# (i.e. about 300MB more than the 2.3G noted after creating `things`)

15.785856246948242


In [4]:
# Size of the DataFrame as reported by pympler's asizeof, in bytes per object.
bytes_per_thing = asizeof(df) / n_things
bytes_per_thing

128.0008584

In [15]:
# Disabled experiment: the code below is wrapped in a string literal, so it is
# not executed and the cell merely echoes the string. When enabled, it makes
# obj_id the DataFrame index and times 10**3 random df.loc[...] lookups.
"""
# 128 bytes / obj before
df = df.set_index('obj_id')
# 362 bytes / obj after, ouch.

n_lookups = 10**3
t0 = time.time()
for _ in range(n_lookups):
    oid = id(random.choice(things))
    df.loc[oid]
t1 = time.time()
print(t1-t0)
"""
# sub-millisecond find by id. That's good!
# (presumably the result recorded while the experiment was still enabled — TODO confirm)

"\n# 128 bytes / obj before\ndf = df.set_index('obj_id')\n# 362 bytes / obj after, ouch.\n\nn_lookups = 10**3\nt0 = time.time()\nfor _ in range(n_lookups):\n    oid = id(random.choice(things))\n    df.loc[oid]\nt1 = time.time()\nprint(t1-t0)\n"

In [28]:
# no-index lookup by id: every lookup is a boolean-mask comparison over the
# whole obj_id column.
n_runs = 10**3
# perf_counter() is the clock intended for timing short intervals; time.time()
# follows the wall clock and can be adjusted mid-run.
t0 = time.perf_counter()
for _ in range(n_runs):
    t = random.choice(things)
    df[df['obj_id'] == id(t)]
t1 = time.perf_counter()
print((t1-t0)/n_runs)  # mean seconds per lookup
# Display one example lookup as the cell's output.
t = random.choice(things)
df[df['obj_id'] == id(t)]

# still sub-ms lookup at 1M items, very nice!
# NOTE(review): the recorded output is ~5ms per lookup, which appears to come
# from a 10M-item run rather than the 1M run this comment describes — TODO confirm.

0.005320491552352905


Unnamed: 0,x,y,t,obj_id
248988,0.525043,0.016362,<__main__.Thing object at 0x7f1a31ad44c0>,139750479316160


In [25]:
# Time df.query() for result sets of roughly 1, 10, 100, ... up to n_things rows.
# x comes from random.random(), so `x <= n_finds / n_things` selects about
# n_finds rows. The threshold is derived from n_things rather than a
# hard-coded 10**7, so the printed n_finds stays meaningful when the dataset
# size is changed.
n_runs = 5
n_finds = 1
while n_finds <= n_things:
    thresh = n_finds / n_things
    expr = 'y <= 1 and x <= {}'.format(thresh)
    t_run = 0
    for _ in range(n_runs):
        # Untimed warm-up run of the identical query.
        df.query(expr)['t'].to_list()
        # perf_counter() is the clock intended for measuring elapsed time.
        t0 = time.perf_counter()
        ls = df.query(expr)['t'].to_list()
        t1 = time.perf_counter()
        t_run += (t1-t0)/n_runs  # accumulates the mean over n_runs
    print(n_finds, t_run)
    n_finds *= 10

# 3.5ms query @ 1M, 38ms @ 10M

1 0.06491284370422364
10 0.039666366577148435
100 0.03907394409179688
1000 0.039962148666381835
10000 0.04311203956604004
100000 0.07470993995666504
1000000 0.1988347053527832
10000000 0.6385342121124267


In [21]:
# Time an update by id: find the row whose obj_id matches, overwrite x in the
# DataFrame, and keep the Python object in sync.
n_runs = 10**2
# perf_counter() is the clock intended for measuring elapsed time.
t0 = time.perf_counter()
for _ in range(n_runs):
    t = random.choice(things)
    hits = df[df['obj_id'] == id(t)]
    if hits.empty:
        # `things` is never pruned, so a pick can land on an object whose row
        # was already removed by the drop cell; .index[0] would raise
        # IndexError on the empty result.
        continue
    idx = hits.index[0]
    df.at[idx,'x'] = 12
    t.x = 12
t1 = time.perf_counter()
print((t1-t0)/n_runs)  # mean seconds per update
df.loc[idx]  # show the last updated row

# 5ms update on one of 10M items

0.010246312618255616


x                                              12.0
y                                          0.717548
t         <__main__.Thing object at 0x7f421c1123d0>
obj_id                              139921915454416
Name: 8001709, dtype: object

In [22]:
# Time the removal of a single row. `idx` is the row label left behind by the
# update cell; running this cell again without refreshing `idx` raises
# KeyError because that label is already gone. Only the DataFrame row is
# removed: the object stays in `things`.
print(len(df))
# perf_counter() is the clock intended for measuring elapsed time.
t0 = time.perf_counter()
# Rebind instead of inplace=True: drop() returns the new frame.
df = df.drop(idx)
t1 = time.perf_counter()
print(len(df))
print(t1-t0)

9999997
9999996
0.6332800388336182


In [18]:
# Elapsed seconds between the t0 and t1 left behind by whichever timing cell
# ran most recently (this cell sets neither of them itself).
elapsed = t1 - t0
elapsed

0.6305458545684814