# `brain-plasma` performance

In [3]:
from brain_plasma import Brain
import numpy as np
import pandas as pd
import pickle
import os
import time

# utilities
def show(t,method,code_string,n):
    print(' || '.join([str(x) for x in [round((time.time()-t)*1000,4), method, n, code_string]]))

s = '--------'
mem,pic,bra = ('in-memory','pickle','brain-plasma')
brain = Brain() # 1.5GB
track = pd.DataFrame(columns=['task','method','time','round','code'])
i = 0


print(s,'Saving large objects - 10,000,000x10 DataFrame of integers',s)
task = 'save large'

method = mem
code = 'x = 5'
for n in [1,2,3]:
    start = time.time()
    x = 5
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1


method = pic
code = "pickle.dump(pd.DataFrame({a:range(10000000) for a in 'abcdefghij'}),open('test.pkl','wb'))"
for n in [1,2,3]:
    start = time.time()
    pickle.dump(pd.DataFrame({a:range(10000000) for a in 'abcdefghij'}),open('test.pkl','wb'))
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1


method = bra
code = "brain['x'] = pd.DataFrame({a:range(10000000) for a in 'abcdefghij'})"
for n in (1,2,3):
    start = time.time()
    brain['x'] = pd.DataFrame({a:range(10000000) for a in 'abcdefghij'})
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1


print(s,'Loading large objects - 10,000,000x10 DataFrame of integers',s)
task = 'load large'

method = mem
code = 'y = x'
for n in [1,2,3]:
    start = time.time()
    y = x
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1

method = pic
code = "y = pickle.load(open('test.pkl','rb'))"
for n in [1,2,3]:
    start = time.time()
    y = pickle.load(open('test.pkl','rb'))
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1

method = bra
code = "y = brain['x']"
for n in (1,2,3):
    start = time.time()
    y = brain['x']
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1










print(s,'Saving small objects - a single string "this is the test string"',s)
brain.forget('x')
os.remove('test.pkl')
task = 'save small'

method = mem
code = 'x = "this is the test string"'
for n in [1,2,3]:
    start = time.time()
    x = "this is the test string"
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1

method = pic
code = "pickle.dump('this is the test string',open('test.pkl','wb'))"
for n in [1,2,3]:
    start = time.time()
    pickle.dump('this is the test string',open('test.pkl','wb'))
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1

method = bra
code = "brain['x'] = pd.DataFrame({a:range(10000000) for a in 'abcdefghij'})"
for n in (1,2,3):
    start = time.time()
    brain['x'] = pd.DataFrame({a:range(10000000) for a in 'abcdefghij'})
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1


print(s,'Loading small objects - a single string "this is the test string"',s)
task = 'load small'

method = mem
code = 'y = x'
for n in [1,2,3]:
    start = time.time()
    y = x
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1

method = pic
code = "y = pickle.load(open('test.pkl','rb'))"
for n in [1,2,3]:
    start = time.time()
    y = pickle.load(open('test.pkl','rb'))
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1

method = bra
code = "y = brain['x']"
for n in (1,2,3):
    start = time.time()
    y = brain['x']
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1









medium_json = {
    k:{
      "_id": "5d4c5b531211cb59f55e0562",
      "index": 0,
      "guid": "855bad44-23f2-4353-acfe-d8e24fed1486",
      "isActive": True,
      "balance": "$3,840.16",
      "picture": "http://placehold.it/32x32",
      "age": 21,
      "eyeColor": "green",
      "name": {
        "first": "Mcgowan",
        "last": "Conway"
      },
      "company": "AQUACINE",
      "email": "mcgowan.conway@aquacine.us",
      "phone": "+1 (999) 447-2069",
      "address": "655 Boynton Place, Nutrioso, Florida, 9478",
      "about": "Consequat do exercitation incididunt irure sit dolor aliquip amet sunt qui quis fugiat cillum. Aliquip irure enim ullamco tempor ullamco consectetur adipisicing deserunt aliqua tempor exercitation. Aliquip occaecat sit nisi Lorem. Magna incididunt dolor fugiat aliquip commodo eiusmod elit ea occaecat elit elit veniam. Consequat tempor aliquip voluptate sunt exercitation sit adipisicing. Anim consequat officia dolor veniam aliquip voluptate proident tempor Lorem quis nisi dolore.",
      "registered": "Monday, October 6, 2014 12:32 PM",
      "latitude": "65.583401",
      "longitude": "-61.690822",
      "tags": [
        "voluptate",
        "ipsum",
        "exercitation",
        "tempor",
        "est"
      ],
      "range": [
        0,1,2,3,4,5,6,7,8,9
      ],
      "friends": [
        {
          "id": 0,
          "name": "Michele Thompson"
        },
        {
          "id": 1,
          "name": "Ebony Montgomery"
        },
        {
          "id": 2,
          "name": "Wolf Alvarez"
        }
      ],
      "greeting": "Hello, Mcgowan! You have 5 unread messages.",
      "favoriteFruit": "apple"
    }
  for k in range(10)
}
print(s,'Saving medium objects - list of 10000 json dictionaries w/random key:values from https://next.json-generator.com/41mO9BHXD',s)
brain.forget('x')
os.remove('test.pkl')
task = 'save medium'

method = mem
code = 'x = [medium_json for x in range(1000)]'
for n in [1,2,3]:
    start = time.time()
    x = [medium_json for x in range(1000)]
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1

method = pic
code = "pickle.dump([medium_json for x in range(1000)],open('test.pkl','wb'))"
for n in [1,2,3]:
    start = time.time()
    pickle.dump([medium_json for x in range(1000)],open('test.pkl','wb'))
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1

method = bra
code = "brain['x'] = [medium_json for x in range(1000)]"
for n in (1,2,3):
    start = time.time()
    brain['x'] = [medium_json for x in range(1000)]
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1


print(s,'Loading medium objects - a single string "this is the test string"',s)
task = 'load medium'

method = mem
code = 'y = x'
for n in [1,2,3]:
    start = time.time()
    y = x
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1

method = pic
code = "y = pickle.load(open('test.pkl','rb'))"
for n in [1,2,3]:
    start = time.time()
    y = pickle.load(open('test.pkl','rb'))
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1

method = bra
code = "y = brain['x']"
for n in (1,2,3):
    start = time.time()
    y = brain['x']
    show(start,method,code,n)
    track.loc[i] = [task,method,round((time.time()-start)*1000,4),n,code]
    i+=1

-------- Saving large objects - 10,000,000x10 DataFrame of integers --------
0.0021 || in-memory || 1 || x = 5
0.0021 || in-memory || 2 || x = 5
0.0012 || in-memory || 3 || x = 5
3228.8871 || pickle || 1 || pickle.dump(pd.DataFrame({a:range(10000000) for a in 'abcdefghij'}),open('test.pkl','wb'))
2949.3291 || pickle || 2 || pickle.dump(pd.DataFrame({a:range(10000000) for a in 'abcdefghij'}),open('test.pkl','wb'))
2876.519 || pickle || 3 || pickle.dump(pd.DataFrame({a:range(10000000) for a in 'abcdefghij'}),open('test.pkl','wb'))
863.3139 || brain-plasma || 1 || brain['x'] = pd.DataFrame({a:range(10000000) for a in 'abcdefghij'})
751.3571 || brain-plasma || 2 || brain['x'] = pd.DataFrame({a:range(10000000) for a in 'abcdefghij'})
779.609 || brain-plasma || 3 || brain['x'] = pd.DataFrame({a:range(10000000) for a in 'abcdefghij'})
-------- Loading large objects - 10,000,000x10 DataFrame of integers --------
0.0012 || in-memory || 1 || y = x
0.001 || in-memory || 2 || y = x
0.0012 || in-me

In [4]:
compare = pd.DataFrame(track.groupby(['task','method']).time.mean()).reset_index()
compare

Unnamed: 0,task,method,time
0,load large,brain-plasma,20.053233
1,load large,in-memory,0.057367
2,load large,pickle,934.575033
3,load medium,brain-plasma,245.8567
4,load medium,in-memory,0.078933
5,load medium,pickle,0.6914
6,load small,brain-plasma,1.066033
7,load small,in-memory,0.247733
8,load small,pickle,0.4104
9,save large,brain-plasma,798.324


In [14]:
compare['size'] = [ x.split()[1] for x in compare['task']]
compare['task'] = [x.split()[0] for x in compare['task']]

In [16]:
d = compare.to_dict()
d

{'task': {0: 'load',
  1: 'load',
  2: 'load',
  3: 'load',
  4: 'load',
  5: 'load',
  6: 'load',
  7: 'load',
  8: 'load',
  9: 'save',
  10: 'save',
  11: 'save',
  12: 'save',
  13: 'save',
  14: 'save',
  15: 'save',
  16: 'save',
  17: 'save'},
 'method': {0: 'brain-plasma',
  1: 'in-memory',
  2: 'pickle',
  3: 'brain-plasma',
  4: 'in-memory',
  5: 'pickle',
  6: 'brain-plasma',
  7: 'in-memory',
  8: 'pickle',
  9: 'brain-plasma',
  10: 'in-memory',
  11: 'pickle',
  12: 'brain-plasma',
  13: 'in-memory',
  14: 'pickle',
  15: 'brain-plasma',
  16: 'in-memory',
  17: 'pickle'},
 'time': {0: 20.05323333333333,
  1: 0.05736666666666667,
  2: 934.5750333333332,
  3: 245.85669999999996,
  4: 0.07893333333333334,
  5: 0.6913999999999999,
  6: 1.0660333333333334,
  7: 0.24773333333333336,
  8: 0.41040000000000004,
  9: 798.324,
  10: 0.04800000000000001,
  11: 3018.3553666666667,
  12: 121.55136666666665,
  13: 0.11863333333333333,
  14: 0.8370000000000001,
  15: 802.9539333333333,


In [2]:
import plotly as py
import plotly.graph_objects as go
import plotly.express as px

In [20]:
px.bar(compare,x='method',y='time',facet_col='task',color='method',facet_row='size')