# `read-protobuf` Examples

In [1]:
from io import StringIO

import pandas as pd
import demo_pb2

from read_protobuf import read_protobuf

## Create a protobuf message

In [6]:
# Top-level container message; later cells refer to it by the name `Collection`.
Collection = demo_pb2.Collection()

# First record flavour: two scalar fields, a nested value and a repeated field.
Record = demo_pb2.Record()
Record.int = 1234
Record.float = 43.685
Record.nested.data = 1.2
Record.rep.extend([1, 2])

# Second record flavour: only the two scalar fields are set.
RecordTwo = demo_pb2.Record()
RecordTwo.int = 1253135
RecordTwo.float = -73.2324

# Fill the container with 10,000 records; indices 0, 5, 10, ... get RecordTwo,
# every other index gets Record.
for i in range(10_000):
    Collection.records.extend([RecordTwo if i % 5 == 0 else Record])

# Serialize the populated container.
msg = Collection.SerializeToString()

## Interpret a protobuf

In [7]:
# Fresh Collection message instance (an object, not a class) that is handed to
# read_protobuf together with the serialized bytes.
CollectionType = demo_pb2.Collection()
# Build a DataFrame from the serialized protobuf bytes held in `msg`.
df = read_protobuf(msg, CollectionType)
# Preview the first rows.
df.head()

Unnamed: 0,int,float,rep,data
0,1253135,-73.232399,,
1,1234,43.685001,"[1, 2]",1.2
2,1234,43.685001,"[1, 2]",1.2
3,1234,43.685001,"[1, 2]",1.2
4,1234,43.685001,"[1, 2]",1.2


In [8]:
# Same conversion with the `prefix_nested` option switched on; in the output
# shown for this cell the nested column is labelled `nested.data` instead of `data`.
df = read_protobuf(msg, CollectionType, prefix_nested=True)
df.head()

Unnamed: 0,int,float,rep,nested.data
0,1253135,-73.232399,,
1,1234,43.685001,"[1, 2]",1.2
2,1234,43.685001,"[1, 2]",1.2
3,1234,43.685001,"[1, 2]",1.2
4,1234,43.685001,"[1, 2]",1.2


## Benchmark

`Collection` has 10,000 records. The cells below time three ways of turning that data into a pandas DataFrame.

#### read_protobuf

In [12]:
%%time

df = read_protobuf(msg, Collection) # create a dataframe from serialized protobuf bytes

CPU times: user 54.6 ms, sys: 3.17 ms, total: 57.7 ms
Wall time: 56.3 ms


#### MessageToJson

In [10]:
from google.protobuf.json_format import MessageToJson

In [11]:
%%time

msg_json = MessageToJson(Collection)
df = pd.read_json(msg_json)

CPU times: user 148 ms, sys: 4.88 ms, total: 153 ms
Wall time: 151 ms


#### MessageToDict

In [14]:
from google.protobuf.json_format import MessageToDict

In [15]:
%%time

msg_dict = MessageToDict(Collection)
df = pd.DataFrame(msg_dict)

CPU times: user 99.3 ms, sys: 3.14 ms, total: 102 ms
Wall time: 101 ms
