Change the path below to point to your own example Parquet file, if you have one.

In [None]:
# Path of the Parquet file to work with, relative to the notebook's working directory.
example_parquet_file_location = "../data/20221109.parquet"

In [None]:
import numpy as np
import pandas as pd

Use the code below to generate a Parquet file of random data that resembles a series of trade prices.

In [None]:
# Shape of the synthetic data set: how many rows to generate, and the bounds
# handed to the random generators for the 'size' and 'price' columns.
number_of_rows = 100_000

low_size = 10
high_size = 10_000

low_price = 15_000.0
high_price = 18_000.0

In [None]:
# Build a frame of synthetic, trade-like rows. The column order matters: later
# cells read the file back by position (0=time, 1=side, 2=size, 3=price).
df = pd.DataFrame({
    # Ascending integer timestamps; parse_time further down divides them by 1e6
    # before treating them as epoch seconds. The upper bound is an exact int and
    # the dtype is pinned to int64 because 10**13 does not fit the platform
    # default integer on systems where that is 32-bit.
    'time': np.sort(np.random.randint(0, 10**13, size=number_of_rows, dtype=np.int64)),
    # Trade side label: 'B' or 'S', chosen uniformly at random.
    'side': np.random.choice(['B', 'S'], number_of_rows),
    # Integer sizes in [low_size, high_size) -- randint excludes the upper bound.
    'size': np.random.randint(low_size, high_size, size=number_of_rows),
    # Prices drawn uniformly from [low_price, high_price).
    'price': np.random.uniform(low_price, high_price, number_of_rows)
})

In [None]:
# Destination for the randomly generated data.
# NOTE: this rebinds example_parquet_file_location, replacing the path set at the
# top of the notebook; every later cell that uses the variable sees this value.
example_parquet_file_location = "../data/random_trade_like_data.parquet"

Write the dataframe out to the Parquet file

This uses pandas' existing Parquet writer (`DataFrame.to_parquet`).

In [None]:
# Persist the randomly generated frame to the path chosen above.
df.to_parquet(example_parquet_file_location)

Add the location of the shared library to Python's module search path.

In [None]:
import sys

# Make the build output importable. The check keeps the cell idempotent:
# re-running it does not pile up duplicate entries on sys.path.
# The path is relative, so it resolves against the kernel's working directory.
if "../build" not in sys.path:
    sys.path.append("../build")

The build places the shared library under 'lib'

In [None]:
import lib.parquet_table as pqt

In [None]:
# Open the Parquet file through the compiled ParquetTable binding.
pt = pqt.ParquetTable(example_parquet_file_location)

In [None]:
# Ask the binding to print its statistics for the opened file
# (the exact contents are defined by the ParquetTable implementation).
pt.print_stats()

In [None]:
# Fetch column 3 by position. For the randomly generated file this is 'price',
# going by the order in which the columns were written -- assumes the binding
# indexes columns from zero in file order; confirm for your own file.
pt.column(3)

In [None]:
import datetime
import matplotlib.pyplot as plt

In [None]:
# Rebuild a pandas frame from columns fetched by position through ParquetTable.
# The positions match the random file written earlier (0=time, 2=size, 3=price);
# adjust them if your own file has a different column layout.
df = pd.DataFrame({
    label: pt.column(position)
    for label, position in (("time", 0), ("price", 3), ("size", 2))
})

In [None]:
# Show the last rows of the frame rebuilt from the ParquetTable columns.
df.tail()

In [None]:
def parse_time(x):
    """Turn an epoch timestamp in microseconds into a minute-resolution label.

    The value is divided by 1e6 to obtain seconds, converted with
    ``datetime.datetime.fromtimestamp`` (so it is expressed in the local time
    zone of the machine running the notebook) and formatted as abbreviated
    month, day, hour and minute, such as ``'Jan-01 00:00'``. The label carries
    no year and no seconds.
    """
    seconds = x / 1e6
    return datetime.datetime.fromtimestamp(seconds).strftime('%b-%d %H:%M')
    
# Label every row with its minute bin; rows that share a label are grouped later.
df['time_bin'] = df['time'].map(parse_time)

In [None]:
# First rows, now including the time_bin label column.
df.head()

In [None]:
# Last rows, to check the labels at the other end of the time range.
df.tail()

In [None]:
# Collapse the rows into one row per minute label: the total size and the last
# price seen under that label.
#
# sort=False keeps the groups in order of first appearance. The labels are
# strings such as 'Jan-01 00:00', so the default sort would order them
# alphabetically by month abbreviation ('Apr' before 'Jan') and scramble the
# time axis of the plots below. This relies on the rows already being in time
# order, which holds for the generated data (its timestamps are sorted) and
# which taking the last row as the "last price" assumes as well.
#
# 'sum' selects the same grouped sum as np.sum did; newer pandas releases warn
# when a NumPy callable is passed to agg.
tf = df.groupby('time_bin', sort=False).agg({
    'size': 'sum',
    'price': lambda x: x.iloc[-1]
}).reset_index()
tf.head()

In [None]:
# Summed size per minute label. The title and y-axis label let the figure stand
# on its own; the trailing semicolon keeps the Axes repr out of the cell output.
tf.plot(x='time_bin', y=['size'], figsize=(16, 8),
        title='Total size per minute', ylabel='size');

In [None]:
# Last price per minute label. The title and y-axis label let the figure stand
# on its own; the trailing semicolon keeps the Axes repr out of the cell output.
tf.plot(x='time_bin', y=['price'], figsize=(16, 8),
        title='Last price per minute', ylabel='price');

In [None]:
from fastparquet import ParquetFile

# Open the file with fastparquet as well. The shared path variable is used so
# that this cell reads the same file as the ParquetTable cells, rather than a
# separately hard-coded path that may not exist on this machine.
pf = ParquetFile(example_parquet_file_location)

In [None]:
# Load the fastparquet file into a pandas DataFrame.
# NOTE: this rebinds df, replacing the frame built from the ParquetTable columns.
df = pf.to_pandas()