## Vaex: Reading and Processing Huge Datasets in Seconds

## What is Vaex?
Vaex is a high-performance Python library for lazy, out-of-core DataFrames (similar to Pandas), used to visualize and explore big tabular datasets. It calculates statistics such as mean, sum, count, standard deviation, etc., on an N-dimensional grid for more than a billion (10^9) samples/rows per second. Visualization is done using histograms, density plots and 3D volume rendering, allowing interactive exploration of big data. Vaex uses memory mapping, a zero-memory-copy policy and lazy computations for best performance (no memory wasted).

In [3]:
import numpy
import h5py
import vaex
# Record the installed library versions so the environment can be reproduced.
print(numpy.__version__)
print(h5py.__version__)
# vaex reports a dict of sub-package versions rather than a single string.
print(vaex.__version__)
# NOTE(review): presumably a sanity check that the imported h5py module exposes
# its File class (i.e. the real package was picked up) — confirm intent.
print(hasattr(h5py, "File"))

1.26.4
3.12.1
{'vaex': '4.17.0', 'vaex-core': '4.17.1', 'vaex-viz': '0.5.4', 'vaex-hdf5': '0.14.1', 'vaex-server': '0.9.0', 'vaex-astro': '0.9.3', 'vaex-jupyter': '0.8.2', 'vaex-ml': '0.18.3'}
True


In [4]:
import vaex
import pandas as pd
import numpy as np
# Seed the generator so that "Restart Kernel -> Run All" rebuilds the exact
# same synthetic table (and therefore the same CSV/HDF5 files downstream).
SEED = 42
rng = np.random.default_rng(SEED)

# Size of the synthetic dataset: one million rows by 500 integer columns.
n_rows = 1000000
n_cols = 500

# Uniform random integers in [0, 100). The dtype is pinned to int32 so the
# memory footprint (~1.9 GB) does not depend on the platform's default
# integer width.
df = pd.DataFrame(
    rng.integers(0, 100, size=(n_rows, n_cols), dtype=np.int32),
    columns=[f'col{i}' for i in range(n_cols)],
)
df.head()

Unnamed: 0,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,...,col490,col491,col492,col493,col494,col495,col496,col497,col498,col499
0,27,6,1,23,28,61,78,97,50,77,...,31,20,4,2,3,14,12,27,40,99
1,28,10,39,93,22,36,26,8,89,34,...,28,66,44,90,12,84,73,37,64,33
2,64,67,78,97,28,78,64,99,41,23,...,96,12,49,50,67,45,28,10,92,1
3,2,21,75,36,85,51,60,76,97,76,...,56,85,67,38,60,74,12,20,0,17
4,72,34,69,50,22,36,43,81,47,20,...,82,13,78,51,50,22,75,17,93,60


In [5]:
# Summarise the pandas frame (index range, column count, dtypes) together
# with its in-memory size.
df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Columns: 500 entries, col0 to col499
dtypes: int32(500)
memory usage: 1.9 GB


## Creating CSV files

In [7]:
# Destination of the CSV export, relative to the current working directory.
file_path = 'final_data.csv'
# index=False keeps the pandas row index out of the file, so the CSV contains
# only the col0..col499 data columns.
df.to_csv(file_path, index=False)

## Create HDF5 files

In [8]:
# Load the CSV through Vaex. convert=True asks Vaex to convert the CSV to an
# HDF5 file (the next section opens 'final_data.csv.hdf5'); chunk_size is the
# number of rows processed per chunk during that conversion.
vaex_df = vaex.from_csv(file_path, convert=True, chunk_size=5_000_000)

In [9]:
# Show which class vaex.from_csv returned.
type(vaex_df)

vaex.dataframe.DataFrameLocal

## Read HDF5 files using the Vaex library

In [10]:
# Re-open the data directly from the HDF5 file; the name is the CSV path with
# '.hdf5' appended. The path is spelled out so this cell can be run on its own
# once the file exists, without repeating the CSV export.
vaex_df = vaex.open('final_data.csv.hdf5')

In [11]:
# Show which class vaex.open returned.
type(vaex_df)

vaex.dataframe.DataFrameLocal

In [12]:
# Preview the first rows of the Vaex DataFrame.
vaex_df.head()

#,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12,col13,col14,col15,col16,col17,col18,col19,col20,col21,col22,col23,col24,col25,col26,col27,col28,col29,col30,col31,col32,col33,col34,col35,col36,col37,col38,col39,col40,col41,col42,col43,col44,col45,col46,col47,col48,col49,col50,col51,col52,col53,col54,col55,col56,col57,col58,col59,col60,col61,col62,col63,col64,col65,col66,col67,col68,col69,col70,col71,col72,col73,col74,col75,col76,col77,col78,col79,col80,col81,col82,col83,col84,col85,col86,col87,col88,col89,col90,col91,col92,col93,col94,col95,col96,col97,col98,col99,...,col400,col401,col402,col403,col404,col405,col406,col407,col408,col409,col410,col411,col412,col413,col414,col415,col416,col417,col418,col419,col420,col421,col422,col423,col424,col425,col426,col427,col428,col429,col430,col431,col432,col433,col434,col435,col436,col437,col438,col439,col440,col441,col442,col443,col444,col445,col446,col447,col448,col449,col450,col451,col452,col453,col454,col455,col456,col457,col458,col459,col460,col461,col462,col463,col464,col465,col466,col467,col468,col469,col470,col471,col472,col473,col474,col475,col476,col477,col478,col479,col480,col481,col482,col483,col484,col485,col486,col487,col488,col489,col490,col491,col492,col493,col494,col495,col496,col497,col498,col499
0,27,6,1,23,28,61,78,97,50,77,67,68,81,2,13,86,74,34,39,13,38,46,27,45,89,51,24,77,44,19,80,41,99,57,8,51,49,90,26,62,88,41,68,2,91,53,40,64,67,45,56,7,21,80,93,63,57,44,60,13,34,96,43,94,39,87,58,47,41,99,62,79,90,13,48,41,58,18,14,2,4,57,72,2,18,10,55,15,68,95,74,48,58,56,69,32,73,56,83,91,...,38,80,41,96,63,42,84,31,27,8,97,16,59,14,76,84,29,46,62,37,60,3,42,88,95,33,45,81,1,72,59,14,89,63,67,2,24,84,42,54,13,79,76,53,5,45,78,95,38,19,63,30,39,64,20,17,16,97,92,91,47,91,47,78,91,30,40,20,0,90,74,59,17,84,45,34,2,37,11,54,31,32,36,71,5,48,73,6,34,95,31,20,4,2,3,14,12,27,40,99
1,28,10,39,93,22,36,26,8,89,34,15,61,60,13,13,85,70,25,17,12,41,5,13,65,98,58,30,29,39,30,76,57,73,8,56,73,73,80,57,8,91,55,19,30,0,60,88,8,71,64,18,49,61,47,41,49,33,69,76,42,75,33,86,3,78,82,91,60,22,35,34,29,28,25,75,69,41,56,74,19,11,73,3,74,12,70,68,35,87,29,98,75,57,85,6,6,97,72,85,13,...,9,33,8,15,30,3,71,76,54,15,81,91,34,19,19,20,40,84,79,91,83,92,28,47,82,24,71,70,22,51,22,68,49,85,31,4,72,14,45,71,12,47,95,15,89,61,15,33,67,30,40,55,17,29,87,2,86,75,18,79,1,31,5,38,43,2,68,18,21,24,37,40,48,27,91,61,32,67,14,81,85,73,0,45,30,89,75,50,91,14,28,66,44,90,12,84,73,37,64,33
2,64,67,78,97,28,78,64,99,41,23,89,50,0,83,71,49,81,42,51,18,32,61,1,55,56,18,28,56,16,9,77,75,79,94,17,82,16,91,84,95,22,26,36,7,33,34,49,24,36,72,21,89,62,33,97,73,58,99,52,28,12,52,52,24,21,71,98,63,92,57,6,24,44,53,54,58,55,58,72,56,79,50,28,47,35,42,68,64,43,12,51,7,81,78,37,54,46,64,61,62,...,19,75,89,90,59,20,24,29,55,12,84,64,93,93,79,95,63,53,54,32,40,5,99,70,3,86,72,32,29,57,28,76,2,76,17,76,29,96,48,13,40,98,70,21,41,32,93,32,48,94,72,3,54,58,66,10,76,49,68,94,10,3,89,9,42,86,73,19,20,71,90,30,73,9,50,5,18,43,30,85,89,71,73,70,26,7,51,65,26,55,96,12,49,50,67,45,28,10,92,1
3,2,21,75,36,85,51,60,76,97,76,57,83,65,47,39,49,9,19,31,57,99,87,7,46,81,49,10,80,76,94,99,38,69,15,97,13,72,46,99,75,13,11,82,65,63,92,70,33,54,88,12,10,44,46,70,1,20,39,61,52,19,27,57,51,85,2,15,58,94,86,15,67,20,85,35,34,42,26,5,86,2,62,98,15,23,15,5,55,6,60,28,65,25,78,4,72,40,93,27,85,...,12,66,2,50,35,62,70,83,4,62,70,24,57,0,69,1,62,34,21,68,1,32,70,55,63,31,73,63,62,87,53,72,32,27,51,89,71,49,91,5,57,88,80,82,13,28,41,0,7,46,71,25,22,2,63,49,99,95,21,37,25,24,71,1,62,26,75,58,21,12,0,37,5,62,35,32,78,63,6,86,46,6,25,54,88,40,86,42,93,71,56,85,67,38,60,74,12,20,0,17
4,72,34,69,50,22,36,43,81,47,20,61,63,36,0,28,95,86,64,24,62,31,42,15,41,2,21,21,32,76,8,44,92,29,34,15,18,40,49,28,58,89,32,97,93,82,19,94,56,28,35,36,70,58,39,96,68,72,51,45,11,88,12,48,42,58,94,15,63,18,23,20,0,37,42,75,31,48,38,58,89,57,60,57,17,10,99,28,38,3,25,85,16,6,71,1,9,6,23,73,86,...,19,58,77,98,73,72,40,1,96,41,12,22,59,27,84,5,22,37,87,0,22,26,38,73,35,60,85,0,52,21,78,62,93,94,96,48,23,7,35,23,75,42,24,89,63,83,81,86,31,13,28,0,67,2,10,43,28,8,50,13,28,66,80,47,63,90,58,79,1,3,10,45,30,48,74,7,96,60,14,30,44,33,90,55,6,13,49,56,1,59,82,13,78,51,50,22,75,17,93,60
5,67,48,46,4,82,20,50,76,10,88,92,21,83,45,22,60,53,50,3,22,77,55,1,75,99,62,98,72,76,23,78,56,46,70,80,29,91,9,52,73,89,39,90,20,71,22,43,83,87,98,27,33,76,2,92,87,90,46,62,98,33,66,43,77,99,79,0,26,78,15,43,64,19,87,50,47,65,44,4,99,81,42,11,87,86,41,75,67,66,88,56,91,55,87,94,52,3,79,20,23,...,64,91,30,92,31,53,86,36,34,64,30,5,4,47,45,99,5,34,20,25,49,82,75,33,92,82,11,24,28,1,45,64,49,76,87,7,7,50,65,9,0,68,32,41,57,40,31,56,24,56,86,82,13,21,9,14,75,67,93,89,8,60,99,15,89,99,97,43,74,17,28,76,33,36,53,4,23,28,89,23,76,29,26,72,33,98,56,62,16,40,45,93,91,9,65,75,61,52,80,17
6,65,21,2,57,97,77,30,65,78,60,55,71,63,71,30,89,13,39,97,63,39,1,28,40,9,89,35,77,22,16,15,54,69,52,81,38,87,26,57,84,33,65,74,95,51,91,8,8,99,66,39,39,85,37,2,69,33,85,46,62,36,63,93,52,32,94,3,91,86,65,46,98,28,1,51,79,53,70,35,58,83,52,9,89,34,2,73,79,82,98,51,18,75,49,70,85,95,86,2,20,...,25,45,99,43,85,19,13,97,84,37,5,82,29,82,82,43,92,32,95,38,28,78,10,68,67,19,1,90,78,75,48,21,89,91,55,20,86,36,85,56,71,36,5,67,95,6,29,44,33,48,7,48,52,26,31,4,96,42,63,81,99,32,46,53,91,5,42,85,29,49,23,81,52,34,53,32,18,1,32,24,81,72,61,52,34,95,74,76,73,52,53,22,68,69,54,33,85,93,61,60
7,77,72,23,85,72,24,68,73,77,9,64,17,68,42,52,58,95,70,34,45,59,34,15,16,93,15,43,29,71,66,25,20,23,58,80,82,25,66,4,80,51,13,19,17,63,13,31,5,5,8,40,79,86,16,60,23,72,0,88,90,0,24,27,35,39,87,72,53,21,50,3,79,23,20,3,81,83,50,50,68,95,61,61,36,91,79,20,29,99,81,62,52,50,54,15,72,91,87,52,55,...,93,75,21,44,44,52,92,8,25,89,48,98,54,43,29,33,29,22,9,65,21,96,95,10,63,56,82,58,34,92,44,29,16,54,85,59,52,24,4,74,27,97,69,49,61,62,89,79,27,97,35,58,74,9,44,28,1,73,50,88,57,44,67,28,91,31,22,93,99,61,7,83,29,26,74,21,9,37,55,79,47,32,49,17,42,67,32,96,97,72,89,30,22,14,45,7,5,4,30,85
8,89,40,67,35,21,53,65,99,70,25,94,43,97,60,69,4,54,23,75,76,82,37,9,27,31,55,22,44,46,58,37,59,84,81,29,57,92,60,5,75,38,61,20,99,55,8,28,39,70,6,27,5,33,4,38,52,47,43,11,98,60,45,44,86,38,79,84,56,47,65,19,75,92,30,96,32,33,24,84,48,3,22,41,39,75,21,64,48,66,45,4,14,17,36,30,95,9,43,27,84,...,37,75,72,70,73,39,78,11,15,32,68,21,18,46,62,98,4,86,93,78,82,45,29,20,37,73,52,10,12,59,93,24,3,33,22,97,43,28,7,16,2,10,40,59,10,78,36,50,31,16,62,89,1,86,37,9,95,59,64,17,50,30,49,66,24,57,28,73,33,40,9,46,23,44,9,61,98,21,22,2,22,85,23,80,63,34,70,65,49,72,75,73,29,71,72,34,6,88,85,62
9,69,48,23,82,78,22,11,5,96,80,3,21,71,7,56,12,33,22,18,48,28,77,29,24,43,84,98,4,19,17,65,82,0,82,90,23,44,81,15,37,23,40,91,7,3,24,58,32,10,59,6,5,24,74,84,37,22,93,93,97,19,90,96,77,89,27,42,75,99,55,32,64,55,50,20,29,61,45,4,64,38,33,47,87,50,90,86,84,70,69,21,13,50,24,79,63,30,15,11,91,...,70,82,27,52,68,99,36,25,50,79,2,80,65,17,55,72,47,91,2,70,64,33,74,45,85,39,88,20,70,27,8,92,26,12,36,65,88,34,37,54,13,97,43,83,21,19,9,59,35,16,61,21,79,12,13,53,54,31,10,41,65,17,3,16,85,18,91,38,40,23,98,37,38,19,19,96,69,87,76,11,30,39,38,52,8,40,9,31,28,11,18,94,29,27,39,81,91,44,37,49


## Expression system
Don't waste memory or time with feature engineering, we (lazily) transform your data when needed.

In [13]:
%%time
# Add a column defined as the element-wise product of col1 and col3.
# Note: the name reads as "col1 x col3"; it does not refer to column col13.
vaex_df['multiplication_col13']=vaex_df.col1*vaex_df.col3

CPU times: total: 0 ns
Wall time: 6.92 ms


In [14]:
# Display the new column (the product of col1 and col3).
vaex_df['multiplication_col13']

Expression = multiplication_col13
Length: 1,000,000 dtype: int64 (column)
---------------------------------------
     0   138
     1   930
     2  6499
     3   756
     4  1700
    ...     
999995  6160
999996  5915
999997  1342
999998   574
999999  1188

## Out-of-core DataFrame

Filtering and evaluating expressions will not waste memory by making copies; the data is kept untouched on disk, and will be streamed only when needed. Delay the time before you need a cluster.

In [15]:
# Display only the rows whose col2 value is greater than 70.
vaex_df[vaex_df.col2>70]

#,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12,col13,col14,col15,col16,col17,col18,col19,col20,col21,col22,col23,col24,col25,col26,col27,col28,col29,col30,col31,col32,col33,col34,col35,col36,col37,col38,col39,col40,col41,col42,col43,col44,col45,col46,col47,col48,col49,col50,col51,col52,col53,col54,col55,col56,col57,col58,col59,col60,col61,col62,col63,col64,col65,col66,col67,col68,col69,col70,col71,col72,col73,col74,col75,col76,col77,col78,col79,col80,col81,col82,col83,col84,col85,col86,col87,col88,col89,col90,col91,col92,col93,col94,col95,col96,col97,col98,col99,...,col401,col402,col403,col404,col405,col406,col407,col408,col409,col410,col411,col412,col413,col414,col415,col416,col417,col418,col419,col420,col421,col422,col423,col424,col425,col426,col427,col428,col429,col430,col431,col432,col433,col434,col435,col436,col437,col438,col439,col440,col441,col442,col443,col444,col445,col446,col447,col448,col449,col450,col451,col452,col453,col454,col455,col456,col457,col458,col459,col460,col461,col462,col463,col464,col465,col466,col467,col468,col469,col470,col471,col472,col473,col474,col475,col476,col477,col478,col479,col480,col481,col482,col483,col484,col485,col486,col487,col488,col489,col490,col491,col492,col493,col494,col495,col496,col497,col498,col499,multiplication_col13
0,64,67,78,97,28,78,64,99,41,23,89,50,0,83,71,49,81,42,51,18,32,61,1,55,56,18,28,56,16,9,77,75,79,94,17,82,16,91,84,95,22,26,36,7,33,34,49,24,36,72,21,89,62,33,97,73,58,99,52,28,12,52,52,24,21,71,98,63,92,57,6,24,44,53,54,58,55,58,72,56,79,50,28,47,35,42,68,64,43,12,51,7,81,78,37,54,46,64,61,62,...,75,89,90,59,20,24,29,55,12,84,64,93,93,79,95,63,53,54,32,40,5,99,70,3,86,72,32,29,57,28,76,2,76,17,76,29,96,48,13,40,98,70,21,41,32,93,32,48,94,72,3,54,58,66,10,76,49,68,94,10,3,89,9,42,86,73,19,20,71,90,30,73,9,50,5,18,43,30,85,89,71,73,70,26,7,51,65,26,55,96,12,49,50,67,45,28,10,92,1,6499.0
1,2,21,75,36,85,51,60,76,97,76,57,83,65,47,39,49,9,19,31,57,99,87,7,46,81,49,10,80,76,94,99,38,69,15,97,13,72,46,99,75,13,11,82,65,63,92,70,33,54,88,12,10,44,46,70,1,20,39,61,52,19,27,57,51,85,2,15,58,94,86,15,67,20,85,35,34,42,26,5,86,2,62,98,15,23,15,5,55,6,60,28,65,25,78,4,72,40,93,27,85,...,66,2,50,35,62,70,83,4,62,70,24,57,0,69,1,62,34,21,68,1,32,70,55,63,31,73,63,62,87,53,72,32,27,51,89,71,49,91,5,57,88,80,82,13,28,41,0,7,46,71,25,22,2,63,49,99,95,21,37,25,24,71,1,62,26,75,58,21,12,0,37,5,62,35,32,78,63,6,86,46,6,25,54,88,40,86,42,93,71,56,85,67,38,60,74,12,20,0,17,756.0
2,73,36,79,91,50,76,72,53,73,22,51,96,63,49,47,88,54,16,6,29,54,30,22,28,96,88,46,19,57,82,67,49,97,42,35,78,14,80,11,31,31,71,17,11,78,93,0,88,94,23,91,57,93,56,83,66,28,51,8,55,31,2,80,48,79,92,22,13,55,44,96,11,73,72,22,45,53,71,86,39,27,99,64,28,17,77,14,46,69,64,68,65,37,16,46,44,88,79,87,68,...,50,53,35,55,29,49,84,55,14,58,96,86,38,72,32,66,62,68,36,19,69,55,11,53,50,58,47,2,59,79,40,45,5,98,22,51,21,58,16,31,30,88,34,77,76,96,80,13,65,38,97,77,27,87,52,87,77,87,23,87,40,68,94,51,39,14,93,36,44,93,28,59,87,81,25,95,21,49,25,11,35,78,56,49,53,57,79,3,61,75,8,41,25,36,76,16,17,97,46,3276.0
3,50,58,84,3,30,42,13,81,5,61,21,1,23,12,23,39,24,11,86,78,19,94,66,25,27,15,58,80,88,31,99,65,20,59,2,31,19,90,66,45,34,86,39,35,66,28,87,93,66,55,95,95,40,96,91,35,95,75,42,18,74,80,87,36,5,6,25,80,71,89,29,9,58,71,67,22,28,19,78,78,52,81,63,30,50,67,38,65,84,27,72,53,56,4,63,15,82,34,20,1,...,0,95,46,83,73,36,8,59,92,37,56,91,69,58,10,17,46,99,28,61,7,72,9,33,26,28,60,24,69,42,10,75,37,86,53,40,79,40,99,52,83,23,45,10,56,33,86,64,70,45,58,24,42,71,53,25,46,60,27,13,90,4,7,39,85,4,49,74,90,96,32,74,82,71,66,42,97,27,90,24,49,55,26,31,74,12,40,99,68,29,70,66,29,30,90,29,68,21,36,174.0
4,60,54,99,98,99,91,61,44,4,23,27,59,88,62,75,90,24,87,22,70,17,20,71,83,66,27,11,84,63,3,18,98,87,69,24,57,1,32,62,40,32,69,95,4,73,68,77,75,35,81,14,37,72,64,25,34,50,80,70,16,89,53,13,74,68,8,18,57,23,15,13,13,29,87,87,1,71,42,46,79,50,73,50,26,67,42,7,46,33,71,48,68,76,70,90,87,33,28,43,33,...,31,3,44,84,90,11,85,13,78,1,51,86,35,24,22,68,4,85,75,19,1,17,86,75,79,43,29,84,32,54,77,30,62,85,28,45,66,36,64,15,70,33,99,64,49,59,3,21,75,59,20,14,84,47,49,91,63,56,0,75,31,81,94,36,2,8,40,30,2,80,99,72,83,95,52,45,13,63,50,48,40,31,58,8,5,3,27,96,37,48,70,65,87,58,66,83,26,68,86,5292.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,364.0
290206,32,28,85,13,59,1,94,73,98,3,44,80,31,91,44,89,46,9,77,65,62,78,85,95,56,99,28,84,53,38,47,93,41,89,96,9,1,88,83,0,84,83,91,49,25,83,19,44,11,23,96,99,79,23,81,2,3,10,0,16,88,82,73,40,48,64,69,81,20,4,54,73,84,28,1,70,34,12,4,79,83,86,73,24,16,77,81,70,22,0,66,62,56,7,62,89,9,82,63,26,...,40,89,12,40,19,43,10,77,23,50,54,65,88,21,50,69,72,46,2,0,83,47,49,41,37,19,13,1,66,92,49,53,6,40,78,98,2,43,63,35,36,40,21,31,59,58,74,69,13,96,74,36,98,87,41,24,96,43,60,18,41,21,60,60,4,5,77,37,78,77,0,2,22,85,30,82,16,3,56,83,98,94,11,92,73,87,79,72,87,96,65,77,61,97,32,47,91,8,25,3977.0
290207,30,97,92,41,56,86,33,25,94,40,77,39,98,54,61,98,69,50,39,69,3,55,4,31,49,76,58,29,11,90,65,37,0,70,69,56,54,74,72,12,10,82,1,91,74,50,84,85,48,40,0,68,89,62,0,33,18,72,98,16,17,44,5,55,43,55,91,86,16,71,59,57,60,69,97,19,25,3,49,73,19,53,51,11,18,17,18,2,56,79,52,38,64,87,91,80,42,50,29,13,...,79,56,59,10,78,4,78,64,12,96,0,49,26,25,62,89,21,59,64,2,91,32,20,59,80,4,67,56,58,51,6,21,35,25,74,99,38,6,28,66,90,37,15,33,93,16,74,65,24,95,66,19,65,98,66,47,4,51,34,38,63,44,49,43,23,41,27,38,56,54,9,34,2,35,23,62,13,80,30,39,11,77,42,63,31,43,35,8,83,72,25,25,8,41,50,61,56,34,45,5785.0
290208,51,89,95,65,80,65,22,14,46,56,46,45,30,87,8,27,44,24,82,11,88,70,69,23,9,72,29,61,49,99,66,26,25,5,72,14,97,70,3,70,79,21,10,91,99,26,11,8,77,77,60,78,12,7,25,62,80,75,78,75,69,81,48,34,13,92,18,48,16,42,46,7,29,96,35,21,25,50,47,8,82,29,11,40,13,85,79,61,45,41,84,91,98,47,96,9,96,77,28,3,...,81,80,5,41,69,34,73,96,27,19,24,77,75,37,50,98,76,59,34,53,11,18,58,12,82,36,91,68,76,19,23,56,99,95,98,47,12,61,13,49,49,18,21,72,26,26,81,8,72,19,84,91,25,46,94,30,60,14,43,14,86,1,62,8,40,81,45,60,55,49,15,78,64,98,35,95,93,30,31,63,16,99,53,66,0,75,2,38,20,10,56,68,49,73,8,29,26,4,84,1060.0
290209,86,20,79,53,47,42,58,36,89,47,17,1,97,27,59,68,69,17,52,87,89,94,49,96,86,26,94,51,78,11,57,82,70,84,56,58,14,73,17,8,79,63,91,52,39,20,82,35,74,45,1,31,75,82,50,58,16,54,18,99,44,67,22,75,87,85,73,42,73,88,24,10,15,9,86,46,82,58,13,91,85,92,31,36,44,16,79,10,33,89,49,26,29,22,5,96,43,64,50,26,...,38,14,12,22,65,46,27,81,74,80,15,67,94,72,20,92,37,7,94,65,34,59,71,93,82,52,38,50,30,7,25,31,39,89,67,29,87,18,0,55,53,54,44,11,72,27,23,64,81,30,84,69,97,57,81,66,99,71,39,41,56,2,2,61,27,9,81,19,94,68,82,86,61,25,93,96,22,92,4,80,32,84,8,18,12,49,78,64,39,48,62,41,65,45,40,56,58,64,39,6160.0


In [16]:
dff=vaex_df[vaex_df.col2>70]  # Filtering does not make a copy of the data in memory

In [17]:
# All the algorithms work out of core; the limit is the size of your hard drive.
# Compute the minimum and maximum of col2 over the filtered rows, showing a
# progress widget while the computation runs.
dff.col2.minmax(progress='widget')

HBox(children=(FloatProgress(value=0.0, max=1.0), Label(value='In progress...')))

array([71, 99], dtype=int64)

## Fast groupby / aggregations
Vaex implements parallelized, highly performant groupby operations, especially when using categories (>1 billion/second).

In [18]:
%%time
vaex_df_group=vaex_df.groupby(vaex_df.col1,agg=vaex.agg.mean(vaex_df.col4))
vaex_df_group

CPU times: total: 31.2 ms
Wall time: 82.9 ms


#,col1,col4_mean
0,0,49.31644177911044
1,1,49.874616829823
2,2,49.200547778454045
3,3,49.19603764239723
4,4,49.462472406181014
...,...,...
95,95,49.638805371818
96,96,49.37867647058823
97,97,49.46523795150418
98,98,49.57930214115781


In [19]:
%%time
vaex_df.groupby(vaex_df.col1,agg='count')

CPU times: total: 93.8 ms
Wall time: 71.1 ms


#,col1,count
0,0,10005
1,1,10113
2,2,9858
3,3,10095
4,4,9966
...,...,...
95,95,9978
96,96,10064
97,97,9939
98,98,10088
