In [1]:
import vaex
import pandas as pd
import panel as pn
import math
import param
import os
import glob
import datetime

In [2]:
# Load Panel's notebook extension so the Panel objects built below can render in output cells.
pn.extension()

### Vaex

In [3]:
class Loader(param.Parameterized):
    """Open a data file with vaex when the ``load_file`` action fires.

    Params:
        file_path: path passed to ``vaex.open``; the empty string means
            "nothing to load".
        load_file: action whose trigger runs :meth:`load`.

    Plain instance attributes (not params):
        data: an empty pandas DataFrame until a file is loaded, afterwards
            the object returned by ``vaex.open``.
        data_length: ``len(data)``.
        data_columns: list of column names reported by the loaded data.
    """

    file_path = param.String(default="", doc="Enter File Path")
    load_file = param.Action(lambda x: x.param.trigger('load_file'))

    def __init__(self, **params):
        super().__init__(**params)
        # Empty placeholder so the attributes exist before any file is opened.
        placeholder = pd.DataFrame()
        self.data = placeholder
        self.data_length = len(placeholder)
        self.data_columns = []

    @param.depends('load_file', watch=True)
    def load(self):
        """Open ``file_path`` with vaex and record its length and column names.

        Does nothing while ``file_path`` is the empty string.
        """
        if self.file_path == "":
            return
        opened = vaex.open(str(self.file_path))
        self.data = opened
        self.data_length = len(opened)
        self.data_columns = list(opened.get_column_names())

In [4]:
class Frame(param.Parameterized):
    """Paged pandas view over the data held by a :class:`Loader`.

    Params:
        frame: pandas DataFrame holding the rows of the current page.
        page: 1-based page number; its bounds are maintained by
            :meth:`pagination`.

    Other attributes:
        load: the ``Loader`` supplying the data.
        per_page: number of rows per page (set to 30 in ``__init__``).
        last_page, from_item, to_item: set by :meth:`pagination` and
            :meth:`refresh_frame` respectively.
    """

    frame = param.DataFrame(pd.DataFrame())
    page = param.Integer(1)
    # Plain class attribute (not a param): this one Loader instance is shared
    # by every Frame instance and subclass instance.
    load = Loader()

    def __init__(self, **params):
        super(Frame, self).__init__(**params)
        self.per_page = 30

    @param.depends('load.load_file')
    def pagination(self):
        """Recompute ``last_page`` and update the bounds of ``page``."""
        # Derive the page count from per_page (the same value refresh_frame
        # slices with) so the two cannot drift apart, and never go below 1 so
        # the bounds stay valid (lower <= upper) when there are no rows.
        self.last_page = max(1, math.ceil(self.load.data_length / self.per_page))
        # Pull the current page back into range before narrowing the bounds,
        # e.g. after the row count has shrunk.
        if self.page > self.last_page:
            self.page = self.last_page
        self.param.page.bounds = (1, self.last_page)

    @param.depends('load.load_file', 'page')
    def refresh_frame(self):
        """Copy the rows of the current page into ``frame`` as a pandas DataFrame."""
        data = self.load.data
        # file_path can be filled in before load_file is triggered; the Loader
        # then still holds its empty pandas placeholder, which has no
        # to_pandas_df() method.
        if self.load.file_path == "" or isinstance(data, pd.DataFrame):
            return
        self.from_item = (self.page - 1) * self.per_page
        self.to_item = self.page * self.per_page
        self.frame = data[self.from_item: self.to_item].to_pandas_df()

    @param.depends('frame')
    def show_frame(self):
        """Return a Panel DataFrame widget displaying the current page."""
        df_widget = pn.widgets.DataFrame(self.frame, name= 'DataSlice', width=1200)
        return df_widget

In [5]:
# Each row lists the spellings of one operator; split_filter_part matches any
# spelling in a row and returns the row's first entry.
# Order matters because matching is by substring: '>=' and '<=' must be tried
# before '>' and '<'.
operators = [['>='],
             ['<='],
             ['<'],
             ['>'],
             ['!='],
             ['=='],
             ['contains '],
             ['datestartswith ']]

def split_filter_part(filter_part):
    """Split a filter string such as ``">= 5"`` into ``(operator, value)``.

    The first operator from ``operators`` found in ``filter_part`` is used and
    everything after its first occurrence is the operand:

    * an operand wrapped in matching ``'``, ``"`` or backticks is returned as a
      string with the quotes removed and backslash-escaped quotes unescaped;
    * otherwise the operand is returned as ``float`` if it parses as one,
      else as the stripped string.

    Returns:
        ``(operator, value)`` on success, or ``[None, None]`` when
        ``filter_part`` is not a string, contains no known operator, or has an
        operator with nothing after it.
    """
    if not isinstance(filter_part, str):
        # e.g. NaN/None coming from an empty table cell.
        return [None] * 2

    for operator_type in operators:
        for operator in operator_type:
            if operator not in filter_part:
                continue
            value_part = filter_part.split(operator, 1)[1].strip()
            if not value_part:
                # Operator without an operand: indexing value_part[0] would
                # raise IndexError, so report "no filter" instead.
                return [None] * 2
            quote = value_part[0]
            if quote == value_part[-1] and quote in ("'", '"', '`'):
                value = value_part[1:-1].replace('\\' + quote, quote)
            else:
                try:
                    value = float(value_part)
                except ValueError:
                    value = value_part
            return operator_type[0], value

    return [None] * 2

In [6]:
def create_query_string(key, op, value):
    """Build a parenthesised comparison expression for one column.

    Args:
        key: column name, placed on the left of the operator.
        op: operator string, concatenated verbatim between key and value;
            ``None`` means "no filter".
        value: a string or a number. A string containing ``', '`` is treated
            as a list of alternatives and expands to one comparison per item,
            joined with ``|``.

    Returns:
        The expression string, e.g. ``"(a>=5.0)"`` or ``"(a=='x')|(a=='y')"``,
        or ``""`` when ``op`` is ``None`` or ``value`` is neither a string nor
        a number.
    """
    if op is None:
        return ""

    if isinstance(value, str):
        # repr() produces a correctly quoted and escaped string literal, so a
        # value that itself contains a quote no longer breaks the expression.
        # NOTE(review): alternatives are always OR-ed; with '!=' and several
        # distinct items that condition holds for every row -- confirm intent.
        return "|".join(
            "(" + key + op + repr(item) + ")" for item in value.split(', ')
        )

    # bool is a subclass of int but is not a meaningful comparison operand here.
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return "(" + key + op + str(value) + ")"

    return ""

In [11]:
class Filter(Frame):
    """Frame with a one-row query table used to filter the loaded data.

    Params:
        query_df: one-row pandas DataFrame with one column per data column;
            each cell holds a filter string such as ``">= 5"``.
        apply: action whose trigger runs :meth:`apply_query`.

    Other attributes:
        _filters: expression strings accumulated over all apply calls made on
            this instance.
    """

    query_df = param.DataFrame(pd.DataFrame())
    apply = param.Action(lambda x: x.param.trigger('apply'))

    def __init__(self, **params):
        super(Filter, self).__init__(**params)
        # Per-instance list: a class-level list would be shared and mutated by
        # every Filter instance.
        self._filters = []

    @param.depends('load.load_file')
    def build_query_df(self):
        """Reset ``query_df`` to a single row of empty strings, one per data column."""
        self.query_df = pd.DataFrame({k:'' for k in self.load.data_columns}, index=[0])

    @param.depends('load.load_file', 'query_df')
    def query_menu(self):
        """Return a Panel DataFrame widget showing ``query_df``."""
        return pn.widgets.DataFrame(self.query_df, width=1200)

    @param.depends('apply')
    def apply_query(self):
        """Turn the cells of ``query_df`` into filters and apply them to the data.

        New filter expressions are added to ``_filters``; all accumulated
        filters are then applied to ``load.data``, the loader's row count is
        refreshed, and paging and the visible frame are updated.
        """
        data = self.load.data
        # Nothing to filter until a file has actually been opened: before that
        # the Loader still holds its empty pandas placeholder.
        if self.load.file_path == "" or isinstance(data, pd.DataFrame):
            return

        # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
        for key, row_value in self.query_df.items():
            qs = row_value.iloc[0]
            if not isinstance(qs, str):
                # Empty/NaN cell: no filter for this column.
                continue
            op, value = split_filter_part(qs)
            query_s = create_query_string(key, op, value)
            # Compare by value (not identity) and keep each expression once.
            if query_s != '' and query_s not in self._filters:
                self._filters.append(query_s)

        for f in self._filters:
            data = data[data[f]]
        self.load.data = data
        # Keep the row count in sync with the filtered data so pagination()
        # computes the page count from the rows that are actually left.
        self.load.data_length = len(data)

        self.pagination()
        self.refresh_frame()

In [12]:
# Single dashboard object; Filter inherits paging from Frame and reaches the Loader through `.load`.
filt = Filter()

In [None]:
# Inspect the row count recorded by the loader (0 at construction, updated by Loader.load).
filt.load.data_length

In [13]:
# Dashboard layout, displayed as the cell output.
# Left column: the loader's parameters, the page parameter and the pagination method.
# Right column: query table setup and widget, the apply action, the filter method,
# the frame refresh method and the table showing the current page.
# The @param.depends-decorated methods are passed uncalled; presumably Panel
# re-runs them when their declared dependencies change -- see the Panel docs.
pn.Row(pn.Column(filt.load, 
                 filt.param.page, 
                 filt.pagination,
                 ),
       pn.Column(filt.build_query_df,
                 filt.query_menu,
                 filt.param.apply,
                 filt.apply_query,
                 filt.refresh_frame, 
                 filt.show_frame
                 )
       )