In [11]:
import sys
import os
import pandas as pd
import torch

# Make the parent directory importable so the project-local `filter` package
# can be imported by this notebook. The path is resolved against the current
# working directory, so the notebook is expected to be launched from its own
# folder. The membership guard keeps the cell idempotent: re-running it does
# not pile up duplicate entries in sys.path.
project_root = os.path.abspath(os.pardir)
if project_root not in sys.path:
    sys.path.append(project_root)

from filter.vertical_reduction import transform, provenance, compare

In [2]:
# Toy input for the vertical-reduction demo: five rows, two integer columns.
data = {
    "col1": [1, 2, 3, 4, 5],
    "col2": [8, 6, 9, 7, 10],
}
# Each dict key becomes a column of the frame.
input_df = pd.DataFrame.from_dict(data, orient="columns")
input_df  # bare last expression so the notebook renders the frame

Unnamed: 0,col1,col2
0,1,8
1,2,6
2,3,9
3,4,7
4,5,10


In [3]:
# Columns the reduction acts on. The same list is passed to both the
# retain=True and the retain=False calls to `transform` further down.
selected_columns = ["col1"]

In [4]:
# Retain case: call `transform` with retain=True on the selected columns.
# Per the rendered output of this cell, the result keeps only "col1".
output_df = transform(input_df, selected_columns, retain=True)
output_df

Unnamed: 0,col1
0,1
1,2
2,3
3,4
4,5


In [5]:
# Provenance between the input frame and the current `output_df`, requested
# in sparse form (sparse=True).
# NOTE(review): the printed indices look like the positions of the input
# columns that survive in `output_df` — confirm against the implementation of
# filter.vertical_reduction.provenance.
provenance_tensor = provenance(input_df, output_df, sparse=True)

print(
    f"Sparse provenance tensor for vertical reduction operation:\n{provenance_tensor}"
)

Sparse provenance tensor for vertical reduction operation:
tensor(indices=tensor([[0]]),
       values=tensor([1]),
       size=(2,), nnz=1, dtype=torch.int8, layout=torch.sparse_coo)


In [6]:
# Same provenance call as the sparse variant, but with sparse=False to get a
# dense tensor. This rebinds `provenance_tensor`, so the sparse result is no
# longer reachable under that name.
provenance_tensor = provenance(input_df, output_df, sparse=False)

print(f"Dense provenance tensor for vertical reduction operation:\n{provenance_tensor}")

Dense provenance tensor for vertical reduction operation:
tensor([[1, 0],
        [1, 0],
        [1, 0],
        [1, 0],
        [1, 0]], dtype=torch.int8)


In [8]:
# Drop case: call `transform` with retain=False on the selected columns.
# Per the rendered output of this cell, the result keeps only "col2".
# Note that this rebinds `output_df`; the retain=True result is overwritten.
output_df = transform(input_df, selected_columns, retain=False)
output_df

Unnamed: 0,col2
0,8
1,6
2,9
3,7
4,10


In [9]:
# Sparse provenance (sparse=True) between the input frame and whatever
# `output_df` currently holds; in notebook order that is the retain=False
# result.
# NOTE(review): the printed indices look like the positions of the input
# columns that survive in `output_df` — confirm against the implementation of
# filter.vertical_reduction.provenance.
provenance_tensor = provenance(input_df, output_df, sparse=True)

print(
    f"Sparse provenance tensor for vertical reduction operation:\n{provenance_tensor}"
)

Sparse provenance tensor for vertical reduction operation:
tensor(indices=tensor([[1]]),
       values=tensor([1]),
       size=(2,), nnz=1, dtype=torch.int8, layout=torch.sparse_coo)


In [10]:
# Dense provenance (sparse=False) for the current `output_df`; in notebook
# order that is the retain=False result. This rebinds `provenance_tensor`,
# replacing the sparse tensor from the previous cell.
provenance_tensor = provenance(input_df, output_df, sparse=False)

print(f"Dense provenance tensor for vertical reduction operation:\n{provenance_tensor}")

Dense provenance tensor for vertical reduction operation:
tensor([[0, 1],
        [0, 1],
        [0, 1],
        [0, 1],
        [0, 1]], dtype=torch.int8)


In [12]:
# Compare sparse vs. dense provenance on a larger 500x500 random input.
#
# The tensor gets its own name instead of rebinding `input_df`: that name
# holds the small pandas DataFrame used by the transform/provenance cells, and
# overwriting it with a tensor would silently change what those cells operate
# on if they were re-run after this one.
#
# A locally seeded generator makes the random input reproducible without
# altering torch's global RNG state.
#
# NOTE(review): judging by the printed sparse indices, `compare` appears to
# map names such as "col498" to column position 498 — confirm in
# filter.vertical_reduction.compare.
benchmark_input = torch.rand(500, 500, generator=torch.Generator().manual_seed(0))
compare(benchmark_input, columns=["col1", "col2", "col498"], retain=True)

Sparse Tensor Time: 0.000337s

Provenance Sparse Tensor : tensor(indices=tensor([[  1,   2, 498]]),
       values=tensor([1, 1, 1]),
       size=(500,), nnz=3, dtype=torch.int8, layout=torch.sparse_coo)

Provenance dense Tensor : tensor([[0, 1, 1,  ..., 0, 1, 0],
        [0, 1, 1,  ..., 0, 1, 0],
        [0, 1, 1,  ..., 0, 1, 0],
        ...,
        [0, 1, 1,  ..., 0, 1, 0],
        [0, 1, 1,  ..., 0, 1, 0],
        [0, 1, 1,  ..., 0, 1, 0]], dtype=torch.int8)

Dense Tensor Time: 0.001221s

Results Consistent: True
