In [5]:
import sys
import os
import pandas as pd
import torch

# Put the parent directory on the import path (presumably where the local
# `oversampling` module imported below lives -- verify against the repo layout).
# The membership check keeps the cell idempotent: re-running it no longer piles up
# duplicate sys.path entries. `os.path.join("..")` with a single argument was a
# no-op, so the path is resolved directly.
project_root = os.path.abspath("..")
if project_root not in sys.path:
    sys.path.append(project_root)

from oversampling  import oversampling, tensrov

In [6]:
# Toy dataset: three samples (rows) described by three named features (columns).
data = pd.DataFrame(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ],
    columns=['A', 'B', 'C'],
)

# Same values as a float32 PyTorch tensor; this is the input handed to the
# oversampling helpers in the cells that follow.
original_data = torch.tensor(data.to_numpy(), dtype=torch.float32)

In [None]:
# horizontal oversampling with a factor of 2
horizontal_factor = 2
horizontal_augmented_data = oversampling(
    original_data, method='horizontal', factor=horizontal_factor
)

# Derive the column labels from `data` instead of restating them by hand, so that
# renaming or adding a column in the source frame cannot silently mislabel this
# result. The first copy keeps the original names, the second gets `_dup`, and any
# further copies get `_dup2`, `_dup3`, ...
# Assumes `oversampling` lays the copies side by side in the original column order,
# as the recorded output for factor=2 shows -- TODO confirm against its source.
source_columns = list(data.columns)
horizontal_columns = source_columns + [
    f"{name}_dup" if copy == 1 else f"{name}_dup{copy}"
    for copy in range(1, horizontal_factor)
    for name in source_columns
]

horizontal_df = pd.DataFrame(horizontal_augmented_data.numpy(), columns=horizontal_columns)
print("Horizontal Oversampling Result:")
print(horizontal_df)

Horizontal Oversampling Result:
     A    B    C  A_dup  B_dup  C_dup
0  1.0  2.0  3.0    1.0    2.0    3.0
1  4.0  5.0  6.0    4.0    5.0    6.0
2  7.0  8.0  9.0    7.0    8.0    9.0


In [8]:
# vertical oversampling with a factor of 3
vertical_augmented_data = oversampling(original_data, method='vertical', factor=3)

# Reuse the source frame's own column labels rather than a hand-copied list, so the
# two cannot drift apart if `data` is edited. The recorded output shows the column
# set unchanged by vertical oversampling (only rows are added).
vertical_df = pd.DataFrame(vertical_augmented_data.numpy(), columns=list(data.columns))
print("\nVertical Oversampling Result:")
print(vertical_df)


Vertical Oversampling Result:
     A    B    C
0  1.0  2.0  3.0
1  4.0  5.0  6.0
2  7.0  8.0  9.0
3  1.0  2.0  3.0
4  4.0  5.0  6.0
5  7.0  8.0  9.0
6  1.0  2.0  3.0
7  4.0  5.0  6.0
8  7.0  8.0  9.0


In [None]:
# Provenance of the horizontally oversampled data: `tensrov` is given the original
# tensor and its augmented counterpart; the saved output shows the result printed
# as a sparse COO tensor.
# NOTE(review): the saved output lists first-row indices 0..5 next to size=(3, 6),
# and this cell has no execution count -- re-run on a fresh kernel to confirm the
# stored output is current.
horizontal_provenance = tensrov(original_data, horizontal_augmented_data)
print("\nProvenance Tensor (Horizontal):", horizontal_provenance, sep="\n")


Provenance Tensor (Horizontal):
tensor(indices=tensor([[0, 1, 2, 3, 4, 5],
                       [0, 1, 2, 0, 1, 2]]),
       values=tensor([1., 1., 1., 1., 1., 1.]),
       size=(3, 6), nnz=6, layout=torch.sparse_coo)


In [None]:
# Provenance of the vertically oversampled data: `tensrov` is given the original
# tensor and its augmented counterpart; the saved output shows the result printed
# as a sparse COO tensor of size (3, 9), i.e. 3 original rows by 9 augmented rows
# (presumably one entry per augmented row pointing at its source row -- confirm
# against `tensrov`).
vertical_provenance = tensrov(original_data, vertical_augmented_data)
print("\nProvenance Tensor (Vertical):", vertical_provenance, sep="\n")


Provenance Tensor (Vertical):
tensor(indices=tensor([[0, 1, 2, 0, 1, 2, 0, 1, 2],
                       [0, 1, 2, 3, 4, 5, 6, 7, 8]]),
       values=tensor([1., 1., 1., 1., 1., 1., 1., 1., 1.]),
       size=(3, 9), nnz=9, layout=torch.sparse_coo)
