In [16]:
import numpy as np
import pandas as pd

Load CSV data (drop the first row)

In [17]:
# Read the file without a header row so every line is treated as data, then
# name the columns Attr_0, Attr_1, ... via add_prefix. The `prefix=` argument
# of read_csv was removed in pandas 2.0, so it must not be used here.
data = (
    pd.read_csv('test_data.csv', header=None)
    .add_prefix("Attr_")
    .drop(index=[0])          # remove the first row of the file
    .reset_index(drop=True)   # renumber rows from 0 so labels match positions
)
print(data.head(5))

  Attr_0 Attr_1 Attr_2
0     16     50    yes
1     16      0     no
2     31      1     no
3     31      1    yes
4     46     26     no


Get indiscernibility for the given attributes.

In [18]:
def get_ind_for_attributes(attr_list, df=None):
    """
    Returns indiscernibility for the attributes.

    Two rows are indiscernible when they hold the same value in every column
    named in ``attr_list``. Missing values (NaN/None) are treated as equal to
    each other.

    Parameters
    ----------
    attr_list : list
        Column labels to compare rows on.
    df : pandas.DataFrame, optional
        Frame to partition. Defaults to the module-level ``data`` frame.

    Returns
    -------
    list of list of int
        One inner list per equivalence class, holding 0-based row positions
        in ascending order. Classes are ordered by their first member.
    """
    if df is None:
        df = data

    # Sentinel that stands in for any missing value so that NaN cells compare
    # (and hash) equal to one another inside the tuple key.
    missing = object()

    classes = {}
    # Positions come from enumerate, never from the index labels, so the
    # result is correct for any index (not only a default RangeIndex).
    for position, values in enumerate(df[attr_list].to_numpy(dtype=object)):
        key = tuple(missing if pd.isna(value) else value for value in values)
        classes.setdefault(key, []).append(position)

    # dicts preserve insertion order, so classes appear by first occurrence.
    return list(classes.values())



# Attributes on which rows are compared for indiscernibility.
list_of_attr = ["Attr_0", "Attr_1"]
# Alternative: list_of_attr = ["Attr_0"] to compare rows on a single attribute.
# Each inner list groups the row positions whose values agree on list_of_attr.
indiscernibility = get_ind_for_attributes(list_of_attr)
print(f"Indiscernibility: {indiscernibility}")

Indiscernibility: [[0], [1], [2, 3], [4, 6], [5]]


Get values for the given target attribute.

In [19]:
def get_values_for_target_attr(target_attr, target_value, df=None):
    """
    Returns the list of row positions for the target attribute and target value.

    String cells are stripped of surrounding whitespace before the comparison.
    Non-string cells (numbers, NaN, None) are compared as they are, so they
    no longer raise ``AttributeError``.

    Parameters
    ----------
    target_attr : column label
        Column to inspect.
    target_value : object
        Value a cell must equal for its row to be selected.
    df : pandas.DataFrame, optional
        Frame to scan. Defaults to the module-level ``data`` frame.

    Returns
    -------
    list of int
        0-based positions of the matching rows, in ascending order.
    """
    if df is None:
        df = data

    matches = []
    # enumerate yields positions, which is the numbering used for the
    # indiscernibility classes these results are later compared against.
    for position, value in enumerate(df[target_attr].tolist()):
        if isinstance(value, str):
            value = value.strip()
        if value == target_value:
            matches.append(position)
    return matches



# Target set X: the rows whose target attribute holds the chosen value.
approximation_target_attr_value = "yes"
target_attr = "Attr_2"
x = get_values_for_target_attr(target_attr, approximation_target_attr_value)
# Note: the message below interpolates the target *value*, not the attribute name.
print(f"X: {x} for target attribute: {approximation_target_attr_value}")


X: [0, 3, 5] for target attribute: yes


Lower approximation.

In [20]:
def get_lower_approxiamtion(x, indiscernibility):
    """
    Lower approximation method.

    The lower approximation is the union of the indiscernibility classes that
    are entirely contained in ``x``. A class with even one member outside
    ``x`` contributes nothing.

    Parameters
    ----------
    x : iterable of int
        Row positions that make up the target set.
    indiscernibility : list of list of int
        Equivalence classes of row positions.

    Returns
    -------
    list of int
        Members of every fully contained class, in class order.
    """
    target = set(x)
    lower_approx = []
    for eq_class in indiscernibility:
        # Check the whole class before adding anything: adding members one by
        # one would leak the leading members of a partially covered class.
        if all(member in target for member in eq_class):
            lower_approx.extend(eq_class)
    return lower_approx


# Lower approximation of the target set x with respect to the indiscernibility classes.
lower_approx = get_lower_approxiamtion(x, indiscernibility)
print(f"Lower Approximation: {lower_approx}")

Lower Approximation: [0, 5]


Upper approximation

In [21]:
def get_upper_approxiamtion(x, indiscernibility):
    """
    Upper approximation method.

    Collects every indiscernibility class that shares at least one member
    with ``x`` and returns all members of those classes, in class order.
    """
    # A class qualifies as soon as any one of its members belongs to x.
    overlapping = [
        eq_class
        for eq_class in indiscernibility
        if any(member in x for member in eq_class)
    ]
    # Flatten the qualifying classes into a single list of row positions.
    return [member for eq_class in overlapping for member in eq_class]


# Upper approximation of the target set x.
# NOTE: the variable is spelled `upper_aprox` (single "p"); later cells refer to it by this name.
upper_aprox = get_upper_approxiamtion(x, indiscernibility)
print(f"Upper Approximation: {upper_aprox}")

Upper Approximation: [0, 2, 3, 5]


Boundary regions (boundary & outside)

In [22]:
def get_boundary_region(upper_approx, lower_approx):
    """
    Returns the boundary region.

    The result keeps, in their original order, the elements of
    ``upper_approx`` that do not occur in ``lower_approx``.
    """
    boundary = []
    for element in upper_approx:
        if element in lower_approx:
            continue  # already in the lower approximation
        boundary.append(element)
    return boundary


# Boundary region: elements of the upper approximation that are not in the lower approximation.
boundary_reg = get_boundary_region(upper_aprox, lower_approx)
print(f"Boundary region: {boundary_reg}")



def get_outside_region(data_elements_i, upper_approx):
    """
    Returns the outside region.

    The result keeps, in their original order, the entries of
    ``data_elements_i`` that are absent from ``upper_approx``.
    """
    def is_outside(element):
        return element not in upper_approx

    return list(filter(is_outside, data_elements_i))


# Outside region: index values of `data` that are not part of the upper approximation.
outside_region = get_outside_region(list(data.index.values), upper_aprox)
print(f"Outside region: {outside_region}")

Boundary region: [2, 3]
Outside region: [1, 4, 6]


Roughness based on the approximations: 1 − |lower approximation| / |upper approximation|.

In [23]:
def get_data_roughness(lower_approx, upper_approx):
    """
    Returns the roughness derived from the two approximations.

    The value is ``1 - len(lower_approx) / len(upper_approx)``. When the lower
    approximation is empty the result is ``1`` and no division takes place.
    """
    n_lower, n_upper = len(lower_approx), len(upper_approx)
    # The empty-lower case is answered directly, which also avoids dividing
    # by zero when the upper approximation is empty as well.
    return 1 if n_lower == 0 else 1 - (n_lower / n_upper)

# Report the roughness computed from the lower and upper approximations above.
print(f"Roughness: {get_data_roughness(lower_approx, upper_aprox)}")

Roughness: 0.5
