In [19]:
import csv
from enum import Enum

In [20]:
class FeatureType(Enum):
    """Kind of a single feature column, as listed in a descriptor.

    A descriptor is a sequence with one FeatureType per column; get_data
    uses it to decide how each raw string value is converted.
    """

    # Value is parsed with float() and appended as a single number.
    Numerical = 1
    # Value is one-hot encoded against the distinct values of its column.
    Category = 2

In [21]:
# Relative path of the CSV input file. Note that read_raw_data and get_data
# each take their own `file_path` parameter, which shadows this module-level
# name inside those functions.
file_path = "data/heart.csv"

## Basic data manipulation

In [34]:
def get_default_descriptor(feature_count=14, category_indices=(1,)):
    """Build a descriptor holding one FeatureType per column.

    Every column is FeatureType.Numerical except the ones listed in
    ``category_indices``, which are FeatureType.Category. Called without
    arguments it returns the previously hard-coded layout: 14 columns of
    which only column 1 is categorical.

    Args:
        feature_count: Number of columns the descriptor describes.
        category_indices: Positions of the categorical columns.

    Returns:
        A new list of ``feature_count`` FeatureType members.

    Raises:
        IndexError: If an entry of ``category_indices`` is out of range.
    """
    descriptor = [FeatureType.Numerical for _ in range(feature_count)]
    for index in category_indices:
        descriptor[index] = FeatureType.Category
    return descriptor

def read_raw_data(file_path):
    """Read a comma-separated file and split it into header and data rows.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A ``(raw_data, names)`` tuple. ``names`` is the list of fields on
        the first line and ``raw_data`` is the list of remaining rows, each
        a list of strings. Both are empty lists when the file is empty.
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(file_path, newline='') as file:
        reader = csv.reader(file, delimiter=',')
        # The default keeps an empty file from leaking StopIteration.
        names = next(reader, [])
        raw_data = list(reader)
    return raw_data, names

def get_possible_feature_values(data, index):
    """Return the distinct values found in column ``index`` of ``data``.

    Values are returned in the order they first appear, so the result (and
    any one-hot encoding built from it) is the same on every run instead of
    depending on set iteration order.

    Args:
        data: Iterable of rows; each row is an indexable, sized sequence
            of hashable values.
        index: Zero-based column position. Rows too short to contain it
            are skipped, and a negative index matches nothing.

    Returns:
        A list of the unique values of that column, first occurrence first.
    """
    # dict keys deduplicate while keeping insertion order.
    distinct = dict.fromkeys(
        row[index] for row in data if 0 <= index < len(row)
    )
    return list(distinct)

def find_among_possible(possible_values, value):
    """Return the position of ``value`` within ``possible_values``.

    Delegates to the sequence's own ``index`` method, so the first match
    wins and a missing value raises ValueError.
    """
    position = possible_values.index(value)
    return position

def find_possible_values_for_all(data, descriptor):
    """Collect the distinct values of every categorical column.

    Returns a list aligned with ``descriptor``: the entry for a
    FeatureType.Category column is the result of
    get_possible_feature_values for that column, and the entry for any
    other column is None.
    """
    return [
        get_possible_feature_values(data, column)
        if kind == FeatureType.Category
        else None
        for column, kind in enumerate(descriptor)
    ]

def add_category_to_data_point(data_point, feature, possible_values):
    """Return ``data_point`` extended with a one-hot encoding of ``feature``.

    The encoding has one slot per entry of ``possible_values``; the slot at
    the position of ``feature`` is 1 and all others are 0. ``data_point``
    itself is not modified: a new list is returned.
    """
    hot_slot = find_among_possible(possible_values, feature)
    one_hot = [int(slot == hot_slot) for slot, _ in enumerate(possible_values)]
    return data_point + one_hot

def get_data(file_path, descriptor):
    """Load a CSV file and encode each row according to ``descriptor``.

    Numerical columns are converted with float(); categorical columns are
    one-hot encoded against the distinct values seen in that column across
    the whole file. Columns beyond the shorter of the row and the
    descriptor are ignored, as are descriptor entries that are neither
    Numerical nor Category.

    Returns:
        A list with one tuple of encoded values per data row.
    """
    rows, _header = read_raw_data(file_path)
    category_values = find_possible_values_for_all(rows, descriptor)

    def encode(row):
        # Turn one raw row of strings into its encoded tuple.
        encoded = []
        for column, (cell, kind) in enumerate(zip(row, descriptor)):
            if kind == FeatureType.Numerical:
                encoded.append(float(cell))
            elif kind == FeatureType.Category:
                encoded = add_category_to_data_point(
                    encoded, cell, category_values[column]
                )
        return tuple(encoded)

    return [encode(row) for row in rows]