In [3]:
import pandas as pd
import numpy as np
import time
from cbnfs import CBNFS
from data_preparation import *
import csv

**IMPORTANT**

The following cells require the fuzzy score values stored in the *.txt* file, which cannot be made publicly available.

k DTDT parameter

In [None]:
# Grid-search gamma/nrule for CBNFS on the "Percentile" target (thr=10) while
# sweeping the fuzzy-score parameter k. For every k, the best validation-recall
# configuration of each fold is written to BW_validated_k0_<int(k*10)>.csv.
# NOTE(review): specify_output presumably derives the class label from
# ref_column/thr and places it in the last column -- confirm in data_preparation.
data = pd.read_csv("raw_data.csv")
data = specify_output(data, thr=10, ref_column="Percentile")
data = add_fuzzy_scores(data, "fuzzy-score-FTDT-pi05-full.txt")
data = zscore_normalize(np.array(data))

k_vals = [0.2, 0.3, 0.4, 0.5]
# The search grid does not depend on k, so it is defined once up front.
gamma_set = [0.5, 1.0, 1.5, 2.0]
nrule_set = [5, 15, 25, 35]

for k in k_vals:
    # Folds are rebuilt for every k; each entry starts as (train, test).
    fold_sets = kfold_cv(data)
    for fold, (train_set, test_set) in fold_sets.items():
        train_set, val_set = train_val_split(train_set)
        fold_sets[fold] = (train_set, val_set, test_set)

    # Kept as a separate pass so the helpers are called in the original order.
    for fold, sets in fold_sets.items():
        train_set, val_set, test_set = apply_fuzzy_score(sets=sets, k=k)
        fold_sets[fold] = (train_set, val_set, test_set)

    gather_results = {}
    for fold, (train_set, val_set, _test_set) in fold_sets.items():
        # Start below any real recall so a fold whose recall is 0 for every
        # configuration still gets a row instead of silently disappearing.
        # (A NaN recall still never compares greater, so it is never recorded.)
        current_best_recall = float("-inf")
        for gamma in gamma_set:
            for nrule in nrule_set:
                model = CBNFS(data=train_set, nrule=nrule, gamma=gamma)
                val_output = model.cbnfse(val_set)

                # Only recall (second returned metric) drives the selection.
                _, rec, _, _, _ = calculate_metrics(val_set[:, -1], val_output)
                if rec > current_best_recall:
                    current_best_recall = rec
                    print(f'Current best recall: {current_best_recall}')
                    print(f'Fold {fold}')
                    print(f'for gamma = {gamma}, nrule = {nrule}')
                    gather_results[fold] = {'recall': current_best_recall,
                                            'gamma': gamma,
                                            'nrule': nrule}

    for fold, results in gather_results.items():
        print(f"Results for fold {fold}:")
        for key, value in results.items():
            print(f"  {key}: {value}\n")

    # One CSV per k value, one row per fold.
    file_name = f'BW_validated_k0_{int(k * 10)}.csv'
    with open(file_name, 'w', newline='') as csvfile:
        fieldnames = ['fold', 'recall', 'gamma', 'nrule']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for fold, results in gather_results.items():
            writer.writerow({'fold': fold, **results})

In [None]:
# Grid-search gamma/nrule for CBNFS on the "Apgar5" target (thr=7) while
# sweeping the fuzzy-score parameter k. For every k, the best validation-recall
# configuration of each fold is written to AP_validated_k0_<int(k*10)>.csv.
# NOTE(review): specify_output presumably derives the class label from
# ref_column/thr and places it in the last column -- confirm in data_preparation.
data = pd.read_csv("raw_data.csv")
data = specify_output(data, thr=7, ref_column="Apgar5")
data = add_fuzzy_scores(data, "fuzzy-score-FTDT-pi05-full.txt")
data = zscore_normalize(np.array(data))

k_vals = [0.2, 0.3, 0.4, 0.5]
# The search grid does not depend on k, so it is defined once up front.
gamma_set = [0.5, 1.0, 1.5, 2.0]
nrule_set = [5, 15, 25, 35]

for k in k_vals:
    # Folds are rebuilt for every k; each entry starts as (train, test).
    fold_sets = kfold_cv(data)
    for fold, (train_set, test_set) in fold_sets.items():
        train_set, val_set = train_val_split(train_set)
        fold_sets[fold] = (train_set, val_set, test_set)

    # Kept as a separate pass so the helpers are called in the original order.
    for fold, sets in fold_sets.items():
        train_set, val_set, test_set = apply_fuzzy_score(sets=sets, k=k)
        fold_sets[fold] = (train_set, val_set, test_set)

    gather_results = {}
    for fold, (train_set, val_set, _test_set) in fold_sets.items():
        # Start below any real recall so a fold whose recall is 0 for every
        # configuration still gets a row instead of silently disappearing.
        # (A NaN recall still never compares greater, so it is never recorded.)
        current_best_recall = float("-inf")
        for gamma in gamma_set:
            for nrule in nrule_set:
                model = CBNFS(data=train_set, nrule=nrule, gamma=gamma)
                val_output = model.cbnfse(val_set)

                # Only recall (second returned metric) drives the selection.
                _, rec, _, _, _ = calculate_metrics(val_set[:, -1], val_output)
                if rec > current_best_recall:
                    current_best_recall = rec
                    print(f'Current best recall: {current_best_recall}')
                    print(f'Fold {fold}')
                    print(f'for gamma = {gamma}, nrule = {nrule}')
                    gather_results[fold] = {'recall': current_best_recall,
                                            'gamma': gamma,
                                            'nrule': nrule}

    for fold, results in gather_results.items():
        print(f"Results for fold {fold}:")
        for key, value in results.items():
            print(f"  {key}: {value}\n")

    # One CSV per k value, one row per fold.
    file_name = f'AP_validated_k0_{int(k * 10)}.csv'
    with open(file_name, 'w', newline='') as csvfile:
        fieldnames = ['fold', 'recall', 'gamma', 'nrule']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for fold, results in gather_results.items():
            writer.writerow({'fold': fold, **results})

In [None]:
# Grid-search gamma/nrule for CBNFS on the "Ph" target (thr=7.1) while
# sweeping the fuzzy-score parameter k. For every k, the best validation-recall
# configuration of each fold is written to PH_validated_k0_<int(k*10)>.csv.
# NOTE(review): specify_output presumably derives the class label from
# ref_column/thr and places it in the last column -- confirm in data_preparation.
data = pd.read_csv("raw_data.csv")
data = specify_output(data, thr=7.1, ref_column="Ph")
data = add_fuzzy_scores(data, "fuzzy-score-FTDT-pi05-full.txt")
data = zscore_normalize(np.array(data))

k_vals = [0.2, 0.3, 0.4, 0.5]
# The search grid does not depend on k, so it is defined once up front.
gamma_set = [0.5, 1.0, 1.5, 2.0]
nrule_set = [5, 15, 25, 35]

for k in k_vals:
    # Folds are rebuilt for every k; each entry starts as (train, test).
    fold_sets = kfold_cv(data)
    for fold, (train_set, test_set) in fold_sets.items():
        train_set, val_set = train_val_split(train_set)
        fold_sets[fold] = (train_set, val_set, test_set)

    # Kept as a separate pass so the helpers are called in the original order.
    for fold, sets in fold_sets.items():
        train_set, val_set, test_set = apply_fuzzy_score(sets=sets, k=k)
        fold_sets[fold] = (train_set, val_set, test_set)

    gather_results = {}
    for fold, (train_set, val_set, _test_set) in fold_sets.items():
        # Start below any real recall so a fold whose recall is 0 for every
        # configuration still gets a row instead of silently disappearing.
        # (A NaN recall still never compares greater, so it is never recorded.)
        current_best_recall = float("-inf")
        for gamma in gamma_set:
            for nrule in nrule_set:
                model = CBNFS(data=train_set, nrule=nrule, gamma=gamma)
                val_output = model.cbnfse(val_set)

                # Only recall (second returned metric) drives the selection.
                _, rec, _, _, _ = calculate_metrics(val_set[:, -1], val_output)
                if rec > current_best_recall:
                    current_best_recall = rec
                    print(f'Current best recall: {current_best_recall}')
                    print(f'Fold {fold}')
                    print(f'for gamma = {gamma}, nrule = {nrule}')
                    gather_results[fold] = {'recall': current_best_recall,
                                            'gamma': gamma,
                                            'nrule': nrule}

    for fold, results in gather_results.items():
        print(f"Results for fold {fold}:")
        for key, value in results.items():
            print(f"  {key}: {value}\n")

    # One CSV per k value, one row per fold.
    file_name = f'PH_validated_k0_{int(k * 10)}.csv'
    with open(file_name, 'w', newline='') as csvfile:
        fieldnames = ['fold', 'recall', 'gamma', 'nrule']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for fold, results in gather_results.items():
            writer.writerow({'fold': fold, **results})

delta DTDT parameter

In [None]:
# Grid-search gamma/nrule for CBNFS on the "Percentile" target (thr=10) while
# sweeping the fuzzy-score parameter delta. For every delta, the best
# validation-recall configuration of each fold is written to
# BW_validated_delta0_<int(delta*10)>.csv.
# NOTE(review): specify_output presumably derives the class label from
# ref_column/thr and places it in the last column -- confirm in data_preparation.
data = pd.read_csv("raw_data.csv")
data = specify_output(data, thr=10, ref_column="Percentile")
data = add_fuzzy_scores(data, "fuzzy-score-FTDT-pi05-full.txt")
data = zscore_normalize(np.array(data))

deltas = [0.1, 0.2, 0.3, 0.4, 0.5]
# The search grid does not depend on delta, so it is defined once up front.
gamma_set = [0.5, 1.0, 1.5, 2.0]
nrule_set = [5, 15, 25, 35]

for delta in deltas:
    # Folds are rebuilt for every delta; each entry starts as (train, test).
    fold_sets = kfold_cv(data)
    for fold, (train_set, test_set) in fold_sets.items():
        train_set, val_set = train_val_split(train_set)
        fold_sets[fold] = (train_set, val_set, test_set)

    # Kept as a separate pass so the helpers are called in the original order.
    for fold, sets in fold_sets.items():
        train_set, val_set, test_set = apply_fuzzy_score(sets=sets, delta=delta)
        fold_sets[fold] = (train_set, val_set, test_set)

    gather_results = {}
    for fold, (train_set, val_set, _test_set) in fold_sets.items():
        # Start below any real recall so a fold whose recall is 0 for every
        # configuration still gets a row instead of silently disappearing.
        # (A NaN recall still never compares greater, so it is never recorded.)
        current_best_recall = float("-inf")
        for gamma in gamma_set:
            for nrule in nrule_set:
                model = CBNFS(data=train_set, nrule=nrule, gamma=gamma)
                val_output = model.cbnfse(val_set)

                # Only recall (second returned metric) drives the selection.
                _, rec, _, _, _ = calculate_metrics(val_set[:, -1], val_output)
                if rec > current_best_recall:
                    current_best_recall = rec
                    print(f'Current best recall: {current_best_recall}')
                    print(f'Fold {fold}')
                    print(f'for gamma = {gamma}, nrule = {nrule}')
                    gather_results[fold] = {'recall': current_best_recall,
                                            'gamma': gamma,
                                            'nrule': nrule}

    for fold, results in gather_results.items():
        print(f"Results for fold {fold}:")
        for key, value in results.items():
            print(f"  {key}: {value}\n")

    # One CSV per delta value, one row per fold.
    file_name = f'BW_validated_delta0_{int(delta * 10)}.csv'
    with open(file_name, 'w', newline='') as csvfile:
        fieldnames = ['fold', 'recall', 'gamma', 'nrule']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for fold, results in gather_results.items():
            writer.writerow({'fold': fold, **results})

In [None]:
# Grid-search gamma/nrule for CBNFS on the "Apgar5" target (thr=7) while
# sweeping the fuzzy-score parameter delta. For every delta, the best
# validation-recall configuration of each fold is written to
# AP_validated_delta0_<int(delta*10)>.csv.
# NOTE(review): specify_output presumably derives the class label from
# ref_column/thr and places it in the last column -- confirm in data_preparation.
data = pd.read_csv("raw_data.csv")
data = specify_output(data, thr=7, ref_column="Apgar5")
data = add_fuzzy_scores(data, "fuzzy-score-FTDT-pi05-full.txt")
data = zscore_normalize(np.array(data))

deltas = [0.1, 0.2, 0.3, 0.4, 0.5]
# The search grid does not depend on delta, so it is defined once up front.
gamma_set = [0.5, 1.0, 1.5, 2.0]
nrule_set = [5, 15, 25, 35]

for delta in deltas:
    # Folds are rebuilt for every delta; each entry starts as (train, test).
    fold_sets = kfold_cv(data)
    for fold, (train_set, test_set) in fold_sets.items():
        train_set, val_set = train_val_split(train_set)
        fold_sets[fold] = (train_set, val_set, test_set)

    # Kept as a separate pass so the helpers are called in the original order.
    for fold, sets in fold_sets.items():
        train_set, val_set, test_set = apply_fuzzy_score(sets=sets, delta=delta)
        fold_sets[fold] = (train_set, val_set, test_set)

    gather_results = {}
    for fold, (train_set, val_set, _test_set) in fold_sets.items():
        # Start below any real recall so a fold whose recall is 0 for every
        # configuration still gets a row instead of silently disappearing.
        # (A NaN recall still never compares greater, so it is never recorded.)
        current_best_recall = float("-inf")
        for gamma in gamma_set:
            for nrule in nrule_set:
                model = CBNFS(data=train_set, nrule=nrule, gamma=gamma)
                val_output = model.cbnfse(val_set)

                # Only recall (second returned metric) drives the selection.
                _, rec, _, _, _ = calculate_metrics(val_set[:, -1], val_output)
                if rec > current_best_recall:
                    current_best_recall = rec
                    print(f'Current best recall: {current_best_recall}')
                    print(f'Fold {fold}')
                    print(f'for gamma = {gamma}, nrule = {nrule}')
                    gather_results[fold] = {'recall': current_best_recall,
                                            'gamma': gamma,
                                            'nrule': nrule}

    for fold, results in gather_results.items():
        print(f"Results for fold {fold}:")
        for key, value in results.items():
            print(f"  {key}: {value}\n")

    # One CSV per delta value, one row per fold.
    file_name = f'AP_validated_delta0_{int(delta * 10)}.csv'
    with open(file_name, 'w', newline='') as csvfile:
        fieldnames = ['fold', 'recall', 'gamma', 'nrule']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for fold, results in gather_results.items():
            writer.writerow({'fold': fold, **results})

In [None]:
# Grid-search gamma/nrule for CBNFS on the "Ph" target (thr=7.1) while
# sweeping the fuzzy-score parameter delta. For every delta, the best
# validation-recall configuration of each fold is written to
# PH_validated_delta0_<int(delta*10)>.csv.
# NOTE(review): specify_output presumably derives the class label from
# ref_column/thr and places it in the last column -- confirm in data_preparation.
data = pd.read_csv("raw_data.csv")
data = specify_output(data, thr=7.1, ref_column="Ph")
data = add_fuzzy_scores(data, "fuzzy-score-FTDT-pi05-full.txt")
data = zscore_normalize(np.array(data))

deltas = [0.1, 0.2, 0.3, 0.4, 0.5]
# The search grid does not depend on delta, so it is defined once up front.
gamma_set = [0.5, 1.0, 1.5, 2.0]
nrule_set = [5, 15, 25, 35]

for delta in deltas:
    # Folds are rebuilt for every delta; each entry starts as (train, test).
    fold_sets = kfold_cv(data)
    for fold, (train_set, test_set) in fold_sets.items():
        train_set, val_set = train_val_split(train_set)
        fold_sets[fold] = (train_set, val_set, test_set)

    # Kept as a separate pass so the helpers are called in the original order.
    for fold, sets in fold_sets.items():
        train_set, val_set, test_set = apply_fuzzy_score(sets=sets, delta=delta)
        fold_sets[fold] = (train_set, val_set, test_set)

    gather_results = {}
    for fold, (train_set, val_set, _test_set) in fold_sets.items():
        # Start below any real recall so a fold whose recall is 0 for every
        # configuration still gets a row instead of silently disappearing.
        # (A NaN recall still never compares greater, so it is never recorded.)
        current_best_recall = float("-inf")
        for gamma in gamma_set:
            for nrule in nrule_set:
                model = CBNFS(data=train_set, nrule=nrule, gamma=gamma)
                val_output = model.cbnfse(val_set)

                # Only recall (second returned metric) drives the selection.
                _, rec, _, _, _ = calculate_metrics(val_set[:, -1], val_output)
                if rec > current_best_recall:
                    current_best_recall = rec
                    print(f'Current best recall: {current_best_recall}')
                    print(f'Fold {fold}')
                    print(f'for gamma = {gamma}, nrule = {nrule}')
                    gather_results[fold] = {'recall': current_best_recall,
                                            'gamma': gamma,
                                            'nrule': nrule}

    for fold, results in gather_results.items():
        print(f"Results for fold {fold}:")
        for key, value in results.items():
            print(f"  {key}: {value}\n")

    # One CSV per delta value, one row per fold.
    file_name = f'PH_validated_delta0_{int(delta * 10)}.csv'
    with open(file_name, 'w', newline='') as csvfile:
        fieldnames = ['fold', 'recall', 'gamma', 'nrule']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for fold, results in gather_results.items():
            writer.writerow({'fold': fold, **results})

target_ratio DTDT parameter

In [None]:
# Grid-search gamma/nrule for CBNFS on the "Percentile" target (thr=10) while
# sweeping the fuzzy-score parameter target_ratio. For every target_ratio, the
# best validation-recall configuration of each fold is written to
# BW_validated_ratio0_<int(target_ratio*10)>.csv.
# NOTE(review): specify_output presumably derives the class label from
# ref_column/thr and places it in the last column -- confirm in data_preparation.
data = pd.read_csv("raw_data.csv")
data = specify_output(data, thr=10, ref_column="Percentile")
data = add_fuzzy_scores(data, "fuzzy-score-FTDT-pi05-full.txt")
data = zscore_normalize(np.array(data))

target_ratios = [0.2, 0.3, 0.4, 0.5]
# The search grid does not depend on target_ratio, so it is defined once up front.
gamma_set = [0.5, 1.0, 1.5, 2.0]
nrule_set = [5, 15, 25, 35]

for target_ratio in target_ratios:
    # Folds are rebuilt for every target_ratio; each entry starts as (train, test).
    fold_sets = kfold_cv(data)
    for fold, (train_set, test_set) in fold_sets.items():
        train_set, val_set = train_val_split(train_set)
        fold_sets[fold] = (train_set, val_set, test_set)

    # Kept as a separate pass so the helpers are called in the original order.
    for fold, sets in fold_sets.items():
        train_set, val_set, test_set = apply_fuzzy_score(sets=sets, target_ratio=target_ratio)
        fold_sets[fold] = (train_set, val_set, test_set)

    gather_results = {}
    for fold, (train_set, val_set, _test_set) in fold_sets.items():
        # Start below any real recall so a fold whose recall is 0 for every
        # configuration still gets a row instead of silently disappearing.
        # (A NaN recall still never compares greater, so it is never recorded.)
        current_best_recall = float("-inf")
        for gamma in gamma_set:
            for nrule in nrule_set:
                model = CBNFS(data=train_set, nrule=nrule, gamma=gamma)
                val_output = model.cbnfse(val_set)

                # Only recall (second returned metric) drives the selection.
                _, rec, _, _, _ = calculate_metrics(val_set[:, -1], val_output)
                if rec > current_best_recall:
                    current_best_recall = rec
                    print(f'Current best recall: {current_best_recall}')
                    print(f'Fold {fold}')
                    print(f'for gamma = {gamma}, nrule = {nrule}')
                    gather_results[fold] = {'recall': current_best_recall,
                                            'gamma': gamma,
                                            'nrule': nrule}

    for fold, results in gather_results.items():
        print(f"Results for fold {fold}:")
        for key, value in results.items():
            print(f"  {key}: {value}\n")

    # One CSV per target_ratio value, one row per fold.
    file_name = f'BW_validated_ratio0_{int(target_ratio * 10)}.csv'
    with open(file_name, 'w', newline='') as csvfile:
        fieldnames = ['fold', 'recall', 'gamma', 'nrule']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for fold, results in gather_results.items():
            writer.writerow({'fold': fold, **results})

In [None]:
# Grid-search gamma/nrule for CBNFS on the "Apgar5" target while sweeping the
# fuzzy-score parameter target_ratio. For every target_ratio, the best
# validation-recall configuration of each fold is written to
# AP_validated_ratio0_<int(target_ratio*10)>.csv.
# NOTE(review): specify_output presumably derives the class label from
# ref_column/thr and places it in the last column -- confirm in data_preparation.
data = pd.read_csv("raw_data.csv")
# thr=7 matches the other Apgar5 sweeps in this notebook (k and delta); the
# previous thr=10 was the Percentile threshold and looks like a copy-paste slip.
# NOTE(review): confirm that 7 is the intended Apgar5 cut-off for this experiment.
data = specify_output(data, thr=7, ref_column="Apgar5")
data = add_fuzzy_scores(data, "fuzzy-score-FTDT-pi05-full.txt")
data = zscore_normalize(np.array(data))

target_ratios = [0.2, 0.3, 0.4, 0.5]
# The search grid does not depend on target_ratio, so it is defined once up front.
gamma_set = [0.5, 1.0, 1.5, 2.0]
nrule_set = [5, 15, 25, 35]

for target_ratio in target_ratios:
    # Folds are rebuilt for every target_ratio; each entry starts as (train, test).
    fold_sets = kfold_cv(data)
    for fold, (train_set, test_set) in fold_sets.items():
        train_set, val_set = train_val_split(train_set)
        fold_sets[fold] = (train_set, val_set, test_set)

    # Kept as a separate pass so the helpers are called in the original order.
    for fold, sets in fold_sets.items():
        train_set, val_set, test_set = apply_fuzzy_score(sets=sets, target_ratio=target_ratio)
        fold_sets[fold] = (train_set, val_set, test_set)

    gather_results = {}
    for fold, (train_set, val_set, _test_set) in fold_sets.items():
        # Start below any real recall so a fold whose recall is 0 for every
        # configuration still gets a row instead of silently disappearing.
        # (A NaN recall still never compares greater, so it is never recorded.)
        current_best_recall = float("-inf")
        for gamma in gamma_set:
            for nrule in nrule_set:
                model = CBNFS(data=train_set, nrule=nrule, gamma=gamma)
                val_output = model.cbnfse(val_set)

                # Only recall (second returned metric) drives the selection.
                _, rec, _, _, _ = calculate_metrics(val_set[:, -1], val_output)
                if rec > current_best_recall:
                    current_best_recall = rec
                    print(f'Current best recall: {current_best_recall}')
                    print(f'Fold {fold}')
                    print(f'for gamma = {gamma}, nrule = {nrule}')
                    gather_results[fold] = {'recall': current_best_recall,
                                            'gamma': gamma,
                                            'nrule': nrule}

    for fold, results in gather_results.items():
        print(f"Results for fold {fold}:")
        for key, value in results.items():
            print(f"  {key}: {value}\n")

    # One CSV per target_ratio value, one row per fold.
    file_name = f'AP_validated_ratio0_{int(target_ratio * 10)}.csv'
    with open(file_name, 'w', newline='') as csvfile:
        fieldnames = ['fold', 'recall', 'gamma', 'nrule']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for fold, results in gather_results.items():
            writer.writerow({'fold': fold, **results})

In [None]:
# Grid-search gamma/nrule for CBNFS on the "Ph" target (thr=7.1) while
# sweeping the fuzzy-score parameter target_ratio. For every target_ratio, the
# best validation-recall configuration of each fold is written to
# PH_validated_ratio0_<int(target_ratio*10)>.csv.
# NOTE(review): specify_output presumably derives the class label from
# ref_column/thr and places it in the last column -- confirm in data_preparation.
data = pd.read_csv("raw_data.csv")
data = specify_output(data, thr=7.1, ref_column="Ph")
data = add_fuzzy_scores(data, "fuzzy-score-FTDT-pi05-full.txt")
data = zscore_normalize(np.array(data))

target_ratios = [0.2, 0.3, 0.4, 0.5]
# The search grid does not depend on target_ratio, so it is defined once up front.
gamma_set = [0.5, 1.0, 1.5, 2.0]
nrule_set = [5, 15, 25, 35]

for target_ratio in target_ratios:
    # Folds are rebuilt for every target_ratio; each entry starts as (train, test).
    fold_sets = kfold_cv(data)
    for fold, (train_set, test_set) in fold_sets.items():
        train_set, val_set = train_val_split(train_set)
        fold_sets[fold] = (train_set, val_set, test_set)

    # Kept as a separate pass so the helpers are called in the original order.
    for fold, sets in fold_sets.items():
        train_set, val_set, test_set = apply_fuzzy_score(sets=sets, target_ratio=target_ratio)
        fold_sets[fold] = (train_set, val_set, test_set)

    gather_results = {}
    for fold, (train_set, val_set, _test_set) in fold_sets.items():
        # Start below any real recall so a fold whose recall is 0 for every
        # configuration still gets a row instead of silently disappearing.
        # (A NaN recall still never compares greater, so it is never recorded.)
        current_best_recall = float("-inf")
        for gamma in gamma_set:
            for nrule in nrule_set:
                model = CBNFS(data=train_set, nrule=nrule, gamma=gamma)
                val_output = model.cbnfse(val_set)

                # Only recall (second returned metric) drives the selection.
                _, rec, _, _, _ = calculate_metrics(val_set[:, -1], val_output)
                if rec > current_best_recall:
                    current_best_recall = rec
                    print(f'Current best recall: {current_best_recall}')
                    print(f'Fold {fold}')
                    print(f'for gamma = {gamma}, nrule = {nrule}')
                    gather_results[fold] = {'recall': current_best_recall,
                                            'gamma': gamma,
                                            'nrule': nrule}

    for fold, results in gather_results.items():
        print(f"Results for fold {fold}:")
        for key, value in results.items():
            print(f"  {key}: {value}\n")

    # One CSV per target_ratio value, one row per fold.
    file_name = f'PH_validated_ratio0_{int(target_ratio * 10)}.csv'
    with open(file_name, 'w', newline='') as csvfile:
        fieldnames = ['fold', 'recall', 'gamma', 'nrule']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for fold, results in gather_results.items():
            writer.writerow({'fold': fold, **results})