In [None]:
### Examples of possible analyses of the results obtained from the intersection values calculation
### Here the tables with the unsampled results (i.e. the datasets were used at their full sizes) are used
import re
import numpy as np
import pandas as pd
import scipy.stats as stats
import logging
logging.basicConfig(filename='log_analysis.txt',level=logging.INFO)

from chgen.analysis import fishers_exact_test, produce_summary_tables_ch

def extract_float_values_from_string(input_string):
    """Return every number found in ``input_string`` as a list of floats.

    Both decimal numbers (``3.14``, ``.5``) and plain integers (``27``) are
    recognised, each with an optional leading ``+`` or ``-`` sign.

    Parameters
    ----------
    input_string : str
        Text to scan for numeric values.

    Returns
    -------
    list of float
        The numbers in order of appearance; an empty list if none are found.
    """
    # The alternation is grouped so that the optional sign applies to integers
    # as well as decimals. Without the group the sign only binds to the decimal
    # branch, and "-5" would be read as 5.0.
    pattern = r"[-+]?(?:\d*\.\d+|\d+)"

    return [float(token) for token in re.findall(pattern, input_string)]

# Names of the datasets. Must match those used for intersection values calculation in convex_hull_full.py
df_pairs = ['eICU', 'uka']
count = 27 # number of common parameters. The number can be found in log_ch.txt

# FIXME: `combos` is not defined in this cell and must exist before the cell is run.
# Each element is indexed as combo[0] / combo[1] below (the two feature names of a pair).

# The value columns of the result table are named after the datasets in df_pairs,
# so the dataset names are only spelled out in one place.
first_ds, second_ds = df_pairs

# One record per feature combination; the `data` table is built once after the loop
# instead of being grown row by row.
records = []
for combo in combos:
    # Approach 1 (suitable only for 2-dimensional case):
    # Aggregate a table by the case (which dataset's convex hull is used for coverage) and a combination. Based on the grouped
    # intersection values the combinations of features with the smallest intersection values are found. Condition for the search is
    # the lower fence, values that are smaller are considered as those that may include important features (i.e. those that
    # discriminate between the sets). If the lower fence is negative, no such combinations are found
    df = pd.read_csv() #FIXME: fix to the path where the table is stored
    df_mean = df.groupby(['Name', 'Parameters']).mean().reset_index()

    # perform fisher's exact test. The test is performed without p-value correction
    enrich_1 = fishers_exact_test(df_mean, f'full {first_ds} in full {second_ds}', count)
    if not enrich_1.empty:
        enrich_1 = enrich_1[enrich_1['p_value'] < 0.05] # filter features based on the significance level. Here as an example 5%
    # symmetric case
    enrich_2 = fishers_exact_test(df_mean, f'full {second_ds} in full {first_ds}', count)
    if not enrich_2.empty:
        enrich_2 = enrich_2[enrich_2['p_value'] < 0.05]

    enrich_df = pd.concat([enrich_1, enrich_2]).reset_index(drop = True)
    if not enrich_df.empty:
        # if the table is not empty it contains the list of important features, their count in 'bad' combinations (those with low
        # intersection values) and 'good' combinations (those that are higher than the lower fence), odds and p-values
        # NOTE(review): the file name does not depend on `combo`, so every iteration overwrites the previous file - confirm intended
        enrich_df.to_csv(f'./important_parameters_{first_ds}_and_{second_ds}.csv', index=False)
    else:
        logging.info('Bad parameters not found')

    # Approach 2:
    # Compared to the Approach 1, this function first extracts features out of the combinations and assigns an intersection value
    # of a combination to its feature-component. Then computes the median value for each parameter and based on these median values
    # calculates the lower fence. If a median value of a feature is smaller than the lower fence, this feature is considered important.
    # The result of the function is three tables that describe the mean intersection values per a pair of datasets, lists important features
    # that are important for the discrimination and their count. Rows - initial dataset, columns - dataset, convex hull of which is
    # covered by the convex hull of the initial dataset
    path_to_ch_outputs = 'file.csv' #FIXME: fix to the path where the tables are stored
    x,y,z = produce_summary_tables_ch(path_to_ch_outputs, df_pairs)
    # NOTE(review): like the file above, 'mean_coverage.csv' is overwritten on every iteration - confirm intended
    x.to_csv('mean_coverage.csv')

    # Store the mean coverage in both directions for this feature pair
    records.append({
        'Feature1': combo[0],
        'Feature2': combo[1],
        first_ds: x.loc[first_ds, second_ds],
        second_ds: x.loc[second_ds, first_ds],
    })

# Passing `columns` keeps the expected header even when no combination was processed
data = pd.DataFrame(records, columns=['Feature1', 'Feature2', first_ds, second_ds])
data.to_csv('') # give the name to csv file




In [None]:
### Here the tables with the unsampled results (i.e. the datasets were used at their full sizes) are used
import re
import numpy as np
import pandas as pd
import scipy.stats as stats
import logging
logging.basicConfig(filename='log_analysis.txt',level=logging.INFO)

from chgen.analysis import fishers_exact_test, produce_summary_tables_ch
import inspect
# Fetch the source text of produce_summary_tables_ch (imported from chgen.analysis)
# so that its implementation can be read directly in the notebook
source_code = inspect.getsource(produce_summary_tables_ch)

# Print the function definition
print(source_code)

In [None]:
import re
import numpy as np
import pandas as pd
import scipy.stats as stats
import logging
logging.basicConfig(filename='log_analysis.txt',level=logging.INFO)

from chgen.analysis import fishers_exact_test, produce_summary_tables_ch

# define the names of datasets.
df_pairs = ['eICU', 'uka']

# Rows are kept when Feature1 is one of these values ...
feature1_keep = ['FiO2', 'PEEP', 'Haemoglobin', 'SpO2', 'SaO2']
# ... and Feature2 is none of these values
feature2_drop = ['respiration', 'cvp', 'GPT', 'INR', 'GOT', 'Laktat_arteriell', 'pasystolic']

df = pd.read_csv('') #FIXME: fix to the path where the table is stored
df = df[df['Feature1'].isin(feature1_keep) & ~df['Feature2'].isin(feature2_drop)]
df = df.reset_index(drop=True)

# Sort the rows by the 'eICU' column in ascending order
sorted_df = df.sort_values(by='eICU')

# Drop rows whose 'eICU' value is exactly 0, remove the 'Unnamed: 0' column and add a
# 'Combined' label made of both feature names. The chain returns a new frame, so no
# filtered slice is modified in place (which would raise SettingWithCopyWarning).
df_filtered = (
    sorted_df[sorted_df['eICU'] != 0.0]
    .drop(columns='Unnamed: 0')
    .assign(Combined=lambda frame: frame['Feature1'] + " " + frame['Feature2'])
)

print(df_filtered)



In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Select the rows of df_filtered whose index label is below 30.
# NOTE(review): if df_filtered was sorted after its index was reset, these labels are
# pre-sort positions, so this is not necessarily "the first 30 rows" - confirm intent
# (df_filtered.head(30) would take the first 30 rows in the current order).
df_1 = df_filtered[df_filtered.index < 30].copy()

# Create the figure and axes explicitly and hand the axes to pandas. Without `ax=`,
# DataFrame.plot opens its own figure, which left an empty extra figure behind and
# ignored the requested size.
# Size is in inches (width, height); adjust as per your requirement
fig, ax = plt.subplots(figsize=(100, 24))
df_1.plot(x="Combined", y=["eICU", "uka"], kind="bar", rot=0, ax=ax)

ticks = ax.get_xticks()
labels = df_1['Combined'].values

# Label every bar group with its feature pair, written vertically
ax.set_xticks(ticks)
ax.set_xticklabels(labels, rotation=90)

ax.set_ylabel("Intersection")

plt.show()

In [None]:
import re
import numpy as np
import pandas as pd
import scipy.stats as stats
import logging
logging.basicConfig(filename='log_analysis.txt',level=logging.INFO)

from chgen.analysis import fishers_exact_test, produce_summary_tables_ch

# define the names of datasets. Must match those used for intersection values calculation in convex_hull_full.py
df_pairs = ['eICU', 'uka']

df = pd.read_csv('') #FIXME: fix to the path where the table is stored

# Keep only the rows whose second feature is paO2.
# NOTE(review): this cell previously also filtered out Feature2 values 'respiration',
# 'cvp', 'GPT', 'INR', 'GOT', 'Laktat_arteriell' and 'pasystolic'. After the equality
# filter on Feature2 those conditions could never remove a row, so they were dropped.
# If the exclusions were meant for Feature1 instead, add them back on that column.
df = df[df['Feature2'] == 'paO2']
df = df.reset_index(drop=True)

# Sort the rows by the 'uka' column in ascending order
sorted_df = df.sort_values(by='uka')

# Drop rows whose 'uka' value is exactly 0, remove the 'Unnamed: 0' column and add a
# 'Combined' label made of both feature names. The chain returns a new frame, so no
# filtered slice is modified in place (which would raise SettingWithCopyWarning).
df_filtered = (
    sorted_df[sorted_df['uka'] != 0.0]
    .drop(columns='Unnamed: 0')
    .assign(Combined=lambda frame: frame['Feature1'] + " " + frame['Feature2'])
)

print(df_filtered)