In [1]:
import itertools
import random

import numpy as np
import pandas as pd
from combat.pycombat import pycombat
from scipy.stats import pearsonr
from sklearn import feature_selection as fs
from sklearn import preprocessing
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler
from tmaR import FeatureProcessing
from umap import UMAP

In [2]:
def pairwise_pearsonr(df, threshold=0.95, p_val=0.05, random_state=None):
    """Build a keep-mask that prunes one column from each highly correlated pair.

    Every pair of columns that are both still marked as kept is compared with
    ``scipy.stats.pearsonr``. When the correlation coefficient is greater than
    ``threshold`` and its p-value is smaller than ``p_val``, one column of the
    pair is picked at random and marked for removal; a removed column is
    skipped in all later comparisons.

    Only positive correlations are tested (``r > threshold``), so strongly
    negative correlations are never flagged.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are correlated against each other (rows are the
        paired observations). Columns are addressed by position, so repeated
        column labels are handled.
    threshold : float, default 0.95
        Correlation coefficient above which a pair counts as redundant.
    p_val : float, default 0.05
        Upper bound (exclusive) on the correlation p-value.
    random_state : int or None, default None
        Seed for choosing which column of a redundant pair is dropped. With
        ``None`` the module-level ``random`` generator is used, so a prior
        ``random.seed(...)`` call still controls the outcome.

    Returns
    -------
    numpy.ndarray of bool, shape (n_columns,)
        ``True`` for columns to keep, ``False`` for columns dropped as
        redundant. Suitable for ``df.loc[:, mask]``.
    """
    # A private generator gives a reproducible result per call without
    # touching global RNG state; without a seed, behave exactly as before.
    if random_state is None:
        pick = random.choice
    else:
        pick = random.Random(random_state).choice

    n_columns = df.shape[1]
    # Fetch each column once, by position: avoids an O(n) list.index() lookup
    # for every pair and stays correct when column labels are duplicated.
    column_data = [df.iloc[:, k] for k in range(n_columns)]
    keep = np.ones(n_columns, dtype=bool)

    for i, j in itertools.combinations(range(n_columns), 2):
        # A column that was already dropped cannot make another one redundant.
        if not (keep[i] and keep[j]):
            continue
        correlation = pearsonr(column_data[i], column_data[j])
        if correlation[0] > threshold and correlation[1] < p_val:
            keep[pick([i, j])] = False
    return keep

In [None]:
%store -r radiomics
%store -r meta_data
%store -r ICC
%store -r ICC_features

# Extracting batch labels required for combat
batch = pd.factorize(meta_data['TMA'].astype(str) + meta_data['Grid'])[0]
batch = pd.Series(batch)

# Define threshold and p-value criteria
threshold = 0.75
p_value_threshold = 0.05

# Determine reliable and unreliable features
reliable_features = [
    feature for feature in ICC_features
    if (ICC[feature]['ICC'][2] > threshold) and (ICC[feature]['pval'][2] < p_value_threshold)
]

unreliable_features = [feature for feature in ICC_features if feature not in reliable_features]

# Output results
num_reliable = len(reliable_features)
num_unreliable = len(unreliable_features)

print(f"Threshold: {threshold}")
print(f"Reliable features: {num_reliable}")
print(f"Unreliable features: {num_unreliable}")

In [4]:
# Restrict the feature table to the columns listed in reliable_features
radiomics = radiomics.loc[:, reliable_features]

In [None]:
# Signed log transform: log1p of the magnitude with the original sign restored
radiomics = np.log1p(radiomics.abs()) * np.sign(radiomics)

# pycombat receives the transposed table (features as rows, samples as
# columns) together with the batch labels
radiomics_transpose = pycombat(radiomics.T, batch)

# Transpose the corrected table back to the original orientation
radiomics = radiomics_transpose.T

In [None]:
# pairwise_pearsonr drops one column of each correlated pair at random; seed
# the global generator so a fresh "Restart & Run All" selects the same features.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Boolean mask over radiomics columns: True = keep, False = dropped as redundant
# (the variable name is historical; it does not mark the redundant columns).
redundant_features = pairwise_pearsonr(radiomics, threshold=0.95, p_val=0.05)

In [None]:
# Select columns where the boolean mask is True (the columns to keep)
processed_radiomics = radiomics.loc[:, redundant_features]

# Print the names of the columns that survived the filter; the unfiltered
# radiomics.columns would list every input column, not the selection.
print("Selected Columns:", list(processed_radiomics.columns))

In [8]:
# Standardise the selected features with a freshly fitted StandardScaler
scaler = StandardScaler()

# fit_transform returns a bare array, so rebuild the DataFrame with the
# original row index and column labels
processed_radiomics = pd.DataFrame(
    data=scaler.fit_transform(processed_radiomics),
    index=processed_radiomics.index,
    columns=processed_radiomics.columns,
)

In [None]:
# Save the scaled feature table to IPython's %store database so it can be
# restored later with `%store -r processed_radiomics`.
%store processed_radiomics

In [None]:
# Show the (rows, columns) dimensions of the final feature table as the cell output.
processed_radiomics.shape