In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_auc_score, average_precision_score

In [3]:
# --- Run configuration -------------------------------------------------------

# CSV file that is loaded into `feats_df` with pd.read_csv.
XL_PATH = r"inputs/radiomicsFeatures.csv"

# Directory that receives one `rank_df{i}.csv` file per repeat.
OUT_DIR = r"outputs/random"

# Columns of the feature table that are NOT features; they are excluded
# before the remaining columns are ranked.
MASK_FEATS = ["id", "label"]

# Number of independent random rankings to generate.
NUM_REPEATS = 100

In [4]:
# Load the full feature table (identifier/label columns plus feature columns)
# from the CSV configured in XL_PATH.
feats_df = pd.read_csv(filepath_or_buffer=XL_PATH)

In [5]:
# Pull the two non-feature columns out as NumPy arrays.
# Bracket indexing is used instead of attribute access (`feats_df.id`) because
# attribute access silently resolves to a DataFrame attribute/method if a column
# name ever collides with one; it also matches the string names in MASK_FEATS.
# NOTE(review): presumably `id` is a per-row identifier and `label` the target
# class -- confirm against the CSV schema.
pids = feats_df["id"].to_numpy()
labels = feats_df["label"].to_numpy()

### Feature Selection Pipeline

In [6]:
class RandomFS:
    """Baseline "feature selector" that assigns every feature a random rank.

    Calling an instance returns a table with one row per feature column and a
    random permutation of the ranks ``1..n_features``.

    Parameters
    ----------
    seed : int or None, optional
        When ``None`` (the default) ranks are drawn from NumPy's global RNG
        (``np.random``), so a preceding ``np.random.seed(...)`` still controls
        the result.  When given, the instance owns a private
        ``np.random.RandomState(seed)``; successive calls on the same instance
        then produce a reproducible sequence of different rankings without
        touching the global RNG state.
    """

    def __init__(self, seed=None):
        # Fall back to the module-level RNG so un-seeded use keeps its
        # original behaviour (including honouring np.random.seed).
        self._rng = np.random if seed is None else np.random.RandomState(seed)

    def __call__(self, feats_df, mask_feats):
        """Rank the feature columns of ``feats_df`` in random order.

        Parameters
        ----------
        feats_df : pandas.DataFrame
            Table whose columns are features plus the columns in ``mask_feats``.
            Only the column names are used; the values are never read.
        mask_feats : list-like of str
            Column names to exclude from the ranking.

        Returns
        -------
        pandas.DataFrame
            Columns ``"feature"`` (column names, in their original order) and
            ``"rank"`` (a random permutation of ``1..n_features``).  Empty if
            every column is masked.
        """
        is_masked = feats_df.columns.isin(mask_feats)
        features = feats_df.columns[~is_masked].to_list()

        # Ranks are 1-based; permutation() returns a shuffled copy.
        ranks = self._rng.permutation(np.arange(1, len(features) + 1)).tolist()

        return pd.DataFrame({"feature": features, "rank": ranks})


In [7]:
# Create the output directory if it is missing. exist_ok=True replaces the
# separate exists() check, which could race with another process creating the
# directory between the check and the makedirs() call.
os.makedirs(OUT_DIR, exist_ok=True)

# The selector is constructed without arguments, so one instance can serve
# every repeat instead of being rebuilt on each iteration.
selector = RandomFS()

# NOTE(review): no RNG seed is set in the visible cells, so the rankings
# written below differ from run to run -- confirm whether that is intended.
for i in range(NUM_REPEATS):
    # One independent random ranking per repeat, saved as rank_df{i}.csv.
    rank_df = selector(feats_df, MASK_FEATS)
    rank_df.to_csv(os.path.join(OUT_DIR, f"rank_df{i}.csv"), index=False)