In [1]:
# Importing Libraries
import numpy as np
import pandas as pd
from functools import reduce
from sklearn.model_selection import train_test_split, RepeatedKFold, StratifiedShuffleSplit, GridSearchCV
# Importing our modules
from pipeline.selection_pipeline import SelectionPipeline
from selection_methods.lasso_method import LassoMethod
from selection_methods.alasso_method import AlassoMethod
from selection_methods.elasticnet_method import ElasticNetMethod
from selection_methods.mlrrf_method import MLRRFMethod
from selection_methods.relieff_method import ReliefFMethod
from selection_methods.svmrfe_method import SVMRFEMethod
from selection_methods.boruta_method import BorutaMethod

In [2]:
# Settings
random_state = 42

In [3]:
# Load and Pre-Process Data 
discovery_set = pd.read_excel('data/discovery_set.xlsx', index_col=0)
discovery_set['state'] = discovery_set.apply(lambda a:0 if a['type']=='N' else 1, axis=1)
plasma_df = discovery_set.copy()

# Make a stratified test set
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.33, random_state=random_state)
for train_index, test_index in splitter.split(plasma_df, plasma_df['type']):
    train_set, test_set = plasma_df.iloc[train_index], plasma_df.iloc[test_index]

Xtrain_stratified = train_set
Xtest_stratified = test_set

ytrain_stratified = Xtrain_stratified['state']
ytest_stratified = Xtest_stratified['state']

Xtrain = Xtrain_stratified.drop(['state', 'type', 'Batch', 'batch', 'sample_id'], axis=1)
Xtest = Xtest_stratified.drop(['state', 'type', 'Batch', 'batch', 'sample_id'], axis=1)

metas = list(Xtrain.columns)

ytrain = np.array(ytrain_stratified)
ytrain = ytrain.astype(np.float32)

ytest = np.array(ytest_stratified)
ytest = ytest.astype(np.float32)

Xtrain = Xtrain.values
Xtest = Xtest.values

In [4]:
BASE_PANEL = {'Succinate_neg-079', 'Uridine_neg-088', 'S-Adenosyl-methionine_pos-139', 
              'N-Acetyl-D-glucosamine 6-phosphate_neg-061', 'Serotonin_pos-142', 
              'Pyroglutamic acid_neg-072', 'Neopterin_pos-117', 'Lactic acid_neg-055',
              '2-Aminooctanoic acid_pos-006', 'NMN_pos-162'}

# Feature Selection
pipeline = SelectionPipeline()

# Add feature selection methods
pipeline.add_method(LassoMethod(n_features=15, alpha=1.0, max_iter=1000))
pipeline.add_method(BorutaMethod(n_features=15))
pipeline.add_method(ReliefFMethod(n_features=15, n_neighbors=100))

# Apply feature selection pipeline on the data and get the output metabolites from each method
metas_dict = pipeline.apply(Xtrain, ytrain)

# Our Method
differences = []

for metas in metas_dict.values():
    differences.append(metas.difference(BASE_PANEL))

SEF = reduce(set.intersection, differences)
SBP = BASE_PANEL.union(SEF)
print(SBP)


AttributeError: 'numpy.ndarray' object has no attribute 'columns'