In [1]:
import feature_creator as fc
import pandas as pd
import xgboost as xgb
from xgboost.sklearn import XGBClassifier,XGBRegressor
from sklearn.model_selection import train_test_split,StratifiedShuffleSplit,cross_val_score
from sklearn.base import clone
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor
import numpy as np
from copy import copy
from sklearn import preprocessing

## Get data

In [2]:
# Read spect_train.csv: the OVERALL_DIAGNOSIS column is the label vector,
# every other column goes into the feature matrix.
df = pd.read_csv('spect_train.csv')
y = df['OVERALL_DIAGNOSIS'].to_numpy()

# NOTE(review): sklearn's normalize() defaults to norm='l2', axis=1, i.e. each
# row (sample) is rescaled to unit length rather than each feature column --
# confirm that per-sample scaling is what is intended here.
X = preprocessing.normalize(df.drop(columns=['OVERALL_DIAGNOSIS']).to_numpy())

# Sanity check: feature-matrix shape first, then label-vector shape.
print(X.shape, y.shape, sep='\n')

(80, 44)
(80,)


## Define Models

In [3]:
# Single seed shared by both estimators so a fresh Restart & Run All rebuilds
# the same models.
SEED = 42

# Estimator that is scored with cross_val_score in the cells below.
model = XGBClassifier(random_state=SEED)

# Estimator handed to fc.feature_creator as the feature model; depth is capped at 5.
# random_state is set explicitly because scikit-learn permutes the candidate
# features at every split, so ties between equally good splits are otherwise
# broken differently from run to run.
feature_model = DecisionTreeRegressor(max_depth=5, random_state=SEED)

In [4]:
# Baseline: 10-fold cross-validated F1 of `model` on the original
# (normalized) feature matrix, before any generated features are added.
scores = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    scoring='f1',
    cv=10,
)
# Mean F1 across the folds is the cell's displayed value.
scores.mean()

0.6780952380952381

## Run Feature Creator

In [5]:
# Run the feature-creation search on the full X / y.
# The call returns a pair: an iterable of feature models (each one later
# contributes one extra column; n_features=2 are requested) and a value bound
# to ga_fitness, which is not used in the cells shown here.
# NOTE(review): batch_size=0.5 presumably means a fraction of the rows per
# batch -- confirm against fc.feature_creator.
# NOTE(review): no seed is set before this call; if the search is stochastic,
# the generated features will differ between runs.
feature_models, ga_fitness = fc.feature_creator(
    model,
    feature_model,
    X,
    y,
    n_features=2,
    batch_size=0.5,
)

## Results

In [6]:
# Build the augmented matrix: the original columns of X followed by one new
# column per entry in feature_models. Every generated feature is computed from
# the original X, never from the partially augmented matrix.
generated_columns = [
    # Turn each model's values into an (n_samples, 1) column.
    np.asarray(fc.get_feature_values(fitted, X)).reshape(-1, 1)
    for fitted in feature_models
]
# One concatenation at the end; the result is a new array, so X itself is
# left untouched.
new_X = np.hstack([X, *generated_columns])

In [7]:
# Compare shapes: augmented matrix first, original matrix second.
# The column counts should differ by the number of generated features.
print(new_X.shape, X.shape, sep='\n')

(80, 46)
(80, 44)


In [8]:
# Show the augmented matrix: the columns of X followed by the generated
# feature columns appended in the cell that builds new_X.
new_X

array([[0.135887  , 0.11976481, 0.16122186, ..., 0.15431235, 0.2246454 ,
        0.20040661],
       [0.15591649, 0.13426142, 0.14941997, ..., 0.12559939, 0.11799481,
        0.20040661],
       [0.16373405, 0.14297903, 0.16142794, ..., 0.10608121, 0.41876183,
        0.23006022],
       ...,
       [0.14701894, 0.140436  , 0.16018481, ..., 0.14921325, 0.89131975,
        0.20040661],
       [0.1524338 , 0.14570878, 0.1614005 , ..., 0.09863364, 0.67591099,
        0.17632061],
       [0.14780643, 0.12093254, 0.15452491, ..., 0.13212999, 0.15145097,
        0.20040661]])

In [9]:
# Show the original normalized feature matrix, for side-by-side comparison
# with new_X.
X

array([[0.135887  , 0.11976481, 0.16122186, ..., 0.17043454, 0.14740285,
        0.15431235],
       [0.15591649, 0.13426142, 0.14941997, ..., 0.15375098, 0.12126838,
        0.12559939],
       [0.16373405, 0.14297903, 0.16142794, ..., 0.09455065, 0.11761178,
        0.10608121],
       ...,
       [0.14701894, 0.140436  , 0.16018481, ..., 0.175545  , 0.14701894,
        0.14921325],
       [0.1524338 , 0.14570878, 0.1614005 , ..., 0.12553372, 0.13001707,
        0.09863364],
       [0.14780643, 0.12093254, 0.15452491, ..., 0.15228542, 0.13212999,
        0.13212999]])

In [10]:
# Repeat the baseline evaluation (same call as the "#Baseline" cell) on the
# original X, this time reporting the spread across folds as well as the mean.
scores = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    scoring='f1',
    cv=10,
)
# Printed as mean+-standard deviation of the per-fold F1 scores.
print(f'{np.mean(scores)}+-{np.std(scores)}')

0.6780952380952381+-0.19253588763451576


In [11]:
# Same 10-fold F1 evaluation, now on new_X (original columns plus the
# generated feature columns), to compare against the baseline score.
# NOTE(review): the feature models were produced from all of X and y
# (the fc.feature_creator cell), so each held-out fold may already have
# influenced the extra columns -- confirm whether that leaks label
# information into this score.
scores = cross_val_score(
    estimator=model,
    X=new_X,
    y=y,
    scoring='f1',
    cv=10,
)
# Printed as mean+-standard deviation of the per-fold F1 scores.
print(f'{np.mean(scores)}+-{np.std(scores)}')

0.672936507936508+-0.19436326455691535
