In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn import svm
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.linear_model import ElasticNet
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from pathlib import Path

In [2]:
DATA_PATH="/Users/zubrikhina/Documents/datasets/EEG_baseline_with_markers/all_data_features"

In [3]:
DATA_PATH='/Users/zubrikhina/Documents/datasets/eeg_novossibirsk_preproc'

In [4]:
def build_models_feature_selection(X,y):    
    models = {
        'fs_lr_l1': Pipeline([('feature_selection',SelectKBest(chi2, k=50)), 
              ('lg', LogisticRegression(penalty='l1',solver='liblinear', C =1))]),

        'fs_lr_l2': Pipeline([('feature_selection', SelectKBest(chi2, k=50)), 
              ('lg', LogisticRegression(penalty='l2',solver='liblinear',C =1))]),
        'fs_rf_7': Pipeline([('feature_selection', SelectKBest(chi2, k=50)), 
              ('lg', RandomForestClassifier(n_estimators=100, max_depth=7))]),
        'fs_rf_5':Pipeline([('feature_selection', SelectKBest(chi2, k=50)), 
              ('lg', RandomForestClassifier(n_estimators=100, max_depth=5))]),
        'fs_svm_stand': Pipeline([('feature_selection', SelectKBest(chi2, k=50)), 
              ('lg', svm.SVC(C=0.1))]),

        'fs_knn':Pipeline([('feature_selection', SelectKBest(chi2, k=50)), 
              ('lg', KNeighborsClassifier(n_neighbors=5))]),
        'fs_gb': Pipeline([('feature_selection', SelectKBest(chi2, k=50)), 
              ('lg', GradientBoostingClassifier(max_depth=5))]),
        }
    for name, model in models.items():
        try:
            scores = cross_val_score(model, X, y, cv=10, scoring='roc_auc')
            print(name, f"{scores.mean():.3f}+/-{scores.std():.3f}")
        except Exception as e:
            print(e)
    

In [5]:
def build_models_pca(X,y):    
    models = {
        'pca_lr_l1': Pipeline([('pca', PCA(n_components=50)), 
              ('lg', LogisticRegression(penalty='l1',solver='liblinear', C =1))]),

        'pca_lr_l2': Pipeline([('pca', PCA(n_components=50)), 
              ('lg', LogisticRegression(penalty='l2',solver='liblinear',C =1))]),
        'pca_rf_7': Pipeline([('pca', PCA(n_components=50)), 
              ('lg', RandomForestClassifier(n_estimators=100, max_depth=7))]),
        'pca_rf_5':Pipeline([('pca', PCA(n_components=50)), 
              ('lg', RandomForestClassifier(n_estimators=100, max_depth=5))]),
        'pca_svm_stand': Pipeline([('pca', PCA(n_components=60)), 
              ('lg', svm.SVC(C=0.1))]),
        'pca_knn': Pipeline([('pca', PCA(n_components=50)), 
              ('knn', KNeighborsClassifier(n_neighbors=5))]),
        'pca_gb': Pipeline([('pca', PCA(n_components=50)), 
              ('gb', GradientBoostingClassifier(max_depth=5))]),
    }
    for name, model in models.items():
        scores = cross_val_score(model, X, y, cv=10, scoring='roc_auc')
        print(name, f"{scores.mean():.3f}+/-{scores.std():.3f}")
    

In [6]:
def build_models(X,y):    
    models = {
        'lr_l1': LogisticRegression(penalty='l1',solver='liblinear',C =1),
        'lr_l2': LogisticRegression(penalty='l2',solver='liblinear',C =1),
        'rf_7': RandomForestClassifier(n_estimators=100, max_depth=7),
        'rf_5': RandomForestClassifier(n_estimators=100, max_depth=5),
        'svm_stand_0.1': svm.SVC(C=0.1),
        'svm_stand_0.01': svm.SVC(C=0.01),
        'knn': KNeighborsClassifier(n_neighbors=10),
        'gb': GradientBoostingClassifier(max_depth=5)
    }
    for name, model in models.items():
        scores = cross_val_score(model, X, y, cv=10, scoring='f1')
        print(name, f"{scores.mean():.3f}+/-{scores.std():.3f}")
    

In [7]:
for path in Path(DATA_PATH).iterdir():
    print(path.name)
    data =pd.read_csv(path)
    data =data.fillna(0)
    data =data.sample(frac=1).reset_index(drop =True)
    mdd =data.loc[data['target']==1]
    health =data.loc[data['target']==0][:50]
    data =pd.concat([mdd,health])
    data=data.reset_index(drop =True)
    y=data['target']
    X=data.drop(columns=['fn', 'target'])
    build_models(X,y)
    print("_______________")

bands.csv
lr_l1 0.582+/-0.319
lr_l2 0.692+/-0.204
rf_7 0.715+/-0.190
rf_5 0.659+/-0.279
svm_stand_0.1 0.148+/-0.196
svm_stand_0.01 0.000+/-0.000
knn 0.519+/-0.232
gb 0.606+/-0.147
_______________
env.csv
lr_l1 0.574+/-0.111
lr_l2 0.626+/-0.143
rf_7 0.518+/-0.171
rf_5 0.538+/-0.182
svm_stand_0.1 0.000+/-0.000
svm_stand_0.01 0.000+/-0.000
knn 0.539+/-0.154
gb 0.539+/-0.168
_______________
env_theta.csv
lr_l1 0.628+/-0.183
lr_l2 0.591+/-0.251
rf_7 0.619+/-0.169
rf_5 0.627+/-0.072
svm_stand_0.1 0.000+/-0.000
svm_stand_0.01 0.000+/-0.000
knn 0.336+/-0.221
gb 0.492+/-0.135
_______________


In [9]:
for path in Path(DATA_PATH).iterdir():
    print(path.name)
    data =pd.read_csv(path)
    data =data.fillna(0)
    data =data.sample(frac=1).reset_index(drop =True)
    mdd =data.loc[data['target']==1]
    health =data.loc[data['target']==0][:50]
    data =pd.concat([mdd,health])
    data=data.reset_index(drop =True)
    y=data['target']
    X=data.drop(columns=['fn', 'target'])
    build_models_pca(X,y)
    print("_______________")

bands.csv
pca_lr_l1 0.776+/-0.168
pca_lr_l2 0.756+/-0.157
pca_rf_7 0.680+/-0.212
pca_rf_5 0.734+/-0.223
pca_svm_stand 0.717+/-0.137
pca_knn 0.648+/-0.171
pca_gb 0.534+/-0.199
_______________
env.csv
pca_lr_l1 0.718+/-0.158
pca_lr_l2 0.729+/-0.170
pca_rf_7 0.717+/-0.099
pca_rf_5 0.816+/-0.127
pca_svm_stand 0.668+/-0.154
pca_knn 0.656+/-0.178
pca_gb 0.592+/-0.168
_______________
env_theta.csv
pca_lr_l1 0.635+/-0.166
pca_lr_l2 0.656+/-0.207
pca_rf_7 0.711+/-0.192
pca_rf_5 0.727+/-0.131
pca_svm_stand 0.621+/-0.207
pca_knn 0.584+/-0.212
pca_gb 0.644+/-0.125
_______________


In [22]:
for path in Path(DATA_PATH).iterdir():
    print(path.name)
    data =pd.read_csv(path)
    data =data.fillna(0)
    y=data['target']
    X=data.drop(columns=['fn', 'target'])
    build_models_feature_selection(X,y)
    print("_______________")

bands.csv
fs_lr_l1 0.753+/-0.150
fs_lr_l2 0.740+/-0.113
fs_rf_7 0.720+/-0.113
fs_rf_5 0.739+/-0.116
fs_svm_stand 0.778+/-0.113
fs_knn 0.663+/-0.116
fs_gb 0.685+/-0.136
_______________
env_theta.csv
fs_lr_l1 nan+/-nan
fs_lr_l2 nan+/-nan
fs_rf_7 nan+/-nan
fs_rf_5 nan+/-nan


Traceback (most recent call last):
  File "/Users/zubrikhina/opt/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/zubrikhina/opt/anaconda3/lib/python3.9/site-packages/sklearn/pipeline.py", line 341, in fit
    Xt = self._fit(X, y, **fit_params_steps)
  File "/Users/zubrikhina/opt/anaconda3/lib/python3.9/site-packages/sklearn/pipeline.py", line 303, in _fit
    X, fitted_transformer = fit_transform_one_cached(
  File "/Users/zubrikhina/opt/anaconda3/lib/python3.9/site-packages/joblib/memory.py", line 349, in __call__
    return self.func(*args, **kwargs)
  File "/Users/zubrikhina/opt/anaconda3/lib/python3.9/site-packages/sklearn/pipeline.py", line 754, in _fit_transform_one
    res = transformer.fit_transform(X, y, **fit_params)
  File "/Users/zubrikhina/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py", line 702, in fit_transform
    return self.fit(X, y, 

fs_svm_stand nan+/-nan
fs_knn nan+/-nan
fs_gb nan+/-nan
_______________


Traceback (most recent call last):
  File "/Users/zubrikhina/opt/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/zubrikhina/opt/anaconda3/lib/python3.9/site-packages/sklearn/pipeline.py", line 341, in fit
    Xt = self._fit(X, y, **fit_params_steps)
  File "/Users/zubrikhina/opt/anaconda3/lib/python3.9/site-packages/sklearn/pipeline.py", line 303, in _fit
    X, fitted_transformer = fit_transform_one_cached(
  File "/Users/zubrikhina/opt/anaconda3/lib/python3.9/site-packages/joblib/memory.py", line 349, in __call__
    return self.func(*args, **kwargs)
  File "/Users/zubrikhina/opt/anaconda3/lib/python3.9/site-packages/sklearn/pipeline.py", line 754, in _fit_transform_one
    res = transformer.fit_transform(X, y, **fit_params)
  File "/Users/zubrikhina/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py", line 702, in fit_transform
    return self.fit(X, y, 