Runs the experiments, collects the results, and saves them to CSV files.

In [None]:
# load datasets

from data import Datasets

# One entry per dataset: the name used to look it up through Datasets.get
# together with its train_size setting.
datasets = [
    {"name": name, "train_size": train_size}
    for name, train_size in (
        ("Prostate", 100),
        ("Throat", 80),
        ("Renal", 120),
        ("Breast", 260),
    )
]

# Load each dataset, store it under the "data" key of its entry and print
# the shape of its "X" member as a quick sanity check.
for ds in datasets:
    ds.update(data=Datasets.get(ds["name"]))
    print(ds["name"], ds["data"]["X"].shape)

In [None]:
# common parameters

# number of objects by which the training sets differ:
# every value from 1 to 10, followed by the even values from 12 to 20
ps = [*range(1, 11), *range(12, 21, 2)]

# upper bound on the number of cross-validation steps (keeps calculation time down)
max_cv_steps = 10

# fixed random initial states, so that results are reproducible
random_states = [42, 16, 101]

In [None]:
# Single classifier experiment
# classifiers: LogisticRegression
# datasets: Prostate, Throat, Renal, Breast 


from sklearn.linear_model import LogisticRegression

from src.experiment import experiment


# Three L1-penalised LogisticRegression configurations (liblinear solver) that
# differ only in C; threshold and importance_getter are shared by all entries.
classifiers_lr = [
    {
        "name": label,
        "estimator": LogisticRegression(C=c_value, penalty='l1', solver='liblinear', max_iter=100000),
        "threshold": 1e-8,
        "importance_getter": "auto",
    }
    for label, c_value in (
        ("LogisticRegression_C015", 0.15),
        ("LogisticRegression_C05", 0.5),
        ("LogisticRegression_C075", 0.75),
    )
]

from pathlib import Path

# Destination of the LogisticRegression single-classifier results.
filename = "results/single_classifier/results_lr.csv"
# Create the output folder before the run starts: DataFrame.to_csv does not
# create missing directories, and failing after the whole computation is costly.
Path(filename).parent.mkdir(parents=True, exist_ok=True)

# Run every configuration in classifiers_lr on all datasets and store the result.
results_df = experiment(classifiers_lr, datasets, ps, max_cv_steps, random_states, verbose=True)
results_df.to_csv(filename, index=False)

In [None]:
# Single classifier experiment
# classifiers: SVM
# datasets: Prostate, Throat, Renal, Breast


from sklearn.svm import LinearSVC

from src.experiment import experiment


# Three L1-penalised LinearSVC configurations (primal formulation, dual=False)
# that differ only in C; threshold and importance_getter are shared.
classifiers_svm = [
    {
        "name": label,
        "estimator": LinearSVC(C=c_value, penalty="l1", dual=False, max_iter=100000),
        "threshold": 1e-8,
        "importance_getter": "auto",
    }
    for label, c_value in (
        ("SVM_C005", 0.05),
        ("SVM_C035", 0.35),
        ("SVM_C06", 0.6),
    )
]

from pathlib import Path

# Destination of the SVM single-classifier results.
filename = "results/single_classifier/results_svm.csv"
# Create the output folder before the run starts: DataFrame.to_csv does not
# create missing directories, and failing after the whole computation is costly.
Path(filename).parent.mkdir(parents=True, exist_ok=True)

# Run every configuration in classifiers_svm on all datasets and store the result.
results_df = experiment(classifiers_svm, datasets, ps, max_cv_steps, random_states, verbose=True)
results_df.to_csv(filename, index=False)

In [None]:
# Single classifier experiment
# classifiers: CPL
# datasets: Prostate, Throat, Renal, Breast


from src.cpl.classifiers import GenetClassifier

from src.experiment import experiment


# Three GenetClassifier (CPL) configurations that differ only in C;
# threshold and importance_getter are shared by all entries.
classifiers_cpl = [
    {
        "name": label,
        "estimator": GenetClassifier(C=c_value),
        "threshold": 1e-8,
        "importance_getter": "auto",
    }
    for label, c_value in (
        ("CPL_C055", 0.55),
        ("CPL_C08", 0.8),
        ("CPL_C092", 0.92),
    )
]

from pathlib import Path

# Destination of the CPL single-classifier results.
filename = "results/single_classifier/results_cpl.csv"
# Create the output folder before the run starts: DataFrame.to_csv does not
# create missing directories, and failing after the whole computation is costly.
Path(filename).parent.mkdir(parents=True, exist_ok=True)

# Run every configuration in classifiers_cpl on all datasets and store the result.
results_df = experiment(classifiers_cpl, datasets, ps, max_cv_steps, random_states, verbose=True)
results_df.to_csv(filename, index=False)

In [None]:
# Single classifier experiment
# classifiers: RandomForest
# datasets: Prostate, Throat, Renal, Breast

from sklearn.ensemble import RandomForestClassifier

from src.experiment import experiment


# Three RandomForest configurations with identical estimator settings; only
# the threshold value changes. Each entry gets its own estimator instance.
classifiers_rf = [
    {
        "name": label,
        "estimator": RandomForestClassifier(n_estimators=100, random_state=0, min_samples_leaf=2),
        "threshold": cutoff,
        "importance_getter": "auto",
    }
    for label, cutoff in (
        ("RandomForest_t6e-3", 6e-3),
        ("RandomForest_t4e-3", 4e-3),
        ("RandomForest_t2e-3", 2e-3),
    )
]

from pathlib import Path

# Destination of the RandomForest single-classifier results.
filename = "results/single_classifier/results_rf.csv"
# Create the output folder before the run starts: DataFrame.to_csv does not
# create missing directories, and failing after the whole computation is costly.
Path(filename).parent.mkdir(parents=True, exist_ok=True)

# Run every configuration in classifiers_rf on all datasets and store the result.
results_df = experiment(classifiers_rf, datasets, ps, max_cv_steps, random_states, verbose=True)
results_df.to_csv(filename, index=False)

In [None]:
# Multi classifiers experiment
# classifiers: CPL, RandomForest, LogisticRegression, SVM
# datasets: Prostate

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

from src.cpl.classifiers import GenetClassifier
from src.experiment import experiment


# The four classifiers compared on the Prostate dataset, given as
# (name, estimator, threshold) rows; importance_getter is "auto" for all.
classifiers_prostate = [
    dict(name=label, estimator=model, threshold=cutoff, importance_getter="auto")
    for label, model, cutoff in (
        (
            "RandomForest",
            RandomForestClassifier(n_estimators=100, random_state=0, min_samples_leaf=2),
            3.9e-3,
        ),
        (
            "LogisticRegression",
            LogisticRegression(C=0.33, penalty='l1', solver='liblinear', max_iter=100000),
            1e-8,
        ),
        (
            "SVM",
            LinearSVC(C=0.09, penalty="l1", dual=False, max_iter=100000),
            1e-8,
        ),
        (
            "CPL",
            GenetClassifier(C=0.947),
            1e-8,
        ),
    )
]

from pathlib import Path

# Destination of the multi-classifier results for the Prostate dataset.
filename = "results/multi_classifiers/results_prostate.csv"
# Create the output folder before the run starts: DataFrame.to_csv does not
# create missing directories, and failing after the whole computation is costly.
Path(filename).parent.mkdir(parents=True, exist_ok=True)

# datasets[0:1] is a one-element list holding only the first entry (Prostate).
results_df = experiment(classifiers_prostate, datasets[0:1], ps, max_cv_steps, random_states, verbose=True)
results_df.to_csv(filename, index=False)

In [None]:
# Multi classifiers experiment
# classifiers: CPL, RandomForest, LogisticRegression, SVM
# datasets: Throat

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

from src.cpl.classifiers import GenetClassifier
from src.experiment import experiment


# The four classifiers compared on the Throat dataset, given as
# (name, estimator, threshold) rows; importance_getter is "auto" for all.
classifiers_throat = [
    dict(name=label, estimator=model, threshold=cutoff, importance_getter="auto")
    for label, model, cutoff in (
        (
            "RandomForest",
            RandomForestClassifier(n_estimators=100, random_state=0, min_samples_leaf=2),
            7.2e-3,
        ),
        (
            "LogisticRegression",
            LogisticRegression(C=0.035, penalty='l1', solver='liblinear', max_iter=100000),
            1e-8,
        ),
        (
            "SVM",
            LinearSVC(C=0.1, penalty="l1", dual=False, max_iter=100000),
            1e-8,
        ),
        (
            "CPL",
            GenetClassifier(C=0.93),
            1e-8,
        ),
    )
]

from src.experiment import experiment
from datetime import datetime 

from pathlib import Path

# Destination of the multi-classifier results for the Throat dataset.
# NOTE(review): the file used to be written as "results_thoat.csv" (typo);
# anything that still reads the old name must be pointed at this one.
filename = "results/multi_classifiers/results_throat.csv"
# Create the output folder before the run starts: DataFrame.to_csv does not
# create missing directories, and failing after the whole computation is costly.
Path(filename).parent.mkdir(parents=True, exist_ok=True)

# datasets[1:2] is a one-element list holding only the second entry (Throat).
results_df = experiment(classifiers_throat, datasets[1:2], ps, max_cv_steps, random_states, verbose=True)
results_df.to_csv(filename, index=False)

In [None]:
# Multi classifiers experiment
# classifiers: CPL, RandomForest, LogisticRegression, SVM
# datasets: Renal

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

from src.cpl.classifiers import GenetClassifier
from src.experiment import experiment


# The four classifiers compared on the Renal dataset, given as
# (name, estimator, threshold) rows; importance_getter is "auto" for all.
classifiers_renal = [
    dict(name=label, estimator=model, threshold=cutoff, importance_getter="auto")
    for label, model, cutoff in (
        (
            "RandomForest",
            RandomForestClassifier(n_estimators=100, random_state=0, min_samples_leaf=2),
            6.8e-3,
        ),
        (
            "LogisticRegression",
            LogisticRegression(C=0.097, penalty='l1', solver='liblinear', max_iter=100000),
            1e-8,
        ),
        (
            "SVM",
            LinearSVC(C=0.04, penalty="l1", dual=False, max_iter=100000),
            1e-8,
        ),
        (
            "CPL",
            GenetClassifier(C=0.88),
            1e-8,
        ),
    )
]

from pathlib import Path

# Destination of the multi-classifier results for the Renal dataset.
filename = "results/multi_classifiers/results_renal.csv"
# Create the output folder before the run starts: DataFrame.to_csv does not
# create missing directories, and failing after the whole computation is costly.
Path(filename).parent.mkdir(parents=True, exist_ok=True)

# datasets[2:3] is a one-element list holding only the third entry (Renal).
results_df = experiment(classifiers_renal, datasets[2:3], ps, max_cv_steps, random_states, verbose=True)
results_df.to_csv(filename, index=False)

In [None]:
# Multi classifiers experiment
# classifiers: CPL, RandomForest, LogisticRegression, SVM
# datasets: Breast

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

from src.cpl.classifiers import GenetClassifier
from src.experiment import experiment


# The four classifiers compared on the Breast dataset, given as
# (name, estimator, threshold) rows; importance_getter is "auto" for all.
classifiers_breast = [
    dict(name=label, estimator=model, threshold=cutoff, importance_getter="auto")
    for label, model, cutoff in (
        (
            "RandomForest",
            RandomForestClassifier(n_estimators=100, random_state=0, min_samples_leaf=2),
            4.65e-3,
        ),
        (
            "LogisticRegression",
            LogisticRegression(C=0.118, penalty='l1', solver='liblinear', max_iter=100000),
            1e-8,
        ),
        (
            "SVM",
            LinearSVC(C=0.042, penalty="l1", dual=False, max_iter=100000),
            1e-8,
        ),
        (
            "CPL",
            GenetClassifier(C=0.946),
            1e-8,
        ),
    )
]

from pathlib import Path

# Destination of the multi-classifier results for the Breast dataset.
filename = "results/multi_classifiers/results_breast.csv"
# Create the output folder before the run starts: DataFrame.to_csv does not
# create missing directories, and failing after the whole computation is costly.
Path(filename).parent.mkdir(parents=True, exist_ok=True)

# datasets[3:4] is a one-element list holding only the fourth entry (Breast).
results_df = experiment(classifiers_breast, datasets[3:4], ps, max_cv_steps, random_states, verbose=True)
results_df.to_csv(filename, index=False)