In [1]:
# Install the third-party packages this notebook relies on into the kernel's environment.
# `tables` (PyTables) is never imported by name; presumably it is listed as the HDF5
# backend needed by pd.read_hdf -- TODO confirm and consider pinning versions.
%pip install scikit-learn optuna matplotlib seaborn pandas tables tqdm

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [1]:
from sklearn.model_selection import KFold
from sklearn.decomposition import KernelPCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import matthews_corrcoef
import optuna
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


# Data Load

In [2]:
import re
from glob import glob


def _part_sort_key(path):
    """Natural-sort key: numeric runs compare as integers, so 'part_10' follows 'part_2'."""
    # re.split with a capturing group alternates text / digits, so odd positions are numbers.
    return [int(tok) if i % 2 else tok for i, tok in enumerate(re.split(r"([0-9]+)", path))]


# glob() yields matches in arbitrary, filesystem-dependent order. Sort them so the rows are
# concatenated in the same order on every run (presumably part number == recording order;
# verify against how the parts were written).
part_paths = sorted(
    glob("data/ID18150/Day2/DataFrame_Imaging_dFF_18150_day2_part_*.h5"),
    key=_part_sort_key,
)
if not part_paths:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError(
        "No files match data/ID18150/Day2/DataFrame_Imaging_dFF_18150_day2_part_*.h5"
    )

# NOTE(review): concat keeps each part's own index, so labels may repeat across parts;
# downstream code should select rows of `df` / `y` by position, not by label.
df = pd.concat([pd.read_hdf(path) for path in part_paths])

# Columns whose *name* is an integer (Python or NumPy int, but not bool) are the features.
numerical_columns = [
    col for col in df.columns
    if isinstance(col, (int, np.integer)) and not isinstance(col, bool)
]

# Feature matrix (rows = samples, columns = integer-named columns)
X = df[numerical_columns].values
# Values of the Time column, in row order
t = df.Time.to_list()
# Target: the Pump column cast to int
y = df['Pump'].astype(int)
n_features = len(numerical_columns)

# Modelling

In [3]:
n_folds = 20


def objective(trial):
    """Optuna objective: cross-validated KernelPCA + cosine k-NN classification score.

    Samples `n_components`, `kernel` and `n_neighbors` from `trial`, then runs an
    unshuffled `n_folds`-fold split over the global `X` / `y`. In each fold a
    KernelPCA is fitted on the training rows, both partitions are projected, a
    KNeighborsClassifier is fitted on the projected training rows, and the Matthews
    correlation coefficient is computed on the held-out rows.

    Args:
        trial: the optuna trial used to sample hyper-parameters.

    Returns:
        The 25th percentile of the per-fold MCC values (a pessimistic summary, so
        the study favours configurations that do well on their weaker folds).
    """
    # Hyper-parameters are constant for the whole trial, so sample them once up front
    # (same names and order as before) instead of re-requesting them in every fold.
    # Clamp the range so tiny feature counts never produce n_components == 0 or an
    # empty interval.
    min_components = max(1, int(0.05 * n_features))
    max_components = max(min_components, int(0.5 * n_features))
    n_components = trial.suggest_int('n_components', min_components, max_components)
    kernel = trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid'])
    n_neighbors = trial.suggest_int('n_neighbors', 3, 50)

    # KFold yields *positional* indices. Indexing a pandas Series with them is
    # label-based and breaks when the index is not a clean 0..n-1 range, so work on
    # a plain array view of the labels.
    labels = np.asarray(y)

    kf = KFold(n_splits=n_folds)
    mcc_acc = []
    for train_idx, test_idx in tqdm(kf.split(X), total=n_folds):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = labels[train_idx], labels[test_idx]

        pca = KernelPCA(
            n_components=n_components,
            kernel=kernel,
            n_jobs=2,
            degree=2,  # only used by the 'poly' kernel
            eigen_solver='randomized',
            random_state=0,  # the randomized solver is stochastic; pin it for reproducibility
        )
        pca.fit(X_train)
        X_train_pca = pca.transform(X_train)
        X_test_pca = pca.transform(X_test)

        clf = KNeighborsClassifier(n_neighbors=n_neighbors, metric='cosine', n_jobs=-1)
        clf.fit(X_train_pca, y_train)
        y_pred = clf.predict(X_test_pca)

        mcc_acc.append(matthews_corrcoef(y_test, y_pred))

    # Lower quartile of the fold scores
    return np.percentile(mcc_acc, 25)

In [4]:
# Open the SQLite-backed study (re-using it when it already exists) and run
# up to 100 further trials of `objective`, maximising its return value.
study = optuna.create_study(
    study_name="pca_optimization_with_kernel",
    storage="sqlite:///data/ID18150/Day2/pca_classifier.db",
    direction='maximize',
    load_if_exists=True,
)
study.optimize(objective, n_trials=100)

[I 2024-11-07 02:25:16,019] Using an existing study with name 'pca_optimization_with_kernel' instead of creating a new one.
  0%|          | 0/20 [00:00<?, ?it/s]
[W 2024-11-07 02:25:16,641] Trial 4 failed with parameters: {'n_components': 319, 'kernel': 'rbf'} because of the following error: TypeError("KernelPCA.__init__() got an unexpected keyword argument 'eigensolver'").
Traceback (most recent call last):
  File "/usr/local/python/3.10.13/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_530/1236335571.py", line 8, in objective
    pca = KernelPCA(
TypeError: KernelPCA.__init__() got an unexpected keyword argument 'eigensolver'
[W 2024-11-07 02:25:16,643] Trial 4 failed with value None.


TypeError: KernelPCA.__init__() got an unexpected keyword argument 'eigensolver'