In [1]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

################################################################################
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

################################################################################
from google.colab import drive
drive.mount('/content/drive')
!ls '/content/drive/My Drive'

Wed Jul 31 08:52:19 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA L4                      Off | 00000000:00:03.0 Off |                    0 |
| N/A   52C    P8              13W /  72W |      1MiB / 23034MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.expand_frame_repr', False)

import warnings
warnings.filterwarnings("ignore")

import os

from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from mlxtend.evaluate import confusion_matrix
from sklearn.metrics import roc_auc_score


import joblib
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import OneSidedSelection


from collections import Counter
from matplotlib import pyplot
from sklearn.model_selection import learning_curve

import time


In [3]:
# modelName = m
# X = X_train
# y = y_train

In [4]:
# Create a function to define parameters for different models
def search_space(modelname):
    if modelname == "lr":
        space = dict()
        space["penalty"] = ["none", "l2"]
        space["solver"] = ["newton-cg", "lbfgs"]
        space["C"] = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]

    elif modelname == "rf":
        space = dict()
        space["n_estimators"] = [30, 50, 100, 200]
        space["max_features"] = ['sqrt', 'log2']
        space["max_depth"] = [10, 14, 16]
        space["min_samples_split"] = [2, 5, 10]
        space["min_samples_leaf"] = [1, 2, 5]
        space["bootstrap"] = [True, False]
        space["criterion"] = ["gini", "entropy"]

        # space["n_estimators"] = [30, 50, 100, 200, 500]
        # space["max_features"] = ['sqrt', 'log2']
        # space["max_depth"] = [2, 4, 8, 10, 14]
        # space["min_samples_split"] = [2, 5, 10, 20]
        # space["min_samples_leaf"] = [1, 2, 5, 10]
        # space["bootstrap"] = [True, False]
        space["criterion"] = ["gini", "entropy"]

        # space["n_estimators"] = [30, 50, 100]
        # space["max_features"] = ['sqrt', 'log2']
        # space["max_depth"] = [4, 8]
        # space["min_samples_split"] = [2, 5]
        # space["min_samples_leaf"] = [2, 5]
        # space["bootstrap"] = [True, False]
        # space["criterion"] = ["gini", "entropy"]

    elif modelname == "xgb":
        space = dict()
        space["n_estimators"] = [100, 200, 300, 500]
        space["max_depth"] = [6, 9, 12]
        space["min_child_weight"] = [1, 3, 5]
        space["subsample"] = [0.4, 0.5, 0.7]
        space["colsample_bytree"] = [0.7, 0.8]
        space["learning_rate"] = [0.01, 0.05, 0.1]
        space["gamma"] = [0.1, 1]
        # space["reg_alpha"] = [0, 0.01, 0.1, 1]
        # space["reg_lambda"] = [0, 0.01, 0.1, 1]

        # space["n_estimators"] = [50, 100, 200, 300, 500]
        # space["max_depth"] = [3, 6, 9, 12]
        # space["min_child_weight"] = [1, 3, 5, 10]
        # space["subsample"] = [0.5, 0.7, 0.8]
        # space["colsample_bytree"] = [0.5, 0.7, 0.8]
        # space["learning_rate"] = [0.01, 0.05, 0.1]
        # space["gamma"] = [0.1, 0.5, 1]
        # # space["reg_alpha"] = [0, 0.01, 0.1, 1]
        # # space["reg_lambda"] = [0, 0.01, 0.1, 1]

        # space["n_estimators"] = [50, 100, 200]
        # space["max_depth"] = [6, 9]
        # space["min_child_weight"] = [5, 10]
        # space["subsample"] = [0.5, 0.7]
        # space["colsample_bytree"] = [0.5, 0.7]
        # space["learning_rate"] = [0.01, 0.05]
        # space["gamma"] = [0.1, 0.5]
        # space["reg_alpha"] = [0.01, 0.1]
        # space["reg_lambda"] = [0.01, 0.1]

    elif modelname == "dt":
        space = dict()
        space["max_depth"] = [2, 4, 8, 10, 20, 30]
        space["min_samples_split"] = [2, 5, 10, 20]
        space["min_samples_leaf"] = [1, 2, 5, 10]
        space["criterion"] = ["gini", "entropy"]
        space["splitter"] = ["best", "random"]

    elif modelname == "svm":
        space = dict()
        # space["C"] = [0.1, 1, 10, 100, 1000]
        # space["gamma"] = [0.001, 0.01, 0.1, 1, 10]
        # space["kernel"] = ["linear", "rbf", "poly", "sigmoid"]
        # space["degree"] = [2, 3, 4]
        # space["coef0"] = [0.0, 0.1, 0.5, 1.0]

        space["C"] = [0.1, 1, 10]
        space["gamma"] = [0.001, 0.01]
        space["kernel"] = ["linear", "rbf", "poly", "sigmoid"]
        space["degree"] = [2, 3]
        space["coef0"] = [0.0, 0.1]

    elif modelname == "nb":
        space = dict()
        space["var_smoothing"] = np.logspace(0, -9, num=10)

    elif modelname == "ada":
        space = dict()
        space["n_estimators"] = [100, 200, 400, 500]
        space["learning_rate"] = [0.01, 0.1, 0.5, 1]

        # space["n_estimators"] = [30, 50, 100, 200, 400, 500]
        # space["learning_rate"] = [0.01, 0.1, 0.5, 1]

    elif modelname == "gb":
        space = dict()
        space["n_estimators"] = [100, 200, 400, 500]
        space["max_depth"] = [3, 5, 10, 12]
        space["min_samples_split"] = [2, 5, 10]
        space["min_samples_leaf"] = [1, 2, 4]
        space["learning_rate"] = [0.05, 0.1]
        space["subsample"] = [0.7, 0.8, 0.9]

        # space["n_estimators"] = [30, 50, 100, 200, 400, 500]
        # space["max_depth"] = [3, 5, 7, 10, 12]
        # space["min_samples_split"] = [2, 5, 10]
        # space["min_samples_leaf"] = [1, 2, 4]
        # space["learning_rate"] = [0.01, 0.1, 0.05]
        # space["subsample"] = [0.7, 0.8, 0.9, 1.0]

        # space["n_estimators"] = [50, 100]
        # space["max_depth"] = [3, 5, 7]
        # space["min_samples_split"] = [5, 10]
        # space["min_samples_leaf"] = [2, 4]
        # space["learning_rate"] = [0.01, 0.15]
        # space["subsample"] = [0.7, 0.85]

    return space


In [5]:
def plot_learning_curve(estimator, title, X, y, cv=None, n_jobs=None, train_sizes=np.linspace(0.1, 1.0, 5)):
    plt.figure()
    plt.title(title)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes, scoring='f1')

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    plt.grid()

    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="Cross-validation score")

    plt.legend(loc="best")
    return plt


# Base model with randomized search function
def modelTraining(modelName, X, y, metric):

    if modelName == "lr":
        model = LogisticRegression()
    elif modelName == "rf":
        model = RandomForestClassifier()
    elif modelName == "xgb":
        model = XGBClassifier()
    elif modelName == "dt":
        model = DecisionTreeClassifier()
    elif modelName == "svm":
        model = SVC()
    elif modelName == "nb":
        model = GaussianNB()
    elif modelName == "ada":
        model = AdaBoostClassifier()
    elif modelName == "gb":
        model = GradientBoostingClassifier()
    else:
        raise ValueError("Unknown model name")

    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    grid = search_space(modelName)

    search = GridSearchCV(model, grid, scoring=metric, n_jobs=-1, cv=cv, verbose=3, return_train_score=True)
    result = search.fit(X, y)

    cv_results = pd.DataFrame(result.cv_results_)
    columns_to_display = [
        'params',
        'mean_train_score',
        'std_train_score',
        'mean_test_score',
        'std_test_score',
        'rank_test_score'
    ]
    cv_results_filtered = cv_results[columns_to_display]
    cv_results_filtered

    cv_results_filtered['diff'] = cv_results_filtered['mean_train_score'] - cv_results_filtered['mean_test_score']
    cv_results_filtered = cv_results_filtered.sort_values(['diff'], ascending=True).reset_index(drop=True)
    hyperparameters = cv_results_filtered[cv_results_filtered['rank_test_score'] == 1]['params'].reset_index(drop=True)[0]
    hyperparameters

    if modelName == "lr":
        model_v2 = LogisticRegression(**hyperparameters)
    elif modelName == "rf":
        model_v2 = RandomForestClassifier(**hyperparameters)
    elif modelName == "xgb":
        model_v2 = XGBClassifier(**hyperparameters)
    elif modelName == "dt":
        model_v2 = DecisionTreeClassifier(**hyperparameters)
    elif modelName == "svm":
        model_v2 = SVC(**hyperparameters)
    elif modelName == "nb":
        model_v2 = GaussianNB(**hyperparameters)
    elif modelName == "ada":
        model_v2 = AdaBoostClassifier(**hyperparameters)
    elif modelName == "gb":
        model_v2 = GradientBoostingClassifier(**hyperparameters)

    model_v2.fit(X, y)

    return model_v2, cv_results_filtered


In [6]:
def error_metrics_classification(y_true, y_pred):

    acc = accuracy_score(y_true, y_pred)
    pres = precision_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    auc = roc_auc_score(y_true, y_pred)

    summary = pd.DataFrame([[str(round(acc,3)),
                             str(round(pres,3)),
                             str(round(f1,3)),
                             str(round(recall,3)),
                             str(round(auc,3))
                            ]],
                            columns=['Accuracy', 'Precision',
                                     'F1','Recall', 'AUC'])

    return summary


def train_test_metrics(modelObj, X, y, Xt, yt, modelName):
    train_metrics = error_metrics_classification(modelObj, X, y, modelName)
    train_metrics.columns = train_metrics.columns + "_train"
    test_metrics = error_metrics_classification(modelObj, Xt, yt, modelName)
    test_metrics = test_metrics.drop(["Search", "Model"], axis=1)
    test_metrics.columns = test_metrics.columns + "_test"
    tt_metrics = pd.concat([train_metrics, test_metrics], axis=1)
    tt_metrics = tt_metrics.rename(
        columns={"Search_train": "Search", "Model_train": "Model"}
    )
    tt_metrics
    return tt_metrics



In [7]:
# train = kc_op_train.copy()
# val = kc_op_val.copy()
# test = kc_op_test.copy()
# target = "target"
# model_names

# # "2class"
# model_path

In [8]:
def run_eval(input_data, target, model_names, model_path, metric_to_optimize):
    all_models_tvt_comp = pd.DataFrame()

    X = input_data.drop([target], axis=1)
    y = input_data[target]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)


    print(X_train.shape, y_train.shape)
    print(X_test.shape, y_test.shape)

    for m in model_names:
        model_obj, cv_results = modelTraining(m, X_train, y_train, metric_to_optimize)

        hyp_path = (f'{model_path}/{"model_building_op/best_parameters/all_hyp_params_"+ m+ ".csv"}')
        cv_results.to_csv(hyp_path, index=False)

        # Save the model as a pickle in a file
        pkl_name = (f'{model_path}/{"model_building_op/pkl_files/pkl_"+ m+ ".pkl"}')
        joblib.dump(model_obj, pkl_name)

        train_data = X_train.copy()
        train_data["actuals"] = y_train
        train_data["preds"] = model_obj.predict(X_train)
        train_data["pred_probability"] = model_obj.predict_proba(X_train)[:, 1]

        test_data = X_test.copy()
        test_data["actuals"] = y_test
        test_data["preds"] = model_obj.predict(X_test)
        test_data["pred_probability"] = model_obj.predict_proba(X_test)[:, 1]

        # Saving train test val data with actuals and predictions ##
        train_path = (f'{model_path}/{"model_building_op/act_pred_train_test/train_"+ m+ ".csv"}')
        test_path = (f'{model_path}/{"model_building_op/act_pred_train_test/test_"+ m+ ".csv"}')

        train_data.to_csv(train_path, index=False)
        test_data.to_csv(test_path, index=False)

        train_eval = error_metrics_classification(train_data["actuals"], train_data["preds"])
        train_eval.columns = train_eval.columns + "_train"

        test_eval = error_metrics_classification(test_data["actuals"], test_data["preds"])
        test_eval.columns = test_eval.columns + "_test"

        tvt_eval = pd.concat([train_eval, test_eval], axis=1)
        tvt_eval['Model'] = m
        # all_tt_model_eval = pd.concat([all_tt_model_eval, tt_eval], axis = 0)

        temp_err_path = (f'{model_path}/{"model_building_op/model_perf/eval_metrics_"+ m + ".csv"}')
        tvt_eval.to_csv(temp_err_path, index=False)
        all_models_tvt_comp = pd.concat([all_models_tvt_comp, tvt_eval], axis=0)
        # check Important features
        if m in ["dt", "rf", "xgb", 'ada', 'gb']:
            feature_importances_df = pd.DataFrame({"feature": list(X_train.columns),
                                                   "importance": model_obj.feature_importances_
                                                  }).sort_values("importance", ascending=False)
            feature_importances_df.to_csv(f'{model_path}/{"model_building_op/imp_features/imp_feat_"+ m+ ".csv"}',index=False)
    err_path = (f'{model_path}/{"model_building_op/model_perf/eval_metrics.csv"}')
    all_models_tvt_comp.to_csv(err_path, index=False)
    return all_models_tvt_comp




In [9]:
def run_eval_smote(input_data, target, model_names, model_path, metric_to_optimize):
    all_models_tvt_comp = pd.DataFrame()

    X = input_data.drop([target], axis=1)
    y = input_data[target]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

    print(X_train.shape, y_train.shape)
    print(X_test.shape, y_test.shape)

    # transform the dataset
    oversample = SMOTE()
    X_train_balanced, y_train_balanced = oversample.fit_resample(X_train, y_train)

    for m in model_names:
        model_obj, cv_results = modelTraining(m, X_train_balanced, y_train_balanced, metric_to_optimize)

        hyp_path = (f'{model_path}/{"model_building_op/best_parameters/all_hyp_params_"+ m+ "_smote.csv"}')
        cv_results.to_csv(hyp_path, index=False)

        # Save the model as a pickle in a file
        pkl_name = (f'{model_path}/{"model_building_op/pkl_files/pkl_"+ m+ "_smote.pkl"}')
        joblib.dump(model_obj, pkl_name)


        train_data = X_train.copy()
        train_data["actuals"] = y_train
        train_data["preds"] = model_obj.predict(X_train)
        train_data["pred_probability"] = model_obj.predict_proba(X_train)[:, 1]

        test_data = X_test.copy()
        test_data["actuals"] = y_test
        test_data["preds"] = model_obj.predict(X_test)
        test_data["pred_probability"] = model_obj.predict_proba(X_test)[:, 1]

        # Saving train test val data with actuals and predictions ##
        train_path = (f'{model_path}/{"model_building_op/act_pred_train_test/train_"+ m+ "_smote.csv"}')
        test_path = (f'{model_path}/{"model_building_op/act_pred_train_test/test_"+ m+ "_smote.csv"}')

        train_data.to_csv(train_path, index=False)
        test_data.to_csv(test_path, index=False)

        train_eval = error_metrics_classification(train_data["actuals"], train_data["preds"])
        train_eval.columns = train_eval.columns + "_train"

        test_eval = error_metrics_classification(test_data["actuals"], test_data["preds"])
        test_eval.columns = test_eval.columns + "_test"

        tvt_eval = pd.concat([train_eval, test_eval], axis=1)
        tvt_eval['Model'] = m
        # all_tt_model_eval = pd.concat([all_tt_model_eval, tt_eval], axis = 0)

        temp_err_path = (f'{model_path}/{"model_building_op/model_perf/eval_metrics_"+ m + "_smote.csv"}')
        tvt_eval.to_csv(temp_err_path, index=False)
        all_models_tvt_comp = pd.concat([all_models_tvt_comp, tvt_eval], axis=0)
        # check Important features
        if m in ["dt", "rf", "xgb", 'ada', 'gb']:
            feature_importances_df = pd.DataFrame({"feature": list(X_train.columns),
                                                   "importance": model_obj.feature_importances_
                                                  }).sort_values("importance", ascending=False)
            feature_importances_df.to_csv(f'{model_path}/{"model_building_op/imp_features/imp_feat_"+ m+ "_smote.csv"}',index=False)
    err_path = (f'{model_path}/{"model_building_op/model_perf/eval_metrics_smote.csv"}')
    all_models_tvt_comp.to_csv(err_path, index=False)
    return all_models_tvt_comp




In [10]:
# exp_code = 'experiment_250'
# model_names = models
# monitor = 'f1'

In [11]:
def run_experiment(exp_code, model_names, monitor):

    # Record the start time
    start_time = time.time()

    version_path = "/content/drive/My Drive/keratoconus/data/model_versions/"
    model_version = exp_code
    artifacts = ['bin_class','eval_metrics','model_building_op']
    model_artifacts = ['act_pred_train_test', 'best_parameters','imp_features','model_perf', 'pkl_files']

    datasets_path = f'{version_path}/{model_version}/{"datasets"}'
    model_path = f'{version_path}/{model_version}/{"training"}'

    print(datasets_path)
    print(model_path)

    # Record the start time
    start_time = time.time()

    version_path = "/content/drive/My Drive/keratoconus/data/model_versions/"
    model_version = exp_code
    artifacts = ['bin_class','eval_metrics','model_building_op']
    model_artifacts = ['act_pred_train_test', 'best_parameters','imp_features','model_perf', 'pkl_files']

    datasets_path = f'{version_path}/{model_version}/{"datasets"}'
    model_path = f'{version_path}/{model_version}/{"training"}'

    print(datasets_path)
    print(model_path)

    kc_op = pd.read_csv(f'{datasets_path}/{"kc_op.csv"}')
    print(kc_op.shape)

    kc_op = kc_op.drop(['uid', 'image_ref'], axis = 1)
    print(kc_op.shape)

    run_eval_smote(kc_op, 'target', model_names, model_path, monitor)
    run_eval(kc_op, 'target', model_names, model_path, monitor)


    # Record the end time
    end_time = time.time()

    # Calculate the time taken
    time_taken = end_time - start_time

    # Print the start time, end time, and time taken
    print(f"Start Time: {time.ctime(start_time)}")
    print(f"End Time: {time.ctime(end_time)}")
    print(f"Time Taken: {time_taken:.2f} seconds")



In [12]:
models = ["ada", "gb", "lr", "dt", "rf", "xgb"]

In [13]:
run_experiment('experiment_252', models, 'f1')

/content/drive/My Drive/keratoconus/data/model_versions//experiment_252/datasets
/content/drive/My Drive/keratoconus/data/model_versions//experiment_252/training
/content/drive/My Drive/keratoconus/data/model_versions//experiment_252/datasets
/content/drive/My Drive/keratoconus/data/model_versions//experiment_252/training
(2576, 24)
(2576, 22)
(1803, 21) (1803,)
(773, 21) (773,)
Fitting 5 folds for each of 16 candidates, totalling 80 fits
Fitting 5 folds for each of 864 candidates, totalling 4320 fits
Fitting 5 folds for each of 28 candidates, totalling 140 fits
Fitting 5 folds for each of 384 candidates, totalling 1920 fits
Fitting 5 folds for each of 864 candidates, totalling 4320 fits
Fitting 5 folds for each of 1296 candidates, totalling 6480 fits
(1803, 21) (1803,)
(773, 21) (773,)
Fitting 5 folds for each of 16 candidates, totalling 80 fits
Fitting 5 folds for each of 864 candidates, totalling 4320 fits
Fitting 5 folds for each of 28 candidates, totalling 140 fits
Fitting 5 folds

In [14]:
run_experiment('experiment_251', models, 'f1')

/content/drive/My Drive/keratoconus/data/model_versions//experiment_251/datasets
/content/drive/My Drive/keratoconus/data/model_versions//experiment_251/training
/content/drive/My Drive/keratoconus/data/model_versions//experiment_251/datasets
/content/drive/My Drive/keratoconus/data/model_versions//experiment_251/training
(1555, 24)
(1555, 22)
(1088, 21) (1088,)
(467, 21) (467,)
Fitting 5 folds for each of 16 candidates, totalling 80 fits
Fitting 5 folds for each of 864 candidates, totalling 4320 fits
Fitting 5 folds for each of 28 candidates, totalling 140 fits
Fitting 5 folds for each of 384 candidates, totalling 1920 fits
Fitting 5 folds for each of 864 candidates, totalling 4320 fits
Fitting 5 folds for each of 1296 candidates, totalling 6480 fits
(1088, 21) (1088,)
(467, 21) (467,)
Fitting 5 folds for each of 16 candidates, totalling 80 fits
Fitting 5 folds for each of 864 candidates, totalling 4320 fits
Fitting 5 folds for each of 28 candidates, totalling 140 fits
Fitting 5 folds

In [None]:
run_experiment('experiment_250', models, 'f1')

/content/drive/My Drive/keratoconus/data/model_versions//experiment_250/datasets
/content/drive/My Drive/keratoconus/data/model_versions//experiment_250/training
/content/drive/My Drive/keratoconus/data/model_versions//experiment_250/datasets
/content/drive/My Drive/keratoconus/data/model_versions//experiment_250/training
(4131, 24)
(4131, 22)
(2891, 21) (2891,)
(1240, 21) (1240,)
Fitting 5 folds for each of 16 candidates, totalling 80 fits
Fitting 5 folds for each of 864 candidates, totalling 4320 fits
Fitting 5 folds for each of 28 candidates, totalling 140 fits
Fitting 5 folds for each of 384 candidates, totalling 1920 fits
Fitting 5 folds for each of 864 candidates, totalling 4320 fits
Fitting 5 folds for each of 1296 candidates, totalling 6480 fits
(2891, 21) (2891,)
(1240, 21) (1240,)
Fitting 5 folds for each of 16 candidates, totalling 80 fits
Fitting 5 folds for each of 864 candidates, totalling 4320 fits


In [None]:
# 'experiment_250'

# /content/drive/My Drive/keratoconus/data/model_versions//experiment_250/datasets
# /content/drive/My Drive/keratoconus/data/model_versions//experiment_250/training
# /content/drive/My Drive/keratoconus/data/model_versions//experiment_250/datasets
# /content/drive/My Drive/keratoconus/data/model_versions//experiment_250/training
# (4131, 24)
# (4131, 22)
# (2891, 21) (2891,)
# (1240, 21) (1240,)
# Fitting 5 folds for each of 24 candidates, totalling 120 fits
# Fitting 5 folds for each of 3240 candidates, totalling 16200 fits
# Fitting 5 folds for each of 28 candidates, totalling 140 fits
# Fitting 5 folds for each of 384 candidates, totalling 1920 fits
# Fitting 5 folds for each of 3200 candidates, totalling 16000 fits
# Fitting 5 folds for each of 6480 candidates, totalling 32400 fits
# (2891, 21) (2891,)
# (1240, 21) (1240,)
# Fitting 5 folds for each of 24 candidates, totalling 120 fits
# Fitting 5 folds for each of 3240 candidates, totalling 16200 fits
# Fitting 5 folds for each of 28 candidates, totalling 140 fits
# Fitting 5 folds for each of 384 candidates, totalling 1920 fits
# Fitting 5 folds for each of 3200 candidates, totalling 16000 fits
# Fitting 5 folds for each of 6480 candidates, totalling 32400 fits
# Start Time: Mon Jul 29 07:31:39 2024
# End Time: Mon Jul 29 14:30:51 2024
# Time Taken: 25152.33 seconds