In [5]:
!pip install scikit-optimize

import os
import sys
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from skopt import gp_minimize
from skopt.space import Real
from skopt.utils import use_named_args
from google.colab import drive

# Mount Google Drive so the project code and the labeled dataset are reachable.
drive.mount('/content/drive')

# Make the blink_detection_functions module (stored on Drive) importable.
module_path = '/content/drive/MyDrive/Blink Detection/Code'
if module_path not in sys.path:
    sys.path.append(module_path)

from blink_detection_functions import (
    bandpass_filter,
    remove_outliers,
    scale_signal,
    reduce_noise,
    identify_troughs_and_stable_points,
    analyze_troughs,
    identify_blinks,
    third_pass,
    validate_blinks,
)

# Database connection
db_path = '/content/drive/MyDrive/Blink Detection/Datasets/BLINKData_Labeled.db'
# sqlite3.connect() creates a new, empty database when the file is missing,
# which would leave `tables` empty and make every later evaluation meaningless.
# Fail loudly instead.
if not os.path.isfile(db_path):
    raise FileNotFoundError(f"Labeled blink database not found: {db_path}")
conn = sqlite3.connect(db_path)

# Each user table is processed as one labeled recording. Names starting with
# "sqlite_" are reserved for SQLite's internal tables (e.g. sqlite_sequence),
# so they are excluded from the list.
tables = pd.read_sql(
    "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';",
    conn,
)['name'].tolist()

# NOTE: objective_function takes most of the values below as parameters of the
# same name, so inside the optimizer those module-level values are shadowed by
# the sampled ones. Only lowcut, highcut, sample_rate and
# dup_trough_proximity_threshold are read from here by objective_function.

# Digital filtering settings
lowcut = 0.3 # Low cutoff frequency (Hz) used by the bandpass filter
highcut = 28 # High cutoff frequency (Hz) used by the bandpass filter
sample_rate = 250 # Sampling rate of the data being used (presumably in Hz)
noise_threshold = 0.35444163267506573 # Troughs smaller than this noise threshold are removed, to help make sure only blinks get through

# Outliers
outlier_threshold = 10 # How far beyond the upper and lower bound values a point must be to count as an outlier
outlier_lowerbound = 56.3283293856677 # The percent that acts as the lower quartile for the signal
outlier_upperbound = 87.05909901941988 # The percent that acts as the upper quartile for the signal

# 2nd-pass blink detection
dup_trough_proximity_threshold = 0.3 # Prevents troughs from being double counted: anything within this time (in seconds) is labeled only once
symmetry_threshold = 51.1662910568745 # The lowest symmetry allowed for a blink
blink_trough_threshold = 0.1 # The minimum depth a trough must have to be scored as a blink
max_blink_duration = 0.45 # The general maximum blink duration allowed, in seconds
min_blink_duration = 0.1 # The general minimum blink duration allowed, in seconds

# Blink-likelihood weights
min_duration_percent = 1.0 # Weight of the score given if a blink is longer than the minimum duration
max_duration_percent = 64.37231191603038 # Weight of the score given if a blink is shorter than the maximum duration
symmetry_percent = 99.0 # Weight of the score given if a blink is within the symmetry limit
amplitude_percent = 7.69513579269388 # Weight of the score given if a blink is within the amplitude limit

# 3rd-pass tuning
tp_amplitude_threshold = 2.0 # The width around the blink amplitude that is used to calculate the 3rd-pass data
tp_symmetry_threshold = 70.07846734816201 # The width around the symmetry that is used to calculate the 3rd-pass data
likely_blink_percent = 100.0 # A blink above this likelihood is considered high confidence and is weighted more when calculating personal metrics
unlikey_blink_percent = 0.0 # A blink below this likelihood is considered low confidence and is weighted less when calculating personal metrics
likey_blink_factor = 1.0 # Weighting factor applied to likely blinks when computing the new metrics
unlikey_blink_factor = 0.1 # Weighting factor applied to unlikely blinks when computing the new metrics

# Search space for the Bayesian optimisation: one continuous dimension per
# tunable parameter, written as (low, high, name). The dimension names match
# the parameter names of objective_function, and the order here is the order
# of the values in the optimizer's result vector.
search_space = [
    Real(low, high, name=dim_name)
    for low, high, dim_name in (
        (0.0, 1.0, 'noise_threshold'),
        (0.0, 100.0, 'symmetry_threshold'),
        (0.1, 1.5, 'blink_trough_threshold'),
        (0.35, 0.8, 'max_blink_duration'),
        (0.05, 0.105, 'min_blink_duration'),
        (0.0, 10.0, 'outlier_threshold'),
        (0.0, 60.0, 'outlier_lowerbound'),
        (70.0, 100.0, 'outlier_upperbound'),
        (50.0, 100.0, 'likely_blink_percent'),
        (0.0, 49.0, 'unlikey_blink_percent'),
        (1.0, 99.0, 'min_duration_percent'),
        (1.0, 99.0, 'max_duration_percent'),
        (1.0, 99.0, 'symmetry_percent'),
        (1.0, 99.0, 'amplitude_percent'),
        (0.4, 2.0, 'tp_amplitude_threshold'),
        (1.0, 100.0, 'tp_symmetry_threshold'),
        (1.0, 2.0, 'likey_blink_factor'),
        (0.1, 0.9, 'unlikey_blink_factor'),
    )
]

# ----------------------------------------------------------------------------
# Labeled recordings already read from SQLite, keyed by table name. The
# optimizer evaluates the objective many times and the table contents do not
# depend on the parameters being tuned, so each table is read only once.
_table_cache = {}


def _load_table(table):
    """Return the rows of ``table`` as a DataFrame, querying SQLite at most once.

    A copy of the cached frame is handed out so every evaluation starts from an
    untouched frame, just as a fresh ``pd.read_sql`` call would provide.
    """
    if table not in _table_cache:
        _table_cache[table] = pd.read_sql(f"SELECT * FROM [{table}]", conn)
    return _table_cache[table].copy()


def _precision_recall(validation):
    """Return ``(precision, recall)`` for a list of "TP"/"FP"/"FN" labels.

    A ratio whose denominator is zero is reported as 0.
    """
    tp = validation.count("TP")
    fp = validation.count("FP")
    fn = validation.count("FN")
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    return precision, recall


def _f_beta_and_weighted_score(avg_precision, avg_recall):
    """Return ``(f_beta_score, weighted_score)`` for the averaged metrics.

    The F-beta score uses beta = 1.1 (recall counts slightly more than
    precision). The weighted score then subtracts 0.2 * (1 - precision) and
    0.8 * (1 - recall) from it.
    """
    beta = 1.1
    if avg_precision + avg_recall == 0:
        f_beta_score = 0
    else:
        f_beta_score = (1 + beta**2) * (avg_precision * avg_recall) / ((beta**2 * avg_precision) + avg_recall)

    false_positive_penalty = 0.2
    false_negative_penalty = 0.8
    weighted_score = (
        f_beta_score
        - (false_positive_penalty * (1 - avg_precision))
        - (false_negative_penalty * (1 - avg_recall))
    )
    return f_beta_score, weighted_score


@use_named_args(search_space)
def objective_function(
    noise_threshold,
    symmetry_threshold,
    blink_trough_threshold,
    max_blink_duration,
    min_blink_duration,
    outlier_threshold,
    outlier_lowerbound,
    outlier_upperbound,
    likely_blink_percent,
    unlikey_blink_percent,
    min_duration_percent,
    max_duration_percent,
    symmetry_percent,
    amplitude_percent,
    tp_amplitude_threshold,
    tp_symmetry_threshold,
    likey_blink_factor,
    unlikey_blink_factor,
):
    """Score one parameter set by running blink detection on every table.

    Each parameter is one dimension of ``search_space``. For every table in
    ``tables`` the signal is filtered, troughs are detected and scored, and the
    detections are validated against the rows labeled ``Blink == 1``. Precision
    and recall are computed per table and averaged over tables.

    Besides its arguments the function reads the module-level names ``tables``,
    ``conn``, ``lowcut``, ``highcut``, ``sample_rate`` and
    ``dup_trough_proximity_threshold``.

    Returns:
        The negated weighted score, because ``gp_minimize`` minimizes.
    """
    # The four likelihood weights are rescaled so that they sum to 100.
    raw_weights = np.array([min_duration_percent, max_duration_percent, symmetry_percent, amplitude_percent])
    (min_duration_percent,
     max_duration_percent,
     symmetry_percent,
     amplitude_percent) = 100.0 * raw_weights / np.sum(raw_weights)

    total_precision = 0
    total_recall = 0
    total_files = 0

    for table in tables:
        data = _load_table(table)
        time_array = data['Time (s)'].values

        # Timestamps of the rows labeled as blinks (ground truth).
        blink_times = data[data['Blink'] == 1]['Time (s)']

        PB_Dictionary = {"trough_time": [], "stable_points": [], "amplitude": [], "duration_opening": [], "duration_closing": [], "symmetry": [], "blink_likelyhood": [], "is_blink": [], "duration_total": [], "opening_velocity": [], "closing_velocity": [], "blink_validation": [],}

        # Signal conditioning.
        filtered_signal = bandpass_filter(data['Voltage (V)'], lowcut, highcut, sample_rate)
        filtered_signal = remove_outliers(filtered_signal, outlier_threshold, outlier_lowerbound, outlier_upperbound)
        filtered_signal = scale_signal(filtered_signal)
        scaled_signal = reduce_noise(filtered_signal, noise_threshold)

        # NOTE(review): trough_search_radius (here) and proximity_threshold
        # (in the validate_blinks call below) are not defined in this cell;
        # they presumably come from an earlier cell. Confirm they are set
        # before running this on a fresh kernel.
        PB_Dictionary["trough_time"], PB_Dictionary["stable_points"], PB_Dictionary["amplitude"] = identify_troughs_and_stable_points(scaled_signal, 0.2, time_array, filtered_signal, search_radius=trough_search_radius, trough_proximity_threshold=dup_trough_proximity_threshold)
        PB_Dictionary["duration_total"], PB_Dictionary["duration_opening"], PB_Dictionary["duration_closing"], PB_Dictionary["symmetry"], PB_Dictionary["opening_velocity"], PB_Dictionary["closing_velocity"] = analyze_troughs(scaled_signal, PB_Dictionary)

        PB_Dictionary["blink_likelyhood"], PB_Dictionary["is_blink"] = identify_blinks(PB_Dictionary, min_blink_duration, max_blink_duration, blink_trough_threshold, symmetry_threshold, min_duration_percent, max_duration_percent, symmetry_percent, amplitude_percent)

        PB_Dictionary = third_pass(PB_Dictionary, tp_amplitude_threshold, tp_symmetry_threshold, "iqr", likely_blink_percent, unlikey_blink_percent, likey_blink_factor, unlikey_blink_factor)

        # Convert any NumPy arrays to plain lists before validation.
        for key, value in PB_Dictionary.items():
            if isinstance(value, np.ndarray):
                PB_Dictionary[key] = value.tolist()

        PB_Dictionary = validate_blinks(PB_Dictionary, blink_times, proximity_threshold)

        # list.count() is used on the labels below, so make sure it is a list.
        if isinstance(PB_Dictionary["blink_validation"], np.ndarray):
            PB_Dictionary["blink_validation"] = PB_Dictionary["blink_validation"].tolist()

        precision, recall = _precision_recall(PB_Dictionary["blink_validation"])

        total_precision += precision
        total_recall += recall
        total_files += 1

    # Average over tables; both averages are 0 when there were no tables.
    avg_precision = total_precision / total_files if total_files > 0 else 0
    avg_recall = total_recall / total_files if total_files > 0 else 0

    f_beta_score, weighted_score = _f_beta_and_weighted_score(avg_precision, avg_recall)

    print(f"Attempt Completed - Precision: {avg_precision:.4f}, Recall: {avg_recall:.4f}, F-beta: {f_beta_score:.4f}, Weighted Score: {weighted_score:.4f}")

    return -weighted_score

# Run the Bayesian optimisation: 200 evaluations of the objective with a fixed
# seed.
results = gp_minimize(
    func=objective_function,
    dimensions=search_space,
    n_calls=200,
    random_state=69,
)

# Pair every dimension name with the best value found for it.
optimal_params = dict(zip((dim.name for dim in search_space), results.x))
print("Optimal Parameters:", optimal_params)

# The objective returns the negated score, so flip the sign back for reporting.
best_weighted_score = -results.fun
print(f"Best Weighted Score: {best_weighted_score:.4f}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Attempt Completed - Precision: 0.1734, Recall: 0.3388, F-beta: 0.2367, Weighted Score: -0.4576
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
No detected blinks to analyze.
Attempt Completed - Precision: 0.5486, Recall: 0.1642, F-beta: 0.2405, Weighted Score: -0.5184
No detected blinks to analyze.
Attempt Completed - Precision: 0.6068, Recall: 0.6195, F-beta: 0.6137, Weighted Score: 0.230

KeyboardInterrupt: 