# Flc-model


* fractional logistic regression -> predict scores (R2, goodness-of-fit, metric weights)
* logistic regression -> isolate high-quality units (R2, goodness-of-fit)

* simplest model: logistic regression on quality metrics (reference to literature)

* we can increase the number of units by adding all non-good units

In [24]:
%load_ext autoreload
%autoreload 2
import os
import torch
import numpy as np
from matplotlib import pyplot as plt
import sklearn
import pandas as pd
from statsmodels.iolib.smpickle import load_pickle

# set project path
# The working directory is switched to the project directory BEFORE the
# project-local imports below. Presumably this is what makes the `src`
# package importable (and it may also anchor relative paths used by the
# project code) -- confirm before reordering these lines.
proj_path = "/gpfs/bbp.cscs.ch/project/proj85/home/laquitai/spikebias/"
os.chdir(proj_path)

# project-local modules
from src.nodes.utils import get_config
from src.nodes import utils 
from src.nodes.models.Flc import dataloader as flc_dataloader
from src.nodes.models.Flc.models import FlcModel # FLC model
from src.nodes.models.CebraSpike.models import CebraSpike # CebraSpike model
from src.nodes.models.CebraSpike import plotutils

# Experiment configuration for the simulated Neuropixels recording
# ("silico_neuropixels", "stimulus"). get_config's result is unpacked into
# two values; only the first one is used in this cell.
cfg_e, _ = get_config("silico_neuropixels", "stimulus").values()

# entries of the "sorting" section, "10m" variant
_sorting_cfg = cfg_e["sorting"]
KS4_e_10m = _sorting_cfg["sorters"]["kilosort4"]["10m"]["output"]  # sorting with KS4
GT_e_10m = _sorting_cfg["simulation"]["ground_truth"]["10m"]["output"]  # ground truth

# entries of the "postprocessing" section
_study_cfg = cfg_e["postprocessing"]["waveform"]["sorted"]["study"]
STUDY_e = _study_cfg["kilosort4"]["10m"]  # WaveformExtractor
# hard-coded study path (not read from the config); the folder name suggests
# it is restricted to single units -- TODO confirm
STUDY_e_su = "/gpfs/bbp.cscs.ch/project/proj85/scratch/laquitai/preprint_2024/0_silico/4_spikesorting_stimulus_test_neuropixels_8-1-24__8slc_80f_360r_50t_200ms_1_smallest_fiber_gids/0fcb7709-b1e9-4d84-b056-5801f20d55af/postpro/realism/spike/sorted/study_ks4_10m_single_units"

# PATHS
# CSV with the pre-computed quality of the sorted units
quality_path = "/gpfs/bbp.cscs.ch/project/proj85/scratch/laquitai/preprint_2024/analysis/sorting_quality/sorting_quality.csv"

# location where the fitted model is written to / read from
model_path = "/gpfs/bbp.cscs.ch/project/proj85/scratch/laquitai/preprint_2024/analysis/sorting_quality/models/evoked/flc/model_on_full_10m_data.pickle"

# Figure styling: all matplotlib defaults are overridden in a single update
# (same keys, same values, same order as individual assignments would use).
plt.rcParams.update(
    {
        # fonts
        "font.family": "Arial",
        "font.size": 6,  # 5-7 with Nature neuroscience as reference
        # line widths
        "lines.linewidth": 0.5,  # typically between 0.5 and 1
        "axes.linewidth": 0.5,
        # hide the top spine of every axes
        "axes.spines.top": False,
        # tick widths
        "xtick.major.width": 0.5,
        "xtick.minor.width": 0.5,
        "ytick.major.width": 0.5,
        "ytick.minor.width": 0.5,
        # tick lengths
        "xtick.major.size": 3.5 * 1.1,
        "xtick.minor.size": 2 * 1.1,
        "ytick.major.size": 3.5 * 1.1,
        "ytick.minor.size": 2 * 1.1,
    }
)

# reusable keyword arguments for legends and tight_layout
legend_cfg = dict(frameon=False, handletextpad=0.5)
tight_layout_cfg = dict(pad=0.001)
LG_FRAMEON = False  # no legend frame

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
2024-10-04 10:24:02,754 - root - utils.py - get_config - INFO - Reading experiment config.
2024-10-04 10:24:03,037 - root - utils.py - get_config - INFO - Reading experiment config. - done


In [17]:
# Probe for a CUDA device. When one is available, report the cuDNN version,
# the number of visible devices, and the name and total memory of device 0.
use_cuda = torch.cuda.is_available()
if use_cuda:
    cudnn_version = torch.backends.cudnn.version()
    n_devices = torch.cuda.device_count()
    device_name = torch.cuda.get_device_name(0)
    # total_memory is divided by 1e9 to print it in GB
    total_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9

    print("__CUDNN VERSION:", cudnn_version)
    print("__Number CUDA Devices:", n_devices)
    print("__CUDA Device Name:", device_name)
    print("__CUDA Device Total Memory [GB]:", total_memory_gb)

__CUDNN VERSION: 90100
__Number CUDA Devices: 4
__CUDA Device Name: Tesla V100-SXM2-16GB
__CUDA Device Total Memory [GB]: 16.935419904


## (2s) Load data and evaluate model

* training on full dataset
* lasso regularized 
* the dataset is a dataframe whose rows are the sorted single-units (indices) and whose columns are their quality metrics and their quality label ("good" or "bad" units, evaluated with our ground truth)

In [29]:
# parameters
# Each flag switches one stage of this cell on or off. The stages build on
# each other: evaluation reads `data_flc`, saving reads `flc_results`.
# Either object may also be left over from an earlier run of this cell.
LOAD_DATA = False  # build the dataset with flc_dataloader.load_dataset
EVAL = False  # fit and evaluate the model on that dataset
SAVE_MODEL = False  # write the fitted model to `model_path`

# load the dataset formatted for the model
if LOAD_DATA:
    data_flc = flc_dataloader.load_dataset(
        quality_path, "E", "KS4", KS4_e_10m, STUDY_e, STUDY_e_su, GT_e_10m
    )

# evaluate the model
if EVAL:
    # fail with an actionable message instead of a bare NameError when the
    # dataset was neither loaded now nor kept from a previous run
    if "data_flc" not in globals():
        raise NameError(
            "EVAL = True requires `data_flc`; "
            "run this cell with LOAD_DATA = True first."
        )

    # instantiate the model
    flcmodel = FlcModel(data_flc["predictors"])

    # train and evaluate on the full dataset
    # NOTE(review): whether evaluate_on_full_dataset cross-validates
    # internally is not visible from this cell -- confirm.
    # NOTE(review): the section notes describe the fit as lasso-regularized
    # while "elastic_net" is requested here -- confirm that the two agree
    # (e.g. through an L1 weight set inside the method).
    flc_results = flcmodel.evaluate_on_full_dataset(
        data_flc["dataset"],
        thresh=0.8,
        regularization="elastic_net",
        maxiter=100,
        cnvrg_tol=1e-10,
        scale_data=True,
        verbose=False,
    )
    display(flc_results["metric_data"])

# save model
if SAVE_MODEL:
    # same reasoning as above: saving needs the results of an evaluation
    if "flc_results" not in globals():
        raise NameError(
            "SAVE_MODEL = True requires `flc_results`; "
            "run this cell with EVAL = True first."
        )
    utils.create_if_not_exists(os.path.dirname(model_path))
    flc_results["metric_data"]["model"].save(model_path)

# Always read the persisted model back, so that `model` is defined for every
# flag combination and always refers to what is stored at `model_path`
# (right after a save this doubles as a round-trip check).
# NOTE: load_pickle unpickles the file -- only point `model_path` at files
# you trust.
model = load_pickle(model_path)
display(model)

<statsmodels.genmod.generalized_linear_model.GLMResultsWrapper at 0x7ffe4c14d3d0>