Merge pull request #1 from brendonw1/master

123
petersenpeter · Dec 1, 2019 · 460c2e5 · 460c2e5
2 parents e115faf + afa7c00
commit 460c2e5
Show file tree

Hide file tree

Showing 21 changed files with 2,230 additions and 699 deletions.
diff --git a/StandardConfig_KSW.m → ...rationFiles/KilosortConfiguration_Cesar.m b/StandardConfig_KSW.m → ...rationFiles/KilosortConfiguration_Cesar.m
@@ -36,7 +36,7 @@
 % end
 ops.nt0 = round(1.6*ops.fs/1000); % window width in samples. 1.6ms at 20kH corresponds to 32 samples
 
-ops.nNeighPC            = min([12 ops.Nchan]); % visualization only (Phy): number of channnels to mask the PCs, leave empty to skip (12)
+ops.nNeighPC            = min([16 ops.Nchan]); % visualization only (Phy): number of channnels to mask the PCs, leave empty to skip (12)
 ops.nNeigh              = min([16 ops.Nchan]); % visualization only (Phy): number of neighboring templates to retain projections of (16)
 
 % options for channel whitening
@@ -52,7 +52,7 @@
 ops.Nrank            = 3;    % matrix rank of spike template model (3)
 ops.nfullpasses      = 6;    % number of complete passes through data during optimization (6)
 ops.maxFR            = 40000;  % maximum number of spikes to extract per batch (20000)
-ops.fshigh           = 300;   % frequency for high pass filtering
+ops.fshigh           = 500;   % frequency for high pass filtering
 ops.fslow            = 8000;   % frequency for low pass filtering (optional)
 ops.ntbuff           = 64;    % samples of symmetrical buffer for whitening and spike detection
 ops.scaleproc        = 200;   % int16 scaling of whitened data
@@ -61,22 +61,22 @@
 % the following options can improve/deteriorate results.
 % when multiple values are provided for an option, the first two are beginning and ending anneal values,
 % the third is the value used in the final pass.
-ops.Th               = [6 12 12];    % threshold for detecting spikes on template-filtered data ([6 12 12])
-ops.lam              = [12 40 40];   % large means amplitudes are forced around the mean ([10 30 30])
+ops.Th               = [5 6 6];    % threshold for detecting spikes on template-filtered data ([6 12 12])
+ops.lam              = [10 20 20];   % large means amplitudes are forced around the mean ([10 30 30])
 ops.nannealpasses    = 4;            % should be less than nfullpasses (4)
 ops.momentum         = 1./[20 800];  % start with high momentum and anneal (1./[20 1000])
 ops.shuffle_clusters = 1;            % allow merges and splits during optimization (1)
 ops.mergeT           = .1;           % upper threshold for merging (.1)
 ops.splitT           = .1;           % lower threshold for splitting (.1)
 
 % options for initializing spikes from data
-ops.initialize      = 'fromData';    %'fromData' or 'no'
-ops.spkTh           = -6;      % spike threshold in standard deviations (4)
+ops.initialize      = 'no';    %'fromData' or 'no'
+ops.spkTh           = 4;      % spike threshold in standard deviations (4)
 ops.loc_range       = [3  1];  % ranges to detect peaks; plus/minus in time and channel ([3 1])
 ops.long_range      = [30  6]; % ranges to detect isolated peaks ([30 6])
 ops.maskMaxChannels = 8;       % how many channels to mask up/down ([5])
 ops.crit            = .65;     % upper criterion for discarding spike repeates (0.65)
-ops.nFiltMax        = 80000;   % maximum "unique" spikes to consider (10000)
+ops.nFiltMax        = 10000;   % maximum "unique" spikes to consider (10000)
 
 % load predefined principal components (visualization only (Phy): used for features)
 dd                  = load('PCspikes2.mat'); % you might want to recompute this from your own data
@@ -86,4 +86,7 @@
 ops.fracse  = 0.1; % binning step along discriminant axis for posthoc merges (in units of sd)
 ops.epu     = Inf;
 ops.ForceMaxRAMforDat   = 15000000000; % maximum RAM the algorithm will try to use; on Windows it will autodetect.
+
+% Saving xml content to ops strucuture
+ops.xml = xml;
 end
diff --git a/ConfigurationFiles/KilosortConfiguration_Omid.m b/ConfigurationFiles/KilosortConfiguration_Omid.m
@@ -0,0 +1,92 @@
+function ops = StandardConfig_KSW(XMLfile)
+
+% Loads xml parameters (Neuroscope)
+xml = LoadXml(XMLfile);
+% Define rootpath
+rootpath = fileparts(XMLfile);
+
+ops.GPU                 = 1; % whether to run this code on an Nvidia GPU (much faster, mexGPUall first)
+ops.parfor              = 1; % whether to use parfor to accelerate some parts of the algorithm
+ops.verbose             = 1; % whether to print command line progress
+ops.showfigures         = 0; % whether to plot figures during optimization
+ops.datatype            = 'dat';  % binary ('dat', 'bin') or 'openEphys'
+ops.fbinary             = [XMLfile(1:end-3) 'dat']; % will be created for 'openEphys'
+
+%Should get rid of this...
+if isdir('G:\Kilosort')
+    disp('Creating a temporary dat file on the SSD drive')
+    ops.fproc = ['G:\Kilosort\temp_wh.dat'];
+else
+    ops.fproc = fullfile(rootpath,'temp_wh.dat');
+end
+ops.root                = rootpath; % 'openEphys' only: where raw files are
+ops.fs                  = xml.SampleRate;        % sampling rate
+
+load(fullfile(rootpath,'chanMap.mat'))
+ops.NchanTOT            = length(connected); % total number of channels
+
+ops.Nchan = sum(connected>1e-6); % number of active channels
+
+templatemultiplier = 8;
+ops.Nfilt              =   ops.Nchan*templatemultiplier - mod(ops.Nchan*templatemultiplier,32); % number of filters to use (2-4 times more than Nchan, should be a multiple of 32)
+% if ops.Nfilt > 2024;
+%     ops.Nfilt = 2024;
+% elseif ops.Nfilt == 0
+%     ops.Nfilt = 32;
+% end
+ops.nt0 = round(1.6*ops.fs/1000); % window width in samples. 1.6ms at 20kH corresponds to 32 samples
+
+ops.nNeighPC            = min([16 ops.Nchan]); % visualization only (Phy): number of channnels to mask the PCs, leave empty to skip (12)
+ops.nNeigh              = min([16 ops.Nchan]); % visualization only (Phy): number of neighboring templates to retain projections of (16)
+
+% options for channel whitening
+ops.whitening           = 'full'; % type of whitening (default 'full', for 'noSpikes' set options for spike detection below)
+ops.nSkipCov            = 1; % compute whitening matrix from every N-th batch (1)
+ops.whiteningRange      = min([64 ops.Nchan]); % how many channels to whiten together (Inf for whole probe whitening, should be fine if Nchan<=32)
+
+% define the channel map as a filename (string) or simply an array
+ops.chanMap             = fullfile(rootpath,'chanMap.mat'); % make this file using createChannelMapFile.m
+ops.criterionNoiseChannels = 0.00001; % fraction of "noise" templates allowed to span all channel groups (see createChannelMapFile for more info).
+
+% other options for controlling the model and optimization
+ops.Nrank            = 3;    % matrix rank of spike template model (3)
+ops.nfullpasses      = 6;    % number of complete passes through data during optimization (6)
+ops.maxFR            = 40000;  % maximum number of spikes to extract per batch (20000)
+ops.fshigh           = 500;   % frequency for high pass filtering
+ops.fslow            = 8000;   % frequency for low pass filtering (optional)
+ops.ntbuff           = 64;    % samples of symmetrical buffer for whitening and spike detection
+ops.scaleproc        = 200;   % int16 scaling of whitened data
+ops.NT               =  4*32*1028+ ops.ntbuff;% this is the batch size (try decreasing if out of memory) for GPU should be multiple of 32  + ntbuff
+
+% the following options can improve/deteriorate results.
+% when multiple values are provided for an option, the first two are beginning and ending anneal values,
+% the third is the value used in the final pass.
+ops.Th               = [5 6 6];    % threshold for detecting spikes on template-filtered data ([6 12 12])
+ops.lam              = [10 20 20];   % large means amplitudes are forced around the mean ([10 30 30])
+ops.nannealpasses    = 4;            % should be less than nfullpasses (4)
+ops.momentum         = 1./[20 800];  % start with high momentum and anneal (1./[20 1000])
+ops.shuffle_clusters = 1;            % allow merges and splits during optimization (1)
+ops.mergeT           = .1;           % upper threshold for merging (.1)
+ops.splitT           = .1;           % lower threshold for splitting (.1)
+
+% options for initializing spikes from data
+ops.initialize      = 'no';    %'fromData' or 'no'
+ops.spkTh           = 4;      % spike threshold in standard deviations (4)
+ops.loc_range       = [3  1];  % ranges to detect peaks; plus/minus in time and channel ([3 1])
+ops.long_range      = [30  6]; % ranges to detect isolated peaks ([30 6])
+ops.maskMaxChannels = 8;       % how many channels to mask up/down ([5])
+ops.crit            = .65;     % upper criterion for discarding spike repeates (0.65)
+ops.nFiltMax        = 10000;   % maximum "unique" spikes to consider (10000)
+
+% load predefined principal components (visualization only (Phy): used for features)
+dd                  = load('PCspikes2.mat'); % you might want to recompute this from your own data
+ops.wPCA            = dd.Wi(:,1:7); % PCs
+
+% options for posthoc merges (under construction)
+ops.fracse  = 0.1; % binning step along discriminant axis for posthoc merges (in units of sd)
+ops.epu     = Inf;
+ops.ForceMaxRAMforDat   = 15000000000; % maximum RAM the algorithm will try to use; on Windows it will autodetect.
+
+% Saving xml content to ops strucuture
+ops.xml = xml;
+end
diff --git a/ConvertKilosort2Neurosuite_KSW.m b/ConvertKilosort2Neurosuite_KSW.m
@@ -22,7 +22,7 @@ function ConvertKilosort2Neurosuite_KSW(rez)
     [~,basename] = fileparts(cd);
     basepath = cd;
 end
-if ~exist('rez','var');
+if ~exist('rez','var')
     load(fullfile(basepath,'rez.mat'))
 end
 

diff --git a/KiloSortWrapper.m b/KiloSortWrapper.m
@@ -1,59 +1,102 @@
-function savepath = KiloSortWrapper(basepath,basename)
+function savepath = KiloSortWrapper(varargin)
 % Creates channel map from Neuroscope xml files, runs KiloSort and
-% writes output data in the Neuroscope/Klusters format. 
-% StandardConfig.m should be in the path or copied to the local folder
+% writes output data to Neurosuite format or Phy.
 % 
-%  USAGE
+% USAGE
 %
-%    KiloSortWrapper()
-%    Should be run from the data folder, and file basenames are the
-%    same as the name as current directory
+% KiloSortWrapper
+% Run from data folder. File basenames must be the
+% same as the name as current folder
 %
-%    KiloSortWrapper(basepath,basenmae)
+% KiloSortWrapper(varargin)
+% Check varargin description below when input parameters are parsed
 %
-%    INPUTS
-%    basepath       path to the folder containing the data
-%    basename       file basenames (of the dat and xml files)
+% Dependencies:  KiloSort (https://github.com/cortex-lab/KiloSort)
 %
-%    Dependencies:  KiloSort (https://github.com/cortex-lab/KiloSort)
-
+% The AutoClustering requires CCGHeart to be compiled.
+% Go to the private folder of the wrapper and type:
+% mex -O CCGHeart.c
+% 
 % Copyright (C) 2016 Brendon Watson and the Buzsakilab
 %
 % This program is free software; you can redistribute it and/or modify
 % it under the terms of the GNU General Public License as published by
 % the Free Software Foundation; either version 2 of the License, or
 % (at your option) any later version.
+
 disp('Running Kilosort spike sorting with the Buzsaki lab wrapper')
 
-%% Addpath if needed
-% addpath(genpath('gitrepositories/KiloSort')) % path to kilosort folder
-% addpath(genpath('gitrepositories/npy-matlab')) % path to npy-matlab scripts
-
-%% If function is called without argument
-if nargin == 0
-   [~,basename] = fileparts(cd);
-   basepath = cd;
-elseif nargin == 1
-    [~,basename] = fileparts(basepath);
-    basepath = cd;
-end
+%% Parsing inputs
+p = inputParser;
+basepath = cd;
+[~,basename] = fileparts(basepath);
+
+addParameter(p,'basepath',basepath,@ischar)         % path to the folder containing the data
+addParameter(p,'basename',basename,@ischar)         % file basenames (of the dat and xml files)
+addParameter(p,'GPU_id',1,@isnumeric)               % Specify the GPU_id
+addParameter(p,'SSD_path','K:\Kilosort',@ischar)    % Path to SSD disk. Make it empty to disable SSD
+addParameter(p,'CreateSubdirectory',1,@isnumeric)   % Puts the Kilosort output into a subfolder
+addParameter(p,'performAutoCluster',1,@isnumeric)   % Performs PhyAutoCluster once Kilosort is complete when exporting to Phy.
+addParameter(p,'config','',@ischar)                 % Specify a configuration file to use from the ConfigurationFiles folder. e.g. 'Omid'
+
+parse(p,varargin{:})
+
+basepath = p.Results.basepath;
+basename = p.Results.basename;
+GPU_id = p.Results.GPU_id;
+SSD_path = p.Results.SSD_path;
+CreateSubdirectory = p.Results.CreateSubdirectory;
+performAutoCluster = p.Results.performAutoCluster;
+config = p.Results.config;
+
 cd(basepath)
 
+%% Checking if dat and xml files exist
+if ~exist(fullfile(basepath,[basename,'.xml']))
+    warning('KilosortWrapper  %s.xml file not in path %s',basename,basepath);
+    return
+elseif ~exist(fullfile(basepath,[basename,'.dat']))
+    warning('KilosortWrapper  %s.dat file not in path %s',basename,basepath)
+    return
+end
+
 %% Creates a channel map file
 disp('Creating ChannelMapFile')
-createChannelMapFile_KSW(basepath,'staggered');
+createChannelMapFile_KSW(basepath,basename,'staggered');
 
-%% default options are in parenthesis after the comment
+%% Loading configurations
 XMLFilePath = fullfile(basepath, [basename '.xml']);
-% if exist(fullfile(basepath,'StandardConfig.m'),'file') %this should actually be unnecessary
-%     addpath(basepath);
-% end
-ops = StandardConfig_KSW(XMLFilePath);
+
+if isempty(config)
+    disp('Running Kilosort with standard settings')
+    ops = KilosortConfiguration(XMLFilePath);
+else
+    disp('Running Kilosort with user specific settings')
+    config_string = str2func(['KiloSortConfiguration_' config_version]);
+    ops = config_string(XMLFilePath);
+    clear config_string;
+end
+
+%% % Checks SSD location for sufficient space
+if isdir(SSD_path)
+    FileObj = java.io.File(SSD_path);
+    free_bytes = FileObj.getFreeSpace;
+    dat_file = dir(fullfile(basepath,[basename,'.dat']));
+    if dat_file.bytes*1.1<FileObj.getFreeSpace
+        disp('Creating a temporary dat file on the SSD drive')
+        ops.fproc = fullfile(SSD_path, [basename,'_temp_wh.dat']);
+    else
+        warning('Not sufficient space on SSD drive. Creating local dat file instead')
+        ops.fproc = fullfile(basepath,'temp_wh.dat');
+    end
+else
+    ops.fproc = fullfile(basepath,'temp_wh.dat');
+end
 
 %%
 if ops.GPU
     disp('Initializing GPU')
-    gpuDevice(1); % initialize GPU (will erase any existing GPU arrays)
+    gpudev = gpuDevice(GPU_id); % initialize GPU (will erase any existing GPU arrays)
 end
 if strcmp(ops.datatype , 'openEphys')
    ops = convertOpenEphysToRawBInary(ops);  % convert data, only for OpenEphys
@@ -72,7 +115,7 @@
 
 %% posthoc merge templates (under construction)
 % save matlab results file
-CreateSubdirectory = 1;
+
 if CreateSubdirectory
     timestamp = ['Kilosort_' datestr(clock,'yyyy-mm-dd_HHMMSS')];
     savepath = fullfile(basepath, timestamp);
@@ -88,15 +131,35 @@
 % rez = merge_posthoc2(rez);
 save(fullfile(savepath,  'rez.mat'), 'rez', '-v7.3');
 
-%% save python results file for Phy
-disp('Converting to Phy format')
-rezToPhy_KSW(rez);
+%% export python results file for Phy
+if ops.export.phy
+    disp('Converting to Phy format')
+    rezToPhy_KSW(rez);
+
+    % AutoClustering the Phy output
+    if performAutoCluster
+        PhyAutoClustering(savepath);
+    end
+end
+
+%% export Neurosuite files
+if ops.export.neurosuite
+    disp('Converting to Klusters format')
+    load('rez.mat')
+    rez.ops.root = pwd;
+    clustering_path = pwd;
+    basename = rez.ops.basename;
+    rez.ops.fbinary = fullfile(pwd, [basename,'.dat']);
+    Kilosort2Neurosuite(rez)
 
-%% save python results file for Klusters
-% disp('Converting to Klusters format')
-% ConvertKilosort2Neurosuite_KSW(rez);
+    writeNPY(rez.ops.kcoords, fullfile(clustering_path, 'channel_shanks.npy'));
+
+    phy_export_units(clustering_path,basename);
+end
 
-%% Remove temporary file
+%% Remove temporary file and resetting GPU
 delete(ops.fproc);
+reset(gpudev)
+gpuDevice([])
 disp('Kilosort Processing complete')