Spectrograms made with Morlet wavelets

used in the final report

#information about Morlet wavelet
#https://www.youtube.com/watch?v=7ahrcB5HL0k&list=PLn0OLiymPak2BYu--bR0ADNBJsC4kuRWs&ab_channel=MikeXCohen
#https://www.biorxiv.org/node/120256.full
#http://nicolasfauchereau.github.io/climatecode/posts/wavelet-analysis-in-python/

In [None]:
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
import datajoint as dj
import neurodsp.filt
import scipy
import numpy as np
import pandas as pd
from pathlib import Path

from statsmodels.stats import multitest
from numpy.random import RandomState
from permute.core import one_sample, two_sample
from netneurotools import stats
import mne
import pywt

import utils.helper_func as hf
import utils.plotting as plotting

In [None]:
# --- Paths and recording identifiers ---------------------------------------
top_dir = '/home/anastasia/epiphyte/anastasia/output'

patient_id = 53
session_nr = 1
# all outputs of this notebook are written below {top_dir}/{folder}
folder = f'04-spectrogram_wavelet_{patient_id}'

# per-channel table; later cells read its 'channel_name' and 'recording_site' columns
df_patient_info = pd.read_csv(f'{top_dir}/{patient_id}_channel_info.csv')

# --- Sampling ---------------------------------------------------------------
fs = 32768
#The resulting sample rate is up / down times the original sample rate.
up = 1
down = 32
fs_downs = (up/down)*fs
dt = 1/fs_downs      # sampling period/time/interval or time resolution, often denoted as T

# --- Time axis --------------------------------------------------------------
# Number of samples in a 1.5 s window at the downsampled rate, plus one so that
# both end points are included. The built-in int() is used because the np.int
# alias was removed in NumPy 1.24 (it was only ever an alias for int).
signal_len = int(fs_downs*1.5)+1
times = np.linspace(-0.5, 1, signal_len)
times = times*1000   # -500 ... 1000 (presumably seconds -> milliseconds)
# index of the sample closest to time zero
time_zero_idx = np.where(times == hf.find_nearest(times, 0))[0][0]

# --- Stimulus table ---------------------------------------------------------
df_stim_info = pd.read_csv(f'{top_dir}/{patient_id}_df_stim_info.csv')
df_stim_info = df_stim_info.loc[df_stim_info['patient_id'] == patient_id]
df_stim_info = df_stim_info.replace('???', 'unknown')
#df_stim_info = df_stim_info.reset_index()

all_stim = np.unique(df_stim_info['stim_id'])
all_stim_name = np.unique(df_stim_info['stim_name'])

# --- Statistics settings ----------------------------------------------------
alpha = 0.05
prng = RandomState(50)      # fixed seed
n_permutations = 5000
method = 'fdr_bh'

columns = ['channel_name', 'channel_location', 'stim_index', 'stim_name', 'paradigm', 'corr_results', 'locations1', 'locations2']

#times for the baseline (same units as `times`)
t1 = -500
t2 = -50


In [None]:
# Create the output tree used by the following cells. Each directory is only
# created when it does not exist yet.

# un-normalized spectrograms
for sub_dir in ('raw', 'log'):
    parent_dir = f'{top_dir}/{folder}/spectrograms/{sub_dir}'
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

# one spectrogram directory and one plot directory per normalization type
norm_types = ['db', 'zscore', 'raw']
for norm_type in norm_types:
    parent_dir = f'{top_dir}/{folder}/spectrograms/normalized/{norm_type}'
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    parent_dir = f'{top_dir}/{folder}/plots/{norm_type}'
    if os.path.exists(parent_dir):
        continue
    # the plot sub-structure is only built for a freshly created plot directory
    os.makedirs(parent_dir)
    hf.create_folder_structure(parent_dir, df_patient_info)

In [None]:
# Wavelet / frequency-grid parameters
w = 6.                    # passed to the wavelet as its `w` argument
num_freqs = 140
min_freq = 1
max_freq = fs_downs/2     # half of the downsampled sampling rate

# logarithmically spaced frequency grid (linear alternative kept for reference)
#freq = np.linspace(min_freq, max_freq, num_freqs)
log_min_freq = np.log10(min_freq)
log_max_freq = np.log10(max_freq)
freq = np.logspace(log_min_freq, log_max_freq, num_freqs)

# wavelet width parameter at each frequency: s = w*fs / (2*f*pi)
widths = w*fs_downs / (2*freq*np.pi)

# index of the grid frequency closest to 100, and the grid truncated there (inclusive)
nearest_to_100 = hf.find_nearest(freq, 100)
idx = np.where(freq == nearest_to_100)[0][0]
freq_less = freq[:idx+1]

From the scipy.signal.morlet2 documentation:
The fundamental frequency of this wavelet in Hz is given by::
    f = w*fs / (2*s*np.pi)
where ``fs`` is the sampling rate and `s` is the wavelet width parameter.
Similarly we can get the wavelet width parameter at frequency ``f``::
    s = w*fs / (2*f*np.pi)

In [None]:
# Sanity check: wavelet transform of the first epoch of channel CSC1, plotted
# three ways (power below `idx`, power over the full grid, 10*log10 power below `idx`).
# NOTE(review): scipy.signal.cwt and scipy.signal.morlet2 were deprecated in
# SciPy 1.12 and removed in SciPy 1.15 - this cell needs an older SciPy.
epochs = np.load(f'{top_dir}/01-preprocessed_{patient_id}/epochs/broadband_2_sec/{patient_id}_epochs_channel-CSC1.npy')
pre_epoch = epochs[0, 512:]   # first epoch, first 512 samples dropped
estim = scipy.signal.cwt(pre_epoch, scipy.signal.morlet2, widths, w=w)

# squared magnitude of the complex coefficients, computed once and reused
power = np.abs(estim)**2

plt.pcolormesh(times, freq[:idx], power[:idx, :], cmap='jet', shading='gouraud')
plt.show()

plt.pcolormesh(times, freq, power, cmap='viridis', shading='gouraud')
plt.show()

a = 10*np.log10(power)
plt.pcolormesh(times, freq[:idx], a[:idx, :], cmap='viridis', shading='gouraud')
plt.show()

# From here on `estim` holds only the SHAPE of the transform; the later cells
# pass it to the hf.* helpers in that form.
estim = estim.shape

In [None]:
#compute spectrogram
# For every processed channel and every stimulus, compute the wavelet
# spectrograms via hf.compute_wavelet_spectrogram and store their log10 under
# spectrograms/log.
#
# Row of df_patient_info at which processing starts. The original loop used
# `range(len(df))` together with `.loc[i+63]`, which runs past the last row of
# the table and raises KeyError before the axis files below are saved. The
# start row is kept, but the loop now stops at the last row.
# Set to 0 to process every channel.
first_channel_row = 63

for i in range(first_channel_row, len(df_patient_info['channel_name'])):
    ch = df_patient_info.loc[i, 'channel_name']
    print(ch)
    ch_site = df_patient_info.loc[i, 'recording_site']
    epochs = np.load(f'{top_dir}/01-preprocessed_{patient_id}/epochs/broadband_2_sec/{patient_id}_epochs_channel-{ch}.npy')

    for st in all_stim:

        # `estim` is the shape tuple left behind by the single-epoch check cell
        cwtm_pre_array, cwtm_post_array, current_stim_index, current_stim_name = hf.compute_wavelet_spectrogram(df_stim_info, st, estim, epochs, widths, w)

        #np.save(f'{top_dir}/{folder}/spectrograms/raw/{ch}_{ch_site}_{current_stim_index}_{current_stim_name}_pre.npy', cwtm_pre_array)
        #np.save(f'{top_dir}/{folder}/spectrograms/raw/{ch}_{ch_site}_{current_stim_index}_{current_stim_name}_post.npy', cwtm_post_array)

        cwtm_pre_array_log = np.log10(cwtm_pre_array)
        cwtm_post_array_log = np.log10(cwtm_post_array)
        np.save(f'{top_dir}/{folder}/spectrograms/log/{ch}_{ch_site}_{current_stim_index}_{current_stim_name}_pre.npy', cwtm_pre_array_log)
        np.save(f'{top_dir}/{folder}/spectrograms/log/{ch}_{ch_site}_{current_stim_index}_{current_stim_name}_post.npy', cwtm_post_array_log)

# axes shared by all spectrograms; re-loaded by the normalization and plotting cells
np.save(f'{top_dir}/{folder}/spectrograms/freq.npy', freq)
np.save(f'{top_dir}/{folder}/spectrograms/freq_less.npy', freq_less)
np.save(f'{top_dir}/{folder}/spectrograms/times.npy', times)

In [None]:
#normalize
# For every processed channel: compute a per-channel baseline from the log
# spectrograms (hf.compute_baseline_per_channel with t1, t2), then z-score each
# stimulus' pre/post log spectrogram against it (hf.baseline_zscore) and save
# the result under spectrograms/normalized/zscore.
t = np.load(f'{top_dir}/{folder}/spectrograms/times.npy')
sp_dir = f'{top_dir}/{folder}/spectrograms'

# Row of df_patient_info at which processing starts. The original loop used
# `range(len(df))` together with `.loc[i+63]`, which runs past the last row of
# the table and raises KeyError. The start row is kept, but the loop now stops
# at the last row. Set to 0 to process every channel.
first_channel_row = 63

for i in range(first_channel_row, len(df_patient_info['channel_name'])):
    ch = df_patient_info.loc[i, 'channel_name']
    print(ch)
    ch_site = df_patient_info.loc[i, 'recording_site']

    #baseline_pre, baseline_post = hf.compute_baseline_per_channel(ch, ch_site, df_stim_info, sp_dir, estim, all_stim, t, t1, t2, sp_type='raw')
    baseline_pre_log, baseline_post_log = hf.compute_baseline_per_channel(ch, ch_site, df_stim_info, sp_dir, estim, all_stim, t, t1, t2, sp_type='log')

    for st in all_stim:
        # rows of the stimulus table for this stimulus, split by position
        pre = df_stim_info.loc[(df_stim_info['position']=='pre') & (df_stim_info['stim_id']==st)]
        pre = pre.reset_index(drop=True)
        post = df_stim_info.loc[(df_stim_info['position']=='post') & (df_stim_info['stim_id']==st)]

        # stimulus metadata is taken from the first 'pre' row
        current_stim_index = pre.loc[0,'stim_id']
        current_stim_name = pre.loc[0,'stim_name']
        current_stim_paradigm = pre.loc[0,'paradigm']

        #pre_sp_array = np.load(f'{top_dir}/{folder}/spectrograms/raw/{ch}_{ch_site}_{current_stim_index}_{current_stim_name}_pre.npy')
        #post_sp_array = np.load(f'{top_dir}/{folder}/spectrograms/raw/{ch}_{ch_site}_{current_stim_index}_{current_stim_name}_post.npy')
        #baseline normalization & dB
        #pre_sp_array_db = hf.db_normalize(pre_sp_array, baseline_pre)
        #post_sp_array_db = hf.db_normalize(post_sp_array, baseline_post)
        #np.save(f'{top_dir}/{folder}/spectrograms/normalized/db/{ch}_{ch_site}_{current_stim_index}_{current_stim_name}_pre_db.npy', pre_sp_array_db)
        #np.save(f'{top_dir}/{folder}/spectrograms/normalized/db/{ch}_{ch_site}_{current_stim_index}_{current_stim_name}_post_db.npy', post_sp_array_db)

        #baseline normalization & z-score
        pre_sp_array = np.load(f'{top_dir}/{folder}/spectrograms/log/{ch}_{ch_site}_{current_stim_index}_{current_stim_name}_pre.npy')
        post_sp_array = np.load(f'{top_dir}/{folder}/spectrograms/log/{ch}_{ch_site}_{current_stim_index}_{current_stim_name}_post.npy')

        pre_sp_array_zscore = hf.baseline_zscore(pre_sp_array, baseline_pre_log)
        post_sp_array_zscore = hf.baseline_zscore(post_sp_array, baseline_post_log)

        np.save(f'{top_dir}/{folder}/spectrograms/normalized/zscore/{ch}_{ch_site}_{current_stim_index}_{current_stim_name}_pre_zscore.npy', pre_sp_array_zscore)
        np.save(f'{top_dir}/{folder}/spectrograms/normalized/zscore/{ch}_{ch_site}_{current_stim_index}_{current_stim_name}_post_zscore.npy', post_sp_array_zscore)
        

In [None]:
# Quick look at one z-scored spectrogram: channel CSC1 / site LA1, stimulus 11.
current_stim_index = 11
current_stim_name = 'Alison'

zscore_file = f'{top_dir}/{folder}/spectrograms/normalized/zscore/CSC1_LA1_{current_stim_index}_{current_stim_name}_post_zscore.npy'
# NOTE: despite its name, this variable holds the *post* z-scored array
pre_sp_array = np.load(zscore_file)

# average over the first axis, keeping only the frequency rows below `idx`
mean_spectrogram = np.mean(pre_sp_array[:, :idx, :], axis=0)
plt.pcolormesh(times, freq[:idx], mean_spectrogram, cmap='jet', shading='gouraud')
plt.show()

In [None]:
#create organized plots
# Re-load the saved axes so this cell can run without the computation cells.
freq = np.load(f'{top_dir}/{folder}/spectrograms/freq.npy')
t = np.load(f'{top_dir}/{folder}/spectrograms/times.npy')

# keep only the frequencies up to (and including) the grid value nearest 130
nearest_to_130 = hf.find_nearest(freq, 130)
idx = np.where(freq == nearest_to_130)[0][0]
freq = freq[0:idx+1]

n_channels = len(df_patient_info['channel_name'])
for i in range(n_channels):
    ch = df_patient_info.at[i, 'channel_name']
    ch_site = df_patient_info.at[i, 'recording_site']

    #norm_type = 'db'
    #plotting.plot_organized_spectrograms(df_stim_info, all_stim_name, t, freq, idx, ch, ch_site, folder, norm_type, alpha, method, 'wilcoxon')

    # only the z-scored spectrograms are plotted here
    norm_type = 'zscore'
    plotting.plot_organized_spectrograms(df_stim_info, all_stim_name, t, freq, idx, ch, ch_site, folder, norm_type, alpha, method, 'wilcoxon')
    
    

In [None]:
#create plots
# Re-load the saved axes so this cell can run without the computation cells.
freq = np.load(f'{top_dir}/{folder}/spectrograms/freq.npy')
t = np.load(f'{top_dir}/{folder}/spectrograms/times.npy')

# keep only the frequencies up to (and including) the grid value nearest 130
nearest_to_130 = hf.find_nearest(freq, 130)
idx = np.where(freq == nearest_to_130)[0][0]
freq = freq[0:idx+1]

n_channels = len(df_patient_info['channel_name'])
for i in range(n_channels):
    ch = df_patient_info.at[i, 'channel_name']
    ch_site = df_patient_info.at[i, 'recording_site']

    # one set of plots per normalization type, 'db' first, then 'zscore'
    # NOTE(review): the code that writes the normalized/db spectrograms is
    # commented out in the normalization cell - confirm those files exist.
    for norm_type in ('db', 'zscore'):
        plotting.plot_all_spectrograms_separately(df_stim_info, all_stim_name, t, freq, idx, ch, ch_site, folder, norm_type, alpha, method)