In [1]:
import numpy as np
from scipy.io.wavfile import read
import matplotlib.pyplot as plt
from scipy import signal
import glob
import os
import pickle
%matplotlib inline

from oe_acute import trial_utils as tu
from oe_acute import MNE

In [2]:
# PREPROCESSING STEPS
# - Resample stimuli to 24 kHz
# - Spectrogram: nfft=128, Hann window of length 128, 50% overlap
# - Remove the zero-frequency (DC) component; leaves 64 frequency bands (nfft/2)
# - Average frequency bands pairwise twice
# - Average time bins pairwise three times
# - Convert to log scale
# NOTE(review): the preprocessing loop applies the log scaling *before* calling
# MNE.kozlov_averaging, not after it as the order of this list suggests --
# confirm which order is intended.

# Path to one individual stimulus file; no other cell visible here references it.
stimulus_wavefile = '/mnt/cube/srrudrar/METstim/3_L2_G105_s_01_@1___L6211_s_20@2.wav.sine'

In [3]:
# Locate every stimulus file and prepare the output location.
stim_folder = '/mnt/cube/srrudrar/experiments/MET_prediction_expt/'
output_folder = '/mnt/cube/srrudrar/stim_preprocess/temporal_model/B1240'

os.makedirs(output_folder, exist_ok=True)

# glob returns matches in arbitrary, filesystem-dependent order.  Sort them so
# the per-stimulus lists built from stim_files (and the pickles written from
# those lists) come out in the same order on every run.
stim_files = sorted(glob.glob(os.path.join(stim_folder, '*.sine')))

# Fail here with a clear message instead of with an IndexError / vstack error
# several cells later.
if not stim_files:
    raise FileNotFoundError(
        "no '*.sine' stimulus files found in {}".format(stim_folder))

# Per-stimulus accumulators filled by the preprocessing loop:
# one entry (a list of arrays) per stimulus file.
spec_next_list = []
spec_segs_list = []

In [4]:
# Preprocessing parameters.  They do not change between stimuli, so they are
# defined once instead of being rebuilt on every loop iteration.
target_fs = 24000   # Hz; every stimulus is resampled to this rate
nsegbins = 16       # number of consecutive time bins in one segment
nperseg = 128       # spectrogram window length, in samples
noverlap = 64       # 50% overlap between consecutive windows
nfft = 128

# (Re)initialise the accumulators in this cell so that re-running it does not
# append every stimulus a second time.  spec_all holds the full per-stimulus
# spectrograms and is what the later np.hstack(spec_all) cell consumes.
spec_all = []
spec_segs_list = []
spec_next_list = []

for stim_file in stim_files:
    stim_name = os.path.split(stim_file)[-1]
    stim_name = stim_name.split('.')[0]

    # read wave file
    stim_fs, stimulus_stereo = read(stim_file)

    # extract signal channel (second column of the multi-channel array)
    stimulus_data = stimulus_stereo[:, 1]

    # Resample to target_fs.  The up/down ratio is the reduced fraction
    # target_fs / stim_fs: (80, 147) for 44.1 kHz and (1, 2) for 48 kHz, and it
    # also works for any other integer sample rate.
    rate_gcd = int(np.gcd(target_fs, stim_fs))
    up, down = target_fs // rate_gcd, stim_fs // rate_gcd
    stim_resamp = signal.resample_poly(stimulus_data, up, down)

    # Spectrogram.  'hann' is the canonical SciPy window name; the old
    # 'hanning' alias is rejected by recent SciPy releases.
    f, ts, Sxx = signal.spectrogram(stim_resamp, target_fs, window='hann',
                                    nperseg=nperseg, noverlap=noverlap, nfft=nfft)

    # Remove DC (row 0) and convert power to dB
    spec = 10*np.log10(Sxx[1:, :])
    f = f[1:]

    spec = MNE.kozlov_averaging(spec)
    spec_all.append(spec)

    # Sliding windows over the time axis (columns of spec): segs[n] holds the
    # nsegbins consecutive bins starting at column n, and spec1[n] is the
    # single column paired with it (collected into spec_next_list).
    # NOTE(review): the paired column is n+nsegbins+1, which skips column
    # n+nsegbins -- the bin immediately after the segment.  Confirm that this
    # one-bin gap is intended and not an off-by-one.
    n_windows = len(spec.T) - nsegbins - 1
    segs = [spec[:, n:(n+nsegbins)] for n in range(n_windows)]
    spec1 = [spec[:, n+nsegbins+1] for n in range(n_windows)]
    nsegs = len(segs)
    spec_segs_list.append(segs)
    spec_next_list.append(spec1)

In [5]:
# Number of segments and number of paired bins for the first stimulus.
tuple(len(per_stim[0]) for per_stim in (spec_segs_list, spec_next_list))

(2816, 2816)

In [6]:
# Pool all stimuli: stack the per-stimulus segment lists and the per-stimulus
# paired-bin lists along the first axis.
spec_segs_arr, spec_next_arr = (
    np.vstack(per_stim) for per_stim in (spec_segs_list, spec_next_list)
)

In [7]:
# Shapes of the two stacked arrays.
spec_segs_arr.shape, spec_next_arr.shape

((34581, 16, 16), (34581, 16))

In [8]:
# Join the entries of spec_all into one array with np.hstack (horizontal
# concatenation; presumably the time axis of per-stimulus spectrograms --
# TODO confirm).
# NOTE(review): spec_all must already hold one array per stimulus when this
# cell runs; on a fresh kernel that is only true if the preprocessing loop
# populates it.
spec_arr = np.hstack(spec_all)

In [10]:
# Write the concatenated array to disk in NumPy's .npy format.
spec_arr_path = '/mnt/cube/srrudrar/stim_preprocess/temporal_model/spec_arr.npy'
np.save(spec_arr_path, spec_arr)

In [12]:
# Persist the per-stimulus lists (one entry per stimulus file) as pickles.
# The file handles get their own names: calling them `f` would overwrite the
# frequency axis `f` that the spectrogram cell leaves in the notebook namespace.
# NOTE(review): output_folder (.../temporal_model/B1240) is created earlier in
# the notebook, but these files are written to its parent directory -- confirm
# which location is intended.
spec_next_name = '/mnt/cube/srrudrar/stim_preprocess/temporal_model/spec_next_list.pkl'
with open(spec_next_name, 'wb') as next_file:
    pickle.dump(spec_next_list, next_file)

spec_segs_name = '/mnt/cube/srrudrar/stim_preprocess/temporal_model/spec_segs_list.pkl'
with open(spec_segs_name, 'wb') as segs_file:
    pickle.dump(spec_segs_list, segs_file)