# Create stimuli for training and test for MB3-Rules

In [171]:
import glob
from pathlib import Path

import numpy as np
import pandas as pd
from pydub import AudioSegment

In [118]:
wavfiles = pd.DataFrame({'fname':glob.glob('./FinalStimuliCopy/*.wav')}) # Get list of files
#wavfiles = pd.DataFrame({'fname':wavfiles['fname'].apply(lambda x: x[-6:])}) # Extract .wav filenames
syllables=wavfiles['fname'].apply(lambda x: x[-6:-4]) # Extract list of syllables
wavfiles['syllable']=syllables # STore list of syllables in df
wavfiles.head(3) # Check

Unnamed: 0,fname,syllable
0,./FinalStimuliCopy/ke.wav,ke
1,./FinalStimuliCopy/fu.wav,fu
2,./FinalStimuliCopy/fa.wav,fa


### Define training stimuli
#### NB: the first COLUMN holds the A syllables and the remaining columns hold the B syllables. Each row lists the B syllables that can be combined with that row's A syllable (first column); the `xx` in the last row is a placeholder, not a syllable.

In [95]:
trainingset = np.array(['ku', 'me', 'fa', 'pe'], dtype=object)
trainingset = np.vstack([trainingset,['ke',  'fa', 'fu', 'pu']])
trainingset = np.vstack([trainingset,['mu',  'ka', 'fa', 'pe']])
trainingset = np.vstack([trainingset,['ma',  'fu', 'pe', 'pu']])
trainingset = np.vstack([trainingset,['fe',  'ka', 'pe', 'pu']])
trainingset = np.vstack([trainingset,['pa',  'me', 'fu', 'xx']])
trainingset # Check

array([['ku', 'me', 'fa', 'pe'],
       ['ke', 'fa', 'fu', 'pu'],
       ['mu', 'ka', 'fa', 'pe'],
       ['ma', 'fu', 'pe', 'pu'],
       ['fe', 'ka', 'pe', 'pu'],
       ['pa', 'me', 'fu', 'xx']], dtype=object)

### Define test stimuli
#### Each row has an allowed combination

In [96]:
testset = np.array([['ti', 'so'],['si', 'to'],['no', 'li'],['lo', 'ni']])
testset

array([['ti', 'so'],
       ['si', 'to'],
       ['no', 'li'],
       ['lo', 'ni']], dtype='<U2')

## Create training sentences for ABB and ABA

In [167]:
# First, load the 250ms silence file
wavSIL250 = AudioSegment.from_wav('./SILfiles/SIL250ms.wav')

In [170]:
for syllA in np.arange(0,trainingset.shape[0]): # The first syllable of each row..
    dumA = trainingset[syllA,0] # A syllable name
    for syllB in np.arange(1,trainingset.shape[1]):
        dumB = trainingset[syllA,syllB] # B syllable name
        if not(dumB=='xx'): # the last entry..
            # Get names of wave files in dummy variables
            dumAfname = wavfiles.loc[wavfiles['syllable']==dumA,'fname'].to_list()[0] 
            dumBfname = wavfiles.loc[wavfiles['syllable']==dumB,'fname'].to_list()[0]
            # Read in wave info from A and B syllable files
            wavA = AudioSegment.from_wav(dumAfname)
            wavB = AudioSegment.from_wav(dumBfname)
            
            # Combine all wavs into one including 250ms silences: ABB
            outwav = wavA + wavSIL250 + wavB + wavSIL250 + wavB
            # Write to file
            sentencename = './TrainingSentences/ABB/'+dumA+dumB+dumB+'.wav'
            outwav.export(sentencename, format="wav")

            # Combine all wavs into one including 250ms silences: ABA
            outwav = wavA + wavSIL250 + wavB + wavSIL250 + wavA
            # Write to file
            sentencename = './TrainingSentences/ABA/'+dumA+dumB+dumA+'.wav'
            outwav.export(sentencename, format="wav")
print('Done!')

Done!


## Then, create test sentences for ABB and ABA

In [172]:
# First, load the 250ms silence file
wavSIL250 = AudioSegment.from_wav('./SILfiles/SIL250ms.wav')

In [175]:
for syllA in np.arange(0,testset.shape[0]): # The first syllable of each row..
    dumA = testset[syllA,0] # A syllable name
    dumB = testset[syllA,1] # A syllable name
    
    # Get names of wave files in dummy variables
    dumAfname = wavfiles.loc[wavfiles['syllable']==dumA,'fname'].to_list()[0] 
    dumBfname = wavfiles.loc[wavfiles['syllable']==dumB,'fname'].to_list()[0]
    # Read in wave info from A and B syllable files
    wavA = AudioSegment.from_wav(dumAfname)
    wavB = AudioSegment.from_wav(dumBfname)

    # Combine all wavs into one including 250ms silences: ABB
    outwav = wavA + wavSIL250 + wavB + wavSIL250 + wavB
    # Write to file
    sentencename = './TestSentences/ABB/'+dumA+dumB+dumB+'.wav'
    outwav.export(sentencename, format="wav")

    # Combine all wavs into one including 250ms silences: ABA
    outwav = wavA + wavSIL250 + wavB + wavSIL250 + wavA
    # Write to file
    sentencename = './TestSentences/ABA/'+dumA+dumB+dumA+'.wav'
    outwav.export(sentencename, format="wav")
print('Done!')

Done!


# Make ABB and ABA training stimulus examples

### First ABB

In [204]:
np.random.seed(0) # Make these replicable
ABBexample_wavfiles = np.array(glob.glob('./TrainingSentences/ABB/*.wav')) # Get list of files
ABBexample_wavfiles = np.random.permutation(ABBexample_wavfiles)

# Load the 1000ms silence file (silence between sentences)
wavSIL250 = AudioSegment.from_wav('./SILfiles/SIL250ms.wav')
wavSIL1000 = AudioSegment.from_wav('./SILfiles/SIL1000ms.wav')

# Initialize with a short silence
outwav = wavSIL250
# Iterate over remaining files
for wavfname in ABBexample_wavfiles:
    dumwav = AudioSegment.from_wav(wavfname)
    # Add in
    outwav += dumwav + wavSIL1000
# Add in final silence
outwav += wavSIL250

# Write out
outwav.export('ABBexample.wav', format="wav")
print('Done!')

Done!


### Then ABA

In [205]:
np.random.seed(0) # Make these replicable
ABAexample_wavfiles = np.array(glob.glob('./TrainingSentences/ABA/*.wav')) # Get list of files
ABAexample_wavfiles = np.random.permutation(example_wavfiles)

# Load the 1000ms silence file (silence between sentences)
wavSIL250 = AudioSegment.from_wav('./SILfiles/SIL250ms.wav')
wavSIL1000 = AudioSegment.from_wav('./SILfiles/SIL1000ms.wav')

# Initialize with a short silence
outwav = wavSIL250
# Iterate over remaining files
for wavfname in ABAexample_wavfiles:
    dumwav = AudioSegment.from_wav(wavfname)
    # Add in
    outwav += dumwav + wavSIL1000
# Add in final silence
outwav += wavSIL250

# Write out
outwav.export('ABAexample.wav', format="wav")
print('Done!')

Done!
