In [52]:
import os

# Directory holding the phoneme label files read by this cell.
phoneme_path = '/media/monica/128 GB/htkCorrectedLab/lab/txt/phn'
# File names are assembled below as <file_prefix><two-digit index><file_suffix>.
file_prefix = 'ISLE_SESS0003_BLOCKD01_'
file_suffix = '_sprt1.txt'

# Collects one formatted output line per vowel nucleus found.
output_list = []

# Function to read phoneme data and extract nuclei
def get_nuclei_from_phn(file_path, vowels=None):
    """Collect the vowel (nucleus) segments listed in a phoneme label file.

    Only lines made of exactly three whitespace-separated fields
    (``start end phoneme``) are considered; every other line is ignored.

    Args:
        file_path: Path of the phoneme label file to read.
        vowels: Optional collection of phoneme labels to treat as nuclei.
            When omitted, the vowel set this cell has always used applies.

    Returns:
        A list of ``(phoneme, start, end)`` tuples in file order, with both
        times converted to ``float``. The list is empty when the file cannot
        be opened; a message is printed instead of raising.
    """
    if vowels is None:
        vowels = {'A', 'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'EH', 'ER', 'EY',
                  'IH', 'IY', 'OW', 'OY', 'UH', 'UW'}

    nuclei = []
    try:
        with open(file_path, 'r') as phn_file:
            for line_no, line in enumerate(phn_file, start=1):
                parts = line.split()
                if len(parts) != 3:
                    continue
                phoneme = parts[2]
                # Silence is never a nucleus, whatever vowel set was supplied.
                if phoneme == 'SIL' or phoneme not in vowels:
                    continue
                try:
                    start, end = float(parts[0]), float(parts[1])
                except ValueError:
                    # A single bad line must not discard the rest of the file.
                    print(f"Skipping malformed line {line_no} in {file_path}: {line.strip()!r}")
                    continue
                nuclei.append((phoneme, start, end))
    except FileNotFoundError:
        print(f"Phoneme file not found: {file_path}")
    except Exception as e:
        print(f"Error reading phoneme file {file_path}: {e}")
    return nuclei

for i in range(1, 82):  # file indices 1 through 81
    index_tag = f"{i:02d}"
    phn_file = os.path.join(phoneme_path, f"{file_prefix}{index_tag}{file_suffix}")
    wav_file_name = f"{file_prefix}{index_tag}_sprt1.wav"

    try:
        # All vowel segments of this phoneme file, in file order
        nuclei = get_nuclei_from_phn(phn_file)

        # One output line per nucleus: the wav name, then the
        # (phoneme, start, end) triple with times shown to four decimals
        for nucleus_phoneme, nucleus_start, nucleus_end in nuclei:
            output_list.append(
                f'"{wav_file_name}", [({nucleus_phoneme}, {nucleus_start:.4f}, {nucleus_end:.4f})]'
            )

    except FileNotFoundError:
        print(f"Phoneme file not found: {phn_file}")
    except Exception as e:
        print(f"Error processing {phn_file}: {e}")

# Emit every collected line
for formatted in output_list:
    print(formatted)


"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [(AY, 0.1400, 0.2000)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [(EH, 0.3200, 0.3800)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [(AY, 0.6100, 0.9000)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [(AO, 1.0700, 1.1500)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [(EY, 1.2500, 1.5000)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [(AY, 0.1800, 0.2800)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [(EH, 0.4400, 0.5500)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [(UW, 0.7900, 1.0700)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [(AO, 1.2100, 1.2900)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [(OW, 1.4900, 1.8500)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [(AY, 0.2900, 0.3600)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [(EH, 0.4800, 0.5500)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [(AE, 0.7000, 0.9300)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [(AO, 1.1600, 1.2600)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [(EH, 1.3700, 1.6300)]
"ISLE_SESS0003_BLOCKD01_04_sprt1.wav", [(AY, 0.2300, 0.2600)]
"ISLE_SE

In [2]:
import scipy.io

# The single .mat file inspected by this cell
file_path = '/media/monica/128 GB/htkCorrectedLab/lab/mat/sylStress/ISLE_SESS0003_BLOCKD01_01_sprt1.mat'

# Read the file into a mapping of variable name -> value
mat_contents = scipy.io.loadmat(file_path)

# Show which variables the file contains
print("Keys in the .mat file:", mat_contents.keys())

# Fetch the three fields of interest; a field that is absent comes back as None
spurtSyl, spurtSylTimes, spurtStress = (
    mat_contents.get(field)
    for field in ('spurtSyl', 'spurtSylTimes', 'spurtStress')
)

# Dump each field as loaded
print("spurtSyl data:", spurtSyl)
print("spurtSylTimes data:", spurtSylTimes)
print("spurtStress data:", spurtStress)


Keys in the .mat file: dict_keys(['__header__', '__version__', '__globals__', 'spurtSyl', 'spurtSylTimes', 'spurtStress'])
spurtSyl data: [[array(['AY'], dtype='<U2')]
 [array(['S EH D'], dtype='<U6')]
 [array(['W AY T'], dtype='<U6')]
 [array(['N AO T'], dtype='<U6')]
 [array(['B EY T S'], dtype='<U8')]]
spurtSylTimes data: [[0.14 0.2 ]
 [0.2  0.47]
 [0.47 0.99]
 [0.99 1.19]
 [1.19 1.69]]
spurtStress data: [[array(['P'], dtype='<U1')]
 [array(['P'], dtype='<U1')]
 [array(['P'], dtype='<U1')]
 [array(['P'], dtype='<U1')]
 [array(['P'], dtype='<U1')]]


In [5]:
import scipy.io

# Path to the .mat file
# NOTE(review): this file name has no "_sprt1" part while wav_file_name below
# does -- confirm both refer to the same spurt.
file_path = '/media/monica/128 GB/htkCorrectedLab/lab/mat/sylStress/ISLE_SESS0003_BLOCKD01_01.mat'

# Load the .mat file
mat_contents = scipy.io.loadmat(file_path)

# Extract the relevant data (an empty list stands in for a missing field)
spurtSyl = mat_contents.get('spurtSyl', [])
spurtSylTimes = mat_contents.get('spurtSylTimes', [])

# Prepare the output format: one (syllable text, start, end) tuple per row
wav_file_name = "ISLE_SESS0003_BLOCKD01_01_sprt1.wav"
syllable_data = [
    # str()/float() turn the NumPy scalars into plain Python values so the
    # list repr below reads 'AY' on every NumPy version (NumPy >= 2 would
    # otherwise render the string scalar as np.str_('AY')).
    (str(syllable[0][0]), float(times[0]), float(times[1]))
    for syllable, times in zip(spurtSyl, spurtSylTimes)
]

# Generate the required output string
output = f'"{wav_file_name}", {syllable_data},'
print(output)


"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [('AY', 0.14, 0.2), ('S EH D', 0.2, 0.47), ('W AY T', 0.47, 0.99), ('N AO T', 0.99, 1.19), ('B EY T S', 1.19, 1.69)],


In [57]:
import scipy.io
import os

# Directory of the .mat files loaded below; file names are assembled as
# <file_prefix><two-digit index><file_suffix>.
base_path = '/media/monica/128 GB/htkCorrectedLab/lab/mat/sylStress/'
file_prefix = 'ISLE_SESS0003_BLOCKD01_'
file_suffix = '_sprt1.mat'
# Directory of the phoneme label files; their names reuse the prefix and index,
# with '.mat' in the suffix replaced by '.txt'.
phoneme_path = '/media/monica/128 GB/htkCorrectedLab/lab/txt/phn'

# Function to read phoneme data and extract nuclei within a syllable time range
def get_nuclei_from_phn(file_path, syllable_start, syllable_end, vowels=None):
    """Return the vowel segments of a phoneme file that lie inside a syllable.

    Only lines made of exactly three whitespace-separated fields
    (``start end phoneme``) are considered; every other line is ignored.

    Args:
        file_path: Path of the phoneme label file to read.
        syllable_start: Lower time bound of the syllable.
        syllable_end: Upper time bound of the syllable.
        vowels: Optional collection of phoneme labels to treat as nuclei.
            When omitted, the vowel set this cell has always used applies.

    Returns:
        A list of ``(phoneme, start, end)`` tuples in file order, with both
        times converted to ``float``. The list is empty when the file cannot
        be opened; a message is printed instead of raising.
    """
    if vowels is None:
        vowels = {'A', 'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'EH', 'ER', 'EY',
                  'IH', 'IY', 'OW', 'OY', 'UH', 'UW', 'AX', 'AI'}

    nuclei = []
    try:
        with open(file_path, 'r') as phn_file:
            for line_no, line in enumerate(phn_file, start=1):
                parts = line.split()
                if len(parts) != 3:
                    continue
                phoneme = parts[2]
                # Silence is never a nucleus, whatever vowel set was supplied.
                if phoneme == 'SIL' or phoneme not in vowels:
                    continue
                try:
                    start, end = float(parts[0]), float(parts[1])
                except ValueError:
                    # A single bad line must not discard the rest of the file.
                    print(f"Skipping malformed line {line_no} in {file_path}: {line.strip()!r}")
                    continue
                # Containment test: the vowel must start and end inside the
                # syllable. A vowel that merely overlaps a boundary is not kept.
                if start >= syllable_start and end <= syllable_end:
                    nuclei.append((phoneme, start, end))
    except FileNotFoundError:
        print(f"Phoneme file not found: {file_path}")
    except Exception as e:
        print(f"Error reading phoneme file {file_path}: {e}")
    return nuclei

# Prepare to iterate through all file indices
output_list = []

for i in range(1, 82):  # From 1 to 81
    mat_file_name = f"{file_prefix}{i:02d}{file_suffix}"
    wav_file_name = f"{file_prefix}{i:02d}_sprt1.wav"
    mat_file_path = os.path.join(base_path, mat_file_name)
    phn_file_path = os.path.join(phoneme_path, f"{file_prefix}{i:02d}{file_suffix.replace('.mat', '.txt')}")

    # Load the .mat file
    try:
        mat_contents = scipy.io.loadmat(mat_file_path)
        spurtSyl = mat_contents.get('spurtSyl', [])
        spurtSylTimes = mat_contents.get('spurtSylTimes', [])

        # zip() below silently stops at the shorter sequence, so report a mismatch
        if len(spurtSyl) != len(spurtSylTimes):
            print(f"Warning: Mismatched syllable and time lengths in {mat_file_name}")

        syllable_rows = list(zip(spurtSyl, spurtSylTimes))

        # Read the phoneme file once per .mat file instead of once per syllable.
        # Unbounded limits make the helper return every vowel; each syllable then
        # filters this list in memory. A missing phoneme file is therefore
        # reported a single time rather than once for every syllable.
        file_nuclei = (
            get_nuclei_from_phn(phn_file_path, float('-inf'), float('inf'))
            if syllable_rows else []
        )

        for syllable, times in syllable_rows:
            syllable_start = float(times[0])
            syllable_end = float(times[1])

            # Vowels lying entirely within the syllable time range
            nuclei = [
                nucleus for nucleus in file_nuclei
                if nucleus[1] >= syllable_start and nucleus[2] <= syllable_end
            ]

            # If no nuclei found, skip this syllable
            if not nuclei:
                continue

            # Join the first element of each entry in the syllable row into one string
            syllable_str = " ".join([phoneme[0] for phoneme in syllable])
            for nucleus_phoneme, nucleus_start, nucleus_end in nuclei:
                # One line per (syllable, nucleus) pair
                output = (
                    f'"{wav_file_name}", [("{syllable_str}", {syllable_start:.4f}, {syllable_end:.4f})], '
                    f"[({nucleus_phoneme}, {nucleus_start:.4f}, {nucleus_end:.4f})]"
                )
                output_list.append(output)

    except FileNotFoundError:
        print(f"File not found: {mat_file_path}")
        continue
    except Exception as e:
        print(f"Error processing {mat_file_path}: {e}")
        continue

# Print all outputs
for line in output_list:
    print(line)


"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("AY", 0.1400, 0.2000)], [(AY, 0.1400, 0.2000)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("S EH D", 0.2000, 0.4700)], [(EH, 0.3200, 0.3800)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("W AY T", 0.4700, 0.9900)], [(AY, 0.6100, 0.9000)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("N AO T", 0.9900, 1.1900)], [(AO, 1.0700, 1.1500)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("B EY T S", 1.1900, 1.6900)], [(EY, 1.2500, 1.5000)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("AY", 0.1800, 0.2800)], [(AY, 0.1800, 0.2800)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("S EH D", 0.2800, 0.5800)], [(EH, 0.4400, 0.5500)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("N Y UW", 0.5800, 1.0700)], [(UW, 0.7900, 1.0700)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("N AO T", 1.1400, 1.3400)], [(AO, 1.2100, 1.2900)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("N OW", 1.3400, 1.8500)], [(OW, 1.4900, 1.8500)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [("AY", 0.2900, 0.3600)], [(AY, 0.2900, 0.3600)]


In [48]:
import scipy.io
import os

# Where the .mat files live and how their names are put together
base_path = '/media/monica/128 GB/htkCorrectedLab/lab/mat/sylStress/'
file_prefix = 'ISLE_SESS0003_BLOCKD01_'
file_suffix = '_sprt1.mat'

# One formatted line per syllable, across all files
output_list = []

for i in range(1, 82):  # file indices 1 through 81
    stem = f"{file_prefix}{i:02d}"
    mat_file_name = f"{stem}{file_suffix}"
    wav_file_name = f"{stem}_sprt1.wav"
    file_path = os.path.join(base_path, mat_file_name)

    try:
        mat_contents = scipy.io.loadmat(file_path)
        spurtSyl = mat_contents.get('spurtSyl', [])
        spurtSylTimes = mat_contents.get('spurtSylTimes', [])

        # Pair each syllable row with its [start, end] row
        for syllable, times in zip(spurtSyl, spurtSylTimes):
            start_time = float(times[0])
            end_time = float(times[1])
            # Text is taken from the first element of the row's first entry
            syllable_text = syllable[0][0]
            output_list.append(
                f'"{wav_file_name}", [("{syllable_text}", {start_time}, {end_time})]'
            )

    except FileNotFoundError:
        # A missing file is reported and the loop moves on to the next index
        print(f"File not found: {file_path}")
    except Exception as e:
        print(f"Error processing {file_path}: {e}")

# Emit every collected line
for formatted in output_list:
    print(formatted)


"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("AY", 0.14, 0.2)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("S EH D", 0.2, 0.47)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("W AY T", 0.47, 0.99)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("N AO T", 0.99, 1.19)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("B EY T S", 1.19, 1.69)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("AY", 0.18, 0.28)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("S EH D", 0.28, 0.58)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("N Y UW", 0.58, 1.07)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("N AO T", 1.14, 1.34)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("N OW", 1.34, 1.85)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [("AY", 0.29, 0.36)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [("S EH D", 0.36, 0.61)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [("B AE D", 0.61, 0.97)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [("N AO T", 0.97, 1.31)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [("B EH D", 1.31, 1.77)]
"ISLE_SESS0003_BLOCKD01_04_sprt1.wav", [("AY", 0.23, 0

In [16]:
import scipy.io
import os

# Where the .mat files live and how their names are put together
base_path = '/media/monica/128 GB/htkCorrectedLab/lab/mat/sylStress/'
file_prefix = 'ISLE_SESS0003_BLOCKD01_'
file_suffix = '_sprt1.mat'

# One formatted line per syllable, across all files
output_list = []

for i in range(1, 82):  # file indices 1 through 81
    stem = f"{file_prefix}{i:02d}"
    mat_file_name = f"{stem}{file_suffix}"
    wav_file_name = f"{stem}_sprt1.wav"
    file_path = os.path.join(base_path, mat_file_name)

    try:
        mat_contents = scipy.io.loadmat(file_path)
        spurtSyl = mat_contents.get('spurtSyl', [])
        spurtSylTimes = mat_contents.get('spurtSylTimes', [])

        # Pair each syllable row with its [start, end] row
        for syllable, times in zip(spurtSyl, spurtSylTimes):
            start_time = float(times[0])
            end_time = float(times[1])
            # Text is taken from the first element of the row's first entry
            syllable_text = syllable[0][0]
            output_list.append(
                f'"{wav_file_name}", [("{syllable_text}", {start_time}, {end_time})]'
            )

    except FileNotFoundError:
        # A missing file is reported and the loop moves on to the next index
        print(f"File not found: {file_path}")
    except Exception as e:
        print(f"Error processing {file_path}: {e}")

# Emit every collected line
for formatted in output_list:
    print(formatted)


"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("AY", 0.14, 0.2)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("S EH D", 0.2, 0.47)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("W AY T", 0.47, 0.99)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("N AO T", 0.99, 1.19)]
"ISLE_SESS0003_BLOCKD01_01_sprt1.wav", [("B EY T S", 1.19, 1.69)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("AY", 0.18, 0.28)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("S EH D", 0.28, 0.58)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("N Y UW", 0.58, 1.07)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("N AO T", 1.14, 1.34)]
"ISLE_SESS0003_BLOCKD01_02_sprt1.wav", [("N OW", 1.34, 1.85)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [("AY", 0.29, 0.36)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [("S EH D", 0.36, 0.61)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [("B AE D", 0.61, 0.97)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [("N AO T", 0.97, 1.31)]
"ISLE_SESS0003_BLOCKD01_03_sprt1.wav", [("B EH D", 1.31, 1.77)]
"ISLE_SESS0003_BLOCKD01_04_sprt1.wav", [("AY", 0.23, 0

In [55]:
import scipy.io
import os

# Where the .mat files live and how their names are put together
base_path = '/media/monica/128 GB/htkCorrectedLab/lab/mat/sylStress/'
file_prefix = 'ISLE_SESS0003_BLOCKD01_'
file_suffix = '_sprt1.mat'

# One formatted line per syllable, across all files
output_list = []

for i in range(1, 82):  # file indices 1 through 81
    stem = f"{file_prefix}{i:02d}"
    mat_file_name = f"{stem}{file_suffix}"
    wav_file_name = f"{stem}_sprt1.wav"
    file_path = os.path.join(base_path, mat_file_name)

    try:
        mat_contents = scipy.io.loadmat(file_path)
        spurtSyl = mat_contents.get('spurtSyl', [])
        spurtSylTimes = mat_contents.get('spurtSylTimes', [])

        # Pair each syllable row with its [start, end] row
        for syllable, times in zip(spurtSyl, spurtSylTimes):
            start_time = float(times[0])
            end_time = float(times[1])
            # Text is taken from the first element of the row's first entry
            syllable_text = syllable[0][0]
            output_list.append(
                f'"{wav_file_name}", [("{syllable_text}", {start_time}, {end_time})]'
            )

    except FileNotFoundError:
        # A missing file is reported and the loop moves on to the next index
        print(f"File not found: {file_path}")
    except Exception as e:
        print(f"Error processing {file_path}: {e}")

# Destination of the collected lines (relative to the working directory)
output_file_path = 'p2tk-code-r18-python-syllabify/SYLLABLEFILE'  # Change this to your desired file path

# Write one collected line per row of the output file
try:
    with open(output_file_path, 'w') as output_file:
        output_file.writelines(entry + '\n' for entry in output_list)
    print(f"Output successfully saved to {output_file_path}")
except Exception as e:
    print(f"Error saving output to file: {e}")


Output successfully saved to p2tk-code-r18-python-syllabify/SYLLABLEFILE


In [59]:
import scipy.io
import os

# Directory of the .mat files loaded below; file names are assembled as
# <file_prefix><two-digit index><file_suffix>.
base_path = '/media/monica/128 GB/htkCorrectedLab/lab/mat/sylStress/'
file_prefix = 'ISLE_SESS0003_BLOCKD01_'
file_suffix = '_sprt1.mat'
# Directory of the phoneme label files; their names reuse the prefix and index,
# with '.mat' in the suffix replaced by '.txt'.
phoneme_path = '/media/monica/128 GB/htkCorrectedLab/lab/txt/phn'

# Function to read phoneme data and extract nuclei within a syllable time range
def get_nuclei_from_phn(file_path, syllable_start, syllable_end, vowels=None):
    """Return the vowel segments of a phoneme file that lie inside a syllable.

    Only lines made of exactly three whitespace-separated fields
    (``start end phoneme``) are considered; every other line is ignored.

    Args:
        file_path: Path of the phoneme label file to read.
        syllable_start: Lower time bound of the syllable.
        syllable_end: Upper time bound of the syllable.
        vowels: Optional collection of phoneme labels to treat as nuclei.
            When omitted, the vowel set this cell has always used applies.

    Returns:
        A list of ``(phoneme, start, end)`` tuples in file order, with both
        times converted to ``float``. The list is empty when the file cannot
        be opened; a message is printed instead of raising.
    """
    if vowels is None:
        vowels = {'A', 'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'EH', 'ER', 'EY',
                  'IH', 'IY', 'OW', 'OY', 'UH', 'UW', 'AX', 'AI', 'IX'}

    nuclei = []
    try:
        with open(file_path, 'r') as phn_file:
            for line_no, line in enumerate(phn_file, start=1):
                parts = line.split()
                if len(parts) != 3:
                    continue
                phoneme = parts[2]
                # Silence is never a nucleus, whatever vowel set was supplied.
                if phoneme == 'SIL' or phoneme not in vowels:
                    continue
                try:
                    start, end = float(parts[0]), float(parts[1])
                except ValueError:
                    # A single bad line must not discard the rest of the file.
                    print(f"Skipping malformed line {line_no} in {file_path}: {line.strip()!r}")
                    continue
                # Containment test: the vowel must start and end inside the
                # syllable. A vowel that merely overlaps a boundary is not kept.
                if start >= syllable_start and end <= syllable_end:
                    nuclei.append((phoneme, start, end))
    except FileNotFoundError:
        print(f"Phoneme file not found: {file_path}")
    except Exception as e:
        print(f"Error reading phoneme file {file_path}: {e}")
    return nuclei

# Prepare to iterate through all file indices
output_list = []

for i in range(1, 82):  # From 1 to 81
    mat_file_name = f"{file_prefix}{i:02d}{file_suffix}"
    wav_file_name = f"{file_prefix}{i:02d}_sprt1.wav"
    mat_file_path = os.path.join(base_path, mat_file_name)
    phn_file_path = os.path.join(phoneme_path, f"{file_prefix}{i:02d}{file_suffix.replace('.mat', '.txt')}")

    # Load the .mat file
    try:
        mat_contents = scipy.io.loadmat(mat_file_path)
        spurtSyl = mat_contents.get('spurtSyl', [])
        spurtSylTimes = mat_contents.get('spurtSylTimes', [])

        # zip() below silently stops at the shorter sequence, so report a mismatch
        if len(spurtSyl) != len(spurtSylTimes):
            print(f"Warning: Mismatched syllable and time lengths in {mat_file_name}")

        syllable_rows = list(zip(spurtSyl, spurtSylTimes))

        # Read the phoneme file once per .mat file instead of once per syllable.
        # Unbounded limits make the helper return every vowel; each syllable then
        # filters this list in memory. A missing phoneme file is therefore
        # reported a single time rather than once for every syllable.
        file_nuclei = (
            get_nuclei_from_phn(phn_file_path, float('-inf'), float('inf'))
            if syllable_rows else []
        )

        for syllable, times in syllable_rows:
            syllable_start = float(times[0])
            syllable_end = float(times[1])

            # Vowels lying entirely within the syllable time range
            nuclei = [
                nucleus for nucleus in file_nuclei
                if nucleus[1] >= syllable_start and nucleus[2] <= syllable_end
            ]

            # If no nuclei found, skip this syllable
            if not nuclei:
                continue

            # Join the first element of each entry in the syllable row into one string
            syllable_str = " ".join([phoneme[0] for phoneme in syllable])
            for nucleus_phoneme, nucleus_start, nucleus_end in nuclei:
                # One line per (syllable, nucleus) pair
                output = (
                    f'"{wav_file_name}", [("{syllable_str}", {syllable_start:.4f}, {syllable_end:.4f})], '
                    f"[({nucleus_phoneme}, {nucleus_start:.4f}, {nucleus_end:.4f})]"
                )
                output_list.append(output)

    except FileNotFoundError:
        print(f"File not found: {mat_file_path}")
        continue
    except Exception as e:
        print(f"Error processing {mat_file_path}: {e}")
        continue

# Save output to a file
output_file_path = 'p2tk-code-r18-python-syllabify/SYLLABLENUCLUES'  # You can change the file path here

try:
    with open(output_file_path, 'w') as output_file:
        for line in output_list:
            output_file.write(line + '\n')
    print(f"Output successfully saved to {output_file_path}")
except Exception as e:
    print(f"Error saving output to file: {e}")


Output successfully saved to p2tk-code-r18-python-syllabify/SYLLABLENUCLUES


In [3]:
import scipy.io
import os

# Directory of the .mat files loaded below; file names are assembled as
# <file_prefix><two-digit index><file_suffix>.
base_path = '/media/monica/128 GB/htkCorrectedLab/lab/mat/sylStress/'
file_prefix = 'ISLE_SESS0003_BLOCKD01_'
file_suffix = '_sprt1.mat'
# Directory of the phoneme label files; their names reuse the prefix and index,
# with '.mat' in the suffix replaced by '.txt'.
phoneme_path = '/media/monica/128 GB/htkCorrectedLab/lab/txt/phn'

# Function to read phoneme data and extract nuclei within a syllable time range
def get_nuclei_from_phn(file_path, syllable_start, syllable_end, vowels=None):
    """Return the vowel segments of a phoneme file that lie inside a syllable.

    Only lines made of exactly three whitespace-separated fields
    (``start end phoneme``) are considered; every other line is ignored.

    Args:
        file_path: Path of the phoneme label file to read.
        syllable_start: Lower time bound of the syllable.
        syllable_end: Upper time bound of the syllable.
        vowels: Optional collection of phoneme labels to treat as nuclei.
            When omitted, the vowel set this cell has always used applies.

    Returns:
        A list of ``(phoneme, start, end)`` tuples in file order, with both
        times converted to ``float``. The list is empty when the file cannot
        be opened; a message is printed instead of raising.
    """
    if vowels is None:
        vowels = {'A', 'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'EH', 'ER', 'EY',
                  'IH', 'IY', 'OW', 'OY', 'UH', 'UW', 'AX', 'AI'}

    nuclei = []
    try:
        with open(file_path, 'r') as phn_file:
            for line_no, line in enumerate(phn_file, start=1):
                parts = line.split()
                if len(parts) != 3:
                    continue
                phoneme = parts[2]
                # Silence is never a nucleus, whatever vowel set was supplied.
                if phoneme == 'SIL' or phoneme not in vowels:
                    continue
                try:
                    start, end = float(parts[0]), float(parts[1])
                except ValueError:
                    # A single bad line must not discard the rest of the file.
                    print(f"Skipping malformed line {line_no} in {file_path}: {line.strip()!r}")
                    continue
                # Containment test: the vowel must start and end inside the
                # syllable. A vowel that merely overlaps a boundary is not kept.
                if start >= syllable_start and end <= syllable_end:
                    nuclei.append((phoneme, start, end))
    except FileNotFoundError:
        print(f"Phoneme file not found: {file_path}")
    except Exception as e:
        print(f"Error reading phoneme file {file_path}: {e}")
    return nuclei

# Function to get stress values (1 for primary, 0 for unstressed)
def get_stress_value(stress_type):
    """Translate a stress label into a numeric code.

    Args:
        stress_type: The stress label to translate.

    Returns:
        1 for 'P', 0 for 'U', and -1 for any other value.
    """
    if stress_type == 'P':
        return 1
    # Anything that is neither 'P' nor 'U' maps to the -1 sentinel
    return 0 if stress_type == 'U' else -1

# Prepare to iterate through all file indices
output_list = []

for i in range(1, 82):  # From 1 to 81
    mat_file_name = f"{file_prefix}{i:02d}{file_suffix}"
    wav_file_name = f"{file_prefix}{i:02d}_sprt1.wav"
    mat_file_path = os.path.join(base_path, mat_file_name)
    phn_file_path = os.path.join(phoneme_path, f"{file_prefix}{i:02d}{file_suffix.replace('.mat', '.txt')}")

    # Load the .mat file
    try:
        mat_contents = scipy.io.loadmat(mat_file_path)
        spurtSyl = mat_contents.get('spurtSyl', [])
        spurtSylTimes = mat_contents.get('spurtSylTimes', [])
        spurtStress = mat_contents.get('spurtStress', [])

        # zip() below silently stops at the shortest sequence, so report any mismatch
        if len(spurtSyl) != len(spurtSylTimes):
            print(f"Warning: Mismatched syllable and time lengths in {mat_file_name}")
        if len(spurtSyl) != len(spurtStress):
            print(f"Warning: Mismatched syllable and stress lengths in {mat_file_name}")

        syllables = []
        vowels = []
        stresses = []

        syllable_rows = list(zip(spurtSyl, spurtSylTimes, spurtStress))

        # Read the phoneme file once per .mat file instead of once per syllable.
        # Unbounded limits make the helper return every vowel; each syllable then
        # filters this list in memory. A missing phoneme file is therefore
        # reported a single time rather than once for every syllable.
        file_nuclei = (
            get_nuclei_from_phn(phn_file_path, float('-inf'), float('inf'))
            if syllable_rows else []
        )

        for syllable, times, stress in syllable_rows:
            syllable_start = float(times[0])
            syllable_end = float(times[1])

            # Vowels lying entirely within the syllable time range
            nuclei = [
                nucleus for nucleus in file_nuclei
                if nucleus[1] >= syllable_start and nucleus[2] <= syllable_end
            ]

            # If no nuclei found, skip this syllable (its stress is skipped too)
            if not nuclei:
                continue

            # Join the first element of each entry in the syllable row into one string
            syllable_str = " ".join([phoneme[0] for phoneme in syllable])
            syllables.append((syllable_str, syllable_start, syllable_end))

            # A syllable may contribute more than one vowel, so `vowels` can end
            # up longer than `syllables` and `stresses`.
            vowels.extend(nuclei)

            # Convert the stress label to 1 (primary), 0 (unstressed) or -1 (other)
            stress_value = get_stress_value(stress[0][0])
            stresses.append(stress_value)

        # Format the output in the required structure: one line per file
        syllables_str = str(syllables)
        vowels_str = str(vowels)
        stresses_str = str(stresses)

        output = f'"{wav_file_name}", {syllables_str}, {vowels_str}, {stresses_str}'
        output_list.append(output)

    except FileNotFoundError:
        print(f"File not found: {mat_file_path}")
        continue
    except Exception as e:
        print(f"Error processing {mat_file_path}: {e}")
        continue

# Save output to a file
output_file_path = 'p2tk-code-r18-python-syllabify/SYLLBLESTRESS'  # You can change the file path here

try:
    with open(output_file_path, 'w') as output_file:
        for line in output_list:
            output_file.write(line + '\n')
    print(f"Output successfully saved to {output_file_path}")
except Exception as e:
    print(f"Error saving output to file: {e}")


Output successfully saved to p2tk-code-r18-python-syllabify/SYLLBLESTRESS


In [5]:
import scipy.io
import os

# Directory of the .mat files loaded below; file names are assembled as
# <file_prefix><two-digit index><file_suffix>.
base_path = '/media/monica/128 GB/htkCorrectedLab/lab/mat/sylStress/'
file_prefix = 'ISLE_SESS0006_BLOCKD01_'
file_suffix = '_sprt1.mat'
# Directory of the phoneme label files; their names reuse the prefix and index,
# with '.mat' in the suffix replaced by '.txt'.
phoneme_path = '/media/monica/128 GB/htkCorrectedLab/lab/txt/phn'

# Function to read phoneme data and extract nuclei within a syllable time range
def get_nuclei_from_phn(file_path, syllable_start, syllable_end, vowels=None):
    """Return the vowel segments of a phoneme file that lie inside a syllable.

    Only lines made of exactly three whitespace-separated fields
    (``start end phoneme``) are considered; every other line is ignored.

    Args:
        file_path: Path of the phoneme label file to read.
        syllable_start: Lower time bound of the syllable.
        syllable_end: Upper time bound of the syllable.
        vowels: Optional collection of phoneme labels to treat as nuclei.
            When omitted, the vowel set this cell has always used applies.

    Returns:
        A list of ``(phoneme, start, end)`` tuples in file order, with both
        times converted to ``float``. The list is empty when the file cannot
        be opened; a message is printed instead of raising.
    """
    if vowels is None:
        vowels = {'A', 'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'EH', 'ER', 'EY',
                  'IH', 'IY', 'OW', 'OY', 'UH', 'UW', 'AX', 'AI'}

    nuclei = []
    try:
        with open(file_path, 'r') as phn_file:
            for line_no, line in enumerate(phn_file, start=1):
                parts = line.split()
                if len(parts) != 3:
                    continue
                phoneme = parts[2]
                # Silence is never a nucleus, whatever vowel set was supplied.
                if phoneme == 'SIL' or phoneme not in vowels:
                    continue
                try:
                    start, end = float(parts[0]), float(parts[1])
                except ValueError:
                    # A single bad line must not discard the rest of the file.
                    print(f"Skipping malformed line {line_no} in {file_path}: {line.strip()!r}")
                    continue
                # Containment test: the vowel must start and end inside the
                # syllable. A vowel that merely overlaps a boundary is not kept.
                if start >= syllable_start and end <= syllable_end:
                    nuclei.append((phoneme, start, end))
    except FileNotFoundError:
        print(f"Phoneme file not found: {file_path}")
    except Exception as e:
        print(f"Error reading phoneme file {file_path}: {e}")
    return nuclei

# Function to get stress values (1 for primary, 0 for unstressed)
def get_stress_value(stress_type):
    """Convert a stress label to its numeric code.

    Args:
        stress_type: The stress label to convert.

    Returns:
        1 when the label equals 'P', 0 when it equals 'U', otherwise -1.
    """
    # Known labels are checked in order; the first match wins
    for label, code in (('P', 1), ('U', 0)):
        if stress_type == label:
            return code
    # Fallback for every label not listed above
    return -1

# Prepare to iterate through all file indices
output_list = []

# range() excludes its stop value, so 81 is needed to reach index 80
# (the previous stop of 80 ended at 79 despite the stated 1-to-80 intent).
for i in range(1, 81):  # From 1 to 80
    mat_file_name = f"{file_prefix}{i:02d}{file_suffix}"
    wav_file_name = f"{file_prefix}{i:02d}_sprt1.wav"
    mat_file_path = os.path.join(base_path, mat_file_name)
    phn_file_path = os.path.join(phoneme_path, f"{file_prefix}{i:02d}{file_suffix.replace('.mat', '.txt')}")

    # Load the .mat file
    try:
        mat_contents = scipy.io.loadmat(mat_file_path)
        spurtSyl = mat_contents.get('spurtSyl', [])
        spurtSylTimes = mat_contents.get('spurtSylTimes', [])
        spurtStress = mat_contents.get('spurtStress', [])

        # zip() below silently stops at the shortest sequence, so report any mismatch
        if len(spurtSyl) != len(spurtSylTimes):
            print(f"Warning: Mismatched syllable and time lengths in {mat_file_name}")
        if len(spurtSyl) != len(spurtStress):
            print(f"Warning: Mismatched syllable and stress lengths in {mat_file_name}")

        syllables = []
        vowels = []
        stresses = []

        syllable_rows = list(zip(spurtSyl, spurtSylTimes, spurtStress))

        # Read the phoneme file once per .mat file instead of once per syllable.
        # Unbounded limits make the helper return every vowel; each syllable then
        # filters this list in memory. A missing phoneme file is therefore
        # reported a single time rather than once for every syllable.
        file_nuclei = (
            get_nuclei_from_phn(phn_file_path, float('-inf'), float('inf'))
            if syllable_rows else []
        )

        for syllable, times, stress in syllable_rows:
            syllable_start = float(times[0])
            syllable_end = float(times[1])

            # Vowels lying entirely within the syllable time range
            nuclei = [
                nucleus for nucleus in file_nuclei
                if nucleus[1] >= syllable_start and nucleus[2] <= syllable_end
            ]

            # If no nuclei found, skip this syllable (its stress is skipped too)
            if not nuclei:
                continue

            # Join the first element of each entry in the syllable row into one string
            syllable_str = " ".join([phoneme[0] for phoneme in syllable])
            syllables.append((syllable_str, syllable_start, syllable_end))

            # A syllable may contribute more than one vowel, so `vowels` can end
            # up longer than `syllables` and `stresses`.
            vowels.extend(nuclei)

            # Convert the stress label to 1 (primary), 0 (unstressed) or -1 (other)
            stress_value = get_stress_value(stress[0][0])
            stresses.append(stress_value)

        # Format the output in the required structure: one line per file
        syllables_str = str(syllables)
        vowels_str = str(vowels)
        stresses_str = str(stresses)

        output = f'"{wav_file_name}", {syllables_str}, {vowels_str}, {stresses_str}'
        output_list.append(output)

    except FileNotFoundError:
        print(f"File not found: {mat_file_path}")
        continue
    except Exception as e:
        print(f"Error processing {mat_file_path}: {e}")
        continue

# Save output to a file
output_file_path = 'p2tk-code-r18-python-syllabify/syllablestress3'  # You can change the file path here

try:
    with open(output_file_path, 'w') as output_file:
        for line in output_list:
            output_file.write(line + '\n')
    print(f"Output successfully saved to {output_file_path}")
except Exception as e:
    print(f"Error saving output to file: {e}")


File not found: /media/monica/128 GB/htkCorrectedLab/lab/mat/sylStress/ISLE_SESS0006_BLOCKD01_28_sprt1.mat
Output successfully saved to p2tk-code-r18-python-syllabify/syllablestress3
