In [3]:
import wave
import numpy as np
import shutil
import sys, os, os.path
from scipy.io import wavfile
from scipy.io.wavfile import write
import pandas as pd
import matplotlib.pyplot as plt
import csv
import librosa
import soundfile as sf

ModuleNotFoundError: No module named 'librosa'

## Functions to shorten audios

In [14]:
def get_wav_duration(file_path):
    """Return the duration and native sample rate of an audio file.

    The values are taken from the file header through ``soundfile.info``,
    so the samples are not decoded just to measure the length of the file.

    Parameters
    ----------
    file_path : str
        Path to the audio file.

    Returns
    -------
    tuple
        ``(duration, sr)``: the duration in seconds and the sample rate
        stored in the file, in Hz.
    """
    info = sf.info(file_path)
    return info.duration, info.samplerate

def shorten_wav(input_file, output_file, target_duration, sr):
    """Write the first ``target_duration`` seconds of an audio file.

    Parameters
    ----------
    input_file : str
        Path of the audio file to read.
    output_file : str
        Path of the audio file to write.
    target_duration : float
        Length to keep, in seconds, counted from the start of the file.
    sr : int or None
        Sample rate passed to ``librosa.load``; the value that call returns
        is used both to convert the duration into a sample count and as the
        sample rate of the written file.
    """
    samples, rate = librosa.load(input_file, sr=sr)
    # Number of leading samples that make up the requested duration
    n_keep = int(target_duration * rate)
    sf.write(output_file, samples[:n_keep], rate)

## Set up input and output folders

In [15]:
# Paths (relative strings, so they resolve against the current working directory)
# Input folders; presumably they hold the vowel 'A' recordings of each group
control_folder_path = "TFM_MartaRey/datos/Vowels/Control/A"
pathological_folder_path = "TFM_MartaRey/datos/Vowels/Patologicas/A"
# Output folders for the shortened audio: one per group and per target sample rate
output_control24 = "TFM_MartaRey/datos/control_files_short_24khz"
output_pathological24 = "TFM_MartaRey/datos/pathological_files_short_24khz"
output_control44_1 = "TFM_MartaRey/datos/control_files_short_44_1khz"
output_pathological44_1 = "TFM_MartaRey/datos/pathological_files_short_44_1khz"

# Start every run from empty output folders.
# WARNING: an existing output folder is deleted together with everything in it
# before it is created again.
for folder in [output_control24, output_pathological24, output_control44_1, output_pathological44_1]:
    if os.path.exists(folder):
        shutil.rmtree(folder)
    os.makedirs(folder)

## Shorten both Control and Pathological audios to the same length

In [16]:
# VOWEL 'A': CONTROL & PARKINSON SUBJECTS
   
def truncate_float(float_number, decimal_places):
    """Truncate a number toward zero to a fixed number of decimal places.

    The truncation is applied to the shortest decimal representation of the
    float instead of to ``float_number * 10 ** decimal_places``. The binary
    product can land just below the intended integer (``0.29 * 100`` is
    ``28.999999999999996``), which used to drop a whole step and return
    ``0.28`` for ``0.29``.

    Parameters
    ----------
    float_number : float
        Value to truncate. Anything accepted by ``float()`` works.
    decimal_places : int
        Number of digits to keep after the decimal point. Negative values
        truncate to tens, hundreds, and so on.

    Returns
    -------
    float
        ``float_number`` with the extra digits cut off, never rounded up.

    Raises
    ------
    decimal.InvalidOperation
        If ``float_number`` is infinite or NaN.
    """
    from decimal import Context, Decimal, ROUND_DOWN

    # repr() of a Python float is the shortest string that round-trips, i.e.
    # the decimal value a reader sees; float() also normalises NumPy scalars.
    value = Decimal(repr(float(float_number)))
    places = int(decimal_places)
    quantum = Decimal(1).scaleb(-places)
    # Enough precision for every digit of the result, so large magnitudes do
    # not overflow the default 28-digit context.
    context = Context(prec=max(28, value.adjusted() + places + 2),
                      rounding=ROUND_DOWN)
    return float(value.quantize(quantum, context=context))
# Scan both input folders and keep the smallest duration found,
# with each duration first cut to two decimal places
shortest_duration = float('inf')

for folder_path in (control_folder_path, pathological_folder_path):
    for file_name in os.listdir(folder_path):
        # Only .wav files take part in the comparison
        if not file_name.endswith('.wav'):
            continue
        file_path = os.path.join(folder_path, file_name)
        duration = truncate_float(get_wav_duration(file_path)[0], 2)
        shortest_duration = min(shortest_duration, duration)

# Shorten all files to that duration
def _shorten_wav_files(folder_path, output_folder, shortest_duration, sr):
    """Shorten every .wav file of a folder at the given sample rate.

    Each ``<name>.wav`` found directly inside ``folder_path`` is passed to
    ``shorten_wav`` and written to ``output_folder`` as
    ``<name>_shortened.wav``. Only the trailing extension is rewritten, so a
    name that contains ``.wav`` somewhere else keeps that part untouched.

    Parameters
    ----------
    folder_path : str
        Folder that is scanned for ``.wav`` files (not recursive).
    output_folder : str
        Folder that receives the shortened files.
    shortest_duration : float
        Duration in seconds that every output file is cut to.
    sr : int
        Sample rate handed to ``shorten_wav``.
    """
    suffix = '.wav'
    # Sorted so the files are processed in a reproducible order
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith(suffix):
            continue
        input_file = os.path.join(folder_path, file_name)
        output_name = file_name[:-len(suffix)] + '_shortened' + suffix
        output_file = os.path.join(output_folder, output_name)
        shorten_wav(input_file, output_file, shortest_duration, sr)


def shorten_wav_files_24(folder_path, output_folder, shortest_duration):
    """Shorten all .wav files of ``folder_path`` to ``shortest_duration`` seconds at 24 kHz.

    The results are written to ``output_folder`` as ``<name>_shortened.wav``.
    """
    _shorten_wav_files(folder_path, output_folder, shortest_duration, 24000)


def shorten_wav_files_44_1(folder_path, output_folder, shortest_duration):
    """Shorten all .wav files of ``folder_path`` to ``shortest_duration`` seconds at 44.1 kHz.

    The results are written to ``output_folder`` as ``<name>_shortened.wav``.
    """
    _shorten_wav_files(folder_path, output_folder, shortest_duration, 44100)


In [17]:
# Every input folder is shortened twice: once at 24 kHz and once at 44.1 kHz.
# Each entry is (function to run, input folder, output folder).
shortening_jobs = (
    (shorten_wav_files_24, control_folder_path, output_control24),
    (shorten_wav_files_24, pathological_folder_path, output_pathological24),
    (shorten_wav_files_44_1, control_folder_path, output_control44_1),
    (shorten_wav_files_44_1, pathological_folder_path, output_pathological44_1),
)
for shorten_files, source_folder, destination_folder in shortening_jobs:
    shorten_files(source_folder, destination_folder, shortest_duration)

print("Process completed.")

Process completed.


## Audio to CSV

In [None]:
def save_csv(audio_path, output_csv):
    '''
    Write one CSV file per .wav file found in a folder.

    audio_path: folder that contains the input .wav files (not scanned recursively)
    output_csv: folder that receives the CSV files

    The output folder is emptied first: if it already exists it is deleted
    with all its contents and created again. Every <name>.wav is then read
    with scipy and saved as <name>.csv, with the samples in a single column
    named 'M' next to the default row index.

    Raises ValueError if output_csv is the same folder as audio_path or one
    of its parents, because emptying it would delete the input audio.
    NOTE(review): files with more than one channel are expected to fail when
    the single column name 'M' is assigned -- confirm whether the inputs are
    always mono.
    '''
    # Refuse to wipe the folder that holds the input audio
    source_dir = os.path.normcase(os.path.realpath(audio_path))
    target_dir = os.path.normcase(os.path.realpath(output_csv))
    if source_dir == target_dir or source_dir.startswith(target_dir.rstrip(os.sep) + os.sep):
        raise ValueError(
            "output_csv must not be the audio folder or one of its parents: "
            "it is deleted before the CSV files are written"
        )

    # Start from an empty output folder
    if os.path.exists(output_csv):
        shutil.rmtree(output_csv)
    os.makedirs(output_csv, exist_ok=True)

    for name in os.listdir(audio_path):
        # Anything that is not a .wav file is ignored
        if not name.endswith('.wav'):
            continue

        # The sample rate returned by wavfile.read is not needed for the CSV
        _, data = wavfile.read(os.path.join(audio_path, name))
        wavData = pd.DataFrame(data)
        wavData.columns = ['M']
        # name[:-4] drops the '.wav' extension
        wavData.to_csv(os.path.join(output_csv, name[:-4] + ".csv"), mode='w')
