# Cleaning the data

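- Check the length of each audio file
- Split each file into 5-second sections (a trailing remainder shorter than 5 seconds is discarded)
- Rename the files to a consistent `<name>_<n>.<ext>` pattern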

In [11]:
import os

def renaming_files(directory_path, new_file_name, file_type):
    """
    Rename every file of one type in a directory to ``<new_file_name>_<n>.<file_type>``.

    Files are matched by extension without regard to case, numbered from 1 in
    sorted filename order, and given the lower-cased extension.

    Parameters
    ----------
    directory_path : str
        Directory whose files are renamed in place.
    new_file_name : str
        Stem shared by all renamed files.
    file_type : str
        Extension to match, with or without a leading dot (e.g. "mp3", ".WAV").

    Notes
    -----
    The rename happens in two passes through unique temporary names. A single
    pass can hit a target name that is still held by a file waiting to be
    renamed (for example when the cell is re-run on an already renamed
    folder); ``os.rename`` then either raises or replaces that file, depending
    on the platform.
    """
    # Local import: only needed for the collision-free staging names below.
    import uuid

    # Normalise the extension so "MP3", "mp3" and ".mp3" all behave the same.
    file_type = file_type.lower().lstrip(".")
    suffix = f".{file_type}"

    # os.listdir returns entries in arbitrary order; sort so the numbering is
    # reproducible, and ignore directories that happen to end with the suffix.
    files = sorted(
        entry
        for entry in os.listdir(directory_path)
        if entry.lower().endswith(suffix)
        and os.path.isfile(os.path.join(directory_path, entry))
    )

    # Pass 1: move every file to a unique staging name, freeing all targets.
    token = uuid.uuid4().hex
    staged = []
    for i, filename in enumerate(files, start=1):
        staging_path = os.path.join(directory_path, f"{token}_{i}.tmp")
        final_path = os.path.join(directory_path, f"{new_file_name}_{i}.{file_type}")
        os.rename(os.path.join(directory_path, filename), staging_path)
        staged.append((staging_path, final_path))

    # Pass 2: move the staged files to their final names.
    for staging_path, final_path in staged:
        os.rename(staging_path, final_path)

    print(f"Renamed {len(files)} files successfully.")
    
# Folder to work on, the stem shared by the new names, and the extension to match
directory_path = r"C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio files raw\other kiwi birds - Copy"
new_file_name = "other_kiwi_species"
file_type = "mp3"

# Rename the matching files in place to "<new_file_name>_<n>.<file_type>"
renaming_files(
    directory_path=directory_path,
    new_file_name=new_file_name,
    file_type=file_type,
)


Renamed 34 files successfully.


In [46]:
import os
import librosa
import numpy as np
import soundfile as sf

def time_periods(file_path, segment_seconds=5):
    """
    List the consecutive full-length time windows that fit inside an audio file.

    Parameters
    ----------
    file_path : str
        Path of the audio file to measure.
    segment_seconds : int or float, optional
        Length of each window in seconds (default 5).

    Returns
    -------
    list of tuple
        ``(start, end)`` pairs in seconds, starting at 0. Only complete
        windows are returned; a trailing remainder shorter than
        ``segment_seconds`` is left out, so a file shorter than one window
        yields an empty list.

    Raises
    ------
    ValueError
        If ``segment_seconds`` is not positive.
    """
    if segment_seconds <= 0:
        raise ValueError("segment_seconds must be positive")

    # Load the audio file (sr=None is passed through to librosa.load)
    y, sr = librosa.load(file_path, sr=None)

    # Length of the audio in seconds
    duration_sec = librosa.get_duration(y=y, sr=sr)

    # Number of complete windows; the partial tail is intentionally ignored
    n_segments = int(duration_sec // segment_seconds)

    return [
        (i * segment_seconds, (i + 1) * segment_seconds)
        for i in range(n_segments)
    ]

def clip_audio(file_path, output_directory, base_name, segment_seconds=5):
    """
    Split one audio file into consecutive fixed-length clips and save each as WAV.

    Clips are written to ``output_directory`` as
    ``<base_name>_sample_<n>.wav`` with ``n`` starting at 1. Only complete
    clips are written; a trailing remainder shorter than ``segment_seconds``
    is discarded.

    Parameters
    ----------
    file_path : str
        Path of the audio file to split.
    output_directory : str
        Existing directory that receives the clips.
    base_name : str
        Stem used for the output file names.
    segment_seconds : int or float, optional
        Length of each clip in seconds (default 5).

    Raises
    ------
    ValueError
        If ``segment_seconds`` does not amount to at least one sample.
    """
    # Load the audio file once; clip boundaries are derived from these samples
    # so the file does not have to be decoded a second time to get its length.
    audio, sr = librosa.load(file_path, sr=None)

    samples_per_segment = int(segment_seconds * sr)
    if samples_per_segment < 1:
        raise ValueError("segment_seconds must cover at least one sample")

    # Number of complete clips that fit in the recording
    n_segments = len(audio) // samples_per_segment
    if n_segments == 0:
        # Make the dropped file visible instead of silently producing nothing
        print(f"Skipped (shorter than {segment_seconds} s): {file_path}")
        return

    for n in range(n_segments):
        start_sample = n * samples_per_segment
        end_sample = start_sample + samples_per_segment

        # Slice the audio segment
        chopped_audio = audio[start_sample:end_sample]

        # Define output path with correct naming convention
        output_path = os.path.join(output_directory, f"{base_name}_sample_{n + 1}.wav")

        # Save the chopped audio to a new file
        sf.write(output_path, chopped_audio, sr)
        print(f"Saved: {output_path}")

def process_directory(input_directory, output_directory):
    """
    Clip every MP3 and WAV file found directly inside ``input_directory``.

    Parameters
    ----------
    input_directory : str
        Directory containing the source recordings (not searched recursively).
    output_directory : str
        Directory that receives the clips; created if it does not exist.

    Notes
    -----
    Extensions are matched without regard to case, so ``.MP3`` and ``.WAV``
    files are processed too. Each file is passed to ``clip_audio`` with the
    file name minus its extension as the base name.
    """
    # Create the output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)

    # Sorted so the processing order (and the printed log) is reproducible
    for filename in sorted(os.listdir(input_directory)):
        file_path = os.path.join(input_directory, filename)

        # Skip sub-directories and anything that is not an audio file
        if not os.path.isfile(file_path):
            continue
        if not filename.lower().endswith((".mp3", ".wav")):
            continue

        base_name, _ = os.path.splitext(filename)
        clip_audio(file_path, output_directory, base_name)

# Raw bellbird recordings (source) and the folder that receives the clips (destination)
input_directory = r"C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio files raw\bellbird"
output_directory = r"C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\bellbird"

# Clip every audio file in the source folder into the destination folder
process_directory(
    input_directory=input_directory,
    output_directory=output_directory,
)


Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\bellbird\Bellbird_1_sample_1.wav
Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\bellbird\Bellbird_1_sample_2.wav
Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\bellbird\Bellbird_1_sample_3.wav
Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\bellbird\Bellbird_1_sample_4.wav
Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\bellbird\Bellbird_1_sample_5.wav
Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\bellbird\Bellbird_1_sample_6.wav
Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\bellbird\Bellbird_1_sample_7.wav
Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN D

In [52]:
# Raw swamphen recordings (source) and the folder that receives the clips (destination)
input_directory = r"C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio files raw\swamphen"
output_directory = r"C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\swamphen"

# Clip every audio file in the source folder into the destination folder
process_directory(
    input_directory=input_directory,
    output_directory=output_directory,
)

Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\swamphen\Swamphen (pukeko)_1_sample_1.wav
Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\swamphen\Swamphen (pukeko)_1_sample_2.wav
Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\swamphen\Swamphen (pukeko)_1_sample_3.wav
Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\swamphen\Swamphen (pukeko)_1_sample_4.wav
Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\swamphen\Swamphen (pukeko)_1_sample_5.wav
Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\swamphen\Swamphen (pukeko)_2_sample_1.wav
Saved: C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\swamphen\Swamphen (pukeko)_2_sample_2.wav
Saved:

In [61]:
# Folder of swamphen clips, the stem shared by the new names, and the extension to match
# (renaming_files lower-cases file_type, so "WAV" matches and yields ".wav" names)
directory_path = r"C:\Users\44778\OneDrive\Desktop\UWE_Docs\7. Dissertation\CNN Data Files\audio file 5 seconds\swamphen"
new_file_name = "swamphen"
file_type = "WAV"

# Rename the matching files in place to "<new_file_name>_<n>.<file_type>"
renaming_files(
    directory_path=directory_path,
    new_file_name=new_file_name,
    file_type=file_type,
)

Renamed 67 files successfully.
