In [1]:
import numpy as np

# Set the seed value for experiment reproducibility.
# seed = 42
# np.random.seed(seed)

#### Step One - Convert the .LOG file in order to calculate silence time

In [2]:
import pandas as pd

def convert_log(log_path):
    """Parse a recorder .LOG file into one row per logged recording.

    The file is loaded with ``pd.read_csv`` as a single text column. Because
    the default header handling is used, the first line of the file becomes
    the column header and is never treated as a recording. Every remaining
    line is split on tab characters into date / time / file / length. Lines
    whose length field does not contain the letter 's' (status lines such as
    "Monitoring Started") are discarded.

    Parameters
    ----------
    log_path : str
        Path of the .LOG file; it is decoded as ISO-8859-1.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``file``, ``rec``, ``hour``, ``minute`` and
        ``seconds`` holding the text found in the log, with a fresh
        0..n-1 index. The frame is empty (same columns) when the log
        contains no recording lines.
    """
    result_columns = ['date', 'file', 'rec', 'hour', 'minute', 'seconds']

    raw = pd.read_csv(log_path, encoding='iso-8859-1')  # one text column per line
    raw.columns = ['title']  # rename the single column to 'title'

    # The field separator in the .LOG file is a tab character.
    # `n` / `expand` are passed by keyword: the positional form was removed in pandas 2.0.
    fields = raw['title'].str.split('\t', n=4, expand=True)
    # Pad to four columns so a log made only of status lines does not fail below.
    fields = fields.reindex(columns=range(4))

    # A recording line is recognised by the 's' unit in its length field;
    # missing values stringify to 'nan'/'None', which contain no 's'.
    is_recording = fields[3].map(lambda value: 's' in str(value)).astype(bool)
    if not is_recording.any():
        return pd.DataFrame(columns=result_columns)

    kept = fields.loc[is_recording].reset_index(drop=True)
    kept.columns = ['date', 'time', 'file', 'length']

    # 'length' looks like "<value> s": keep the value, drop the unit.
    length_parts = kept['length'].str.split(' ', n=2, expand=True)
    # 'time' looks like "hour:minute:seconds".
    clock_parts = kept['time'].str.split(':', n=3, expand=True).reindex(columns=range(3))

    return pd.DataFrame({
        'date': kept['date'],
        'file': kept['file'],
        'rec': length_parts[0],
        'hour': clock_parts[0],
        'minute': clock_parts[1],
        'seconds': clock_parts[2],
    })

#### Step Two - Calculate the silence between recordings

In [3]:
import numpy as np

def calc_silence(df1):
    """Add a ``silence`` column: the pause after each recording, in milliseconds.

    For every row the pause is the time between the end of that recording
    (its start time plus ``rec`` seconds) and the start of the next row.
    Start times are rebuilt from the ``hour`` / ``minute`` / ``seconds``
    columns, so gaps longer than a minute and hour roll-overs are measured
    correctly. Overlapping recordings yield 0 rather than a negative pause.
    The last row always gets 0 because nothing follows it.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Must provide ``minute``, ``seconds`` and ``rec`` columns whose values
        convert to float. ``hour`` is used when present; without it a start
        time that appears to go backwards is assumed to have crossed an hour
        boundary.

    Returns
    -------
    pandas.DataFrame
        The same ``df1`` object, with the ``silence`` column added in place.
    """
    row_count = len(df1)
    seconds = df1["seconds"].astype(float).to_numpy()
    minutes = df1["minute"].astype(float).to_numpy()
    rec = df1["rec"].astype(float).to_numpy()

    if "hour" in df1.columns:
        hours = df1["hour"].astype(float).to_numpy()
        wrap_period = 24 * 3600.0  # a backwards jump means the clock passed midnight
    else:
        hours = np.zeros(row_count)
        wrap_period = 3600.0  # without hours, a backwards jump means a new hour

    start_times = hours * 3600.0 + minutes * 60.0 + seconds

    silence = np.zeros(row_count)  # stays all-zero for empty / single-row input
    if row_count > 1:
        elapsed = np.diff(start_times)
        elapsed = np.where(elapsed < 0, elapsed + wrap_period, elapsed)
        # Pause = time to next start minus this recording's length, never negative.
        silence[:-1] = np.clip(elapsed - rec[:-1], 0.0, None)

    df1['silence'] = (silence * 1000).tolist()  # seconds -> milliseconds

    return df1

#### Step Three - Combine all recordings in folder and pad the silence 

In [4]:
from pydub import AudioSegment
import os # Needed for os.listdir
import random

def combine(dataframe, full_path, export_name):
    """Concatenate the logged recordings, padded with silence, into .wav parts.

    Rows of ``dataframe`` are processed in order. Each listed .wav/.WAV file
    found under ``full_path`` is appended to the current part, followed by
    ``silence`` milliseconds of silence. The running length is tracked from
    the logged values (``silence`` / 1000 + ``rec``), not measured from the
    audio. When it reaches a randomly drawn target between LOW and HIGH, the
    part is written to ``<export_name>_p<part>.wav`` and a new part begins.
    A trailing part is written only if its length reaches THRESHOLD.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Needs ``file``, ``silence`` (milliseconds) and ``rec`` columns.
        ``rec`` is presumably in seconds, matching the 's' unit in the log.
    full_path : str
        Folder containing the audio files named in ``file``.
    export_name : str
        Output path prefix, without the ``_p<N>.wav`` suffix.

    Returns
    -------
    None
    """
    LOW = 35        # shortest target length of an exported part
    HIGH = 65       # longest target length of an exported part
    THRESHOLD = 35  # minimum length for the final, partial part to be kept
    combined = AudioSegment.silent(duration=0)
    file_length = 0
    part = 1
    # One target per part: redrawing it for every file (as before) changes the
    # cut-off while a part is being built and skews part lengths towards LOW.
    target_length = random.randint(LOW, HIGH)
    for index, row in dataframe.iterrows(): # iterate over rows and indices
        filename = str(row['file'])
        path_to_file = os.path.join(full_path, filename)
        if not os.path.exists(path_to_file):
            print("sound file was not found in .LOG file:", path_to_file)
            continue
        if not filename.endswith((".wav", ".WAV")):
            continue  # only wav files are combined

        silence_ms = row['silence']
        print("current audio file:", path_to_file)
        audio = AudioSegment.from_wav(path_to_file)
        print("current silence length:", silence_ms)
        silence = AudioSegment.silent(duration=silence_ms) # silence time has to be in milliseconds
        file_length += float(silence_ms)/1000 + float(row['rec'])
        print("total file length until now:", file_length)
        combined = combined + audio + silence
        print("sound file combined successfully:", path_to_file)

        if file_length >= target_length:
            combined.export(export_name + "_p" + str(part) + ".wav", format='wav')
            combined = AudioSegment.silent(duration=0)
            part += 1
            file_length = 0
            target_length = random.randint(LOW, HIGH)  # new target for the next part

    if file_length >= THRESHOLD:
        combined.export(export_name + "_p" + str(part) + ".wav", format='wav')
    return



#### Step Four - Split into two labels HT and WT

In [5]:
import os, os.path, shutil

def split_data(raw_data):
    """Sort exported wav files from ``raw_data`` into HT and WT folders.

    Creates ``split_data_increased/`` with ``HT/`` and ``WT/`` sub-folders
    (relative to the current working directory) when they are missing, then
    moves every ``.wav`` / ``.WAV`` file directly inside ``raw_data`` whose
    name contains ``_HT`` or ``_WT`` into the matching sub-folder. ``_HT`` is
    checked first. Other files are left where they are.

    Parameters
    ----------
    raw_data : str
        Folder to scan (not recursive).

    Returns
    -------
    None
    """
    output = r'split_data_increased/' # destination root for the sorted files
    # (tag searched for in the file name, folder the file is moved to), in priority order
    destinations = (
        ("_HT", output + r'HT/'),
        ("_WT", output + r'WT/'),
    )

    for folder in (output,) + tuple(target for _, target in destinations):
        if not os.path.exists(folder):
            os.makedirs(folder)

    for filename in os.listdir(raw_data): # every entry directly under raw_data
        if not filename.endswith((".wav", ".WAV")):
            continue
        for tag, new_path in destinations:
            if tag in filename:
                shutil.move(raw_data + "/" + filename, new_path)
                print("file moved successfully:", filename)
                break
    return

#### Final Step - Running all functions together on the data (fingers crossed)

## 2015

### Session1

In [6]:
import pandas as pd

# Batch-process every "session1" recording folder listed in the 2015 table:
# parse its .LOG, compute the silences, save the table as .csv and export the
# combined .wav parts next to the data root.
df = pd.read_excel(r'2015Rec.xlsx')

raw_data = r'2015' # data ROOT folder for sound files (highest hierarchy based on table)
session_folder = "session1"        # sub-folder holding this session's recordings
session_tag = "s1"                 # short session label used in exported file names
recording_days = (4, 6, 8, 10, 12) # the 'day N' columns checked in the table

for index, row in df.iterrows(): # iterate over rows and indices
    mother = str(row['Mother'])
    name = str(row['name'])
    genotype = str(row['Genotytpe'])  # column name spelled as it is in the spreadsheet

    for day_number in recording_days:
        # a "V" in the 'day N' column marks a day that should be processed
        if str(row['day ' + str(day_number)]) != "V":
            continue

        day = "day_" + str(day_number)
        folder_path = raw_data + "/" + mother + "_" + str(row['Maternal Genotype']) + "/" + name + "_" + genotype + "/" + day + "/" + session_folder + "/"
        print("current folder path:", folder_path)

        # A folder missing on disk is reported and skipped instead of aborting the whole batch.
        if not os.path.isdir(folder_path):
            print("folder does not exist:", folder_path)
            continue

        entries = os.listdir(folder_path)
        if not entries:
            print("folder is empty")
            continue

        wav_files = [entry for entry in entries if entry.endswith((".wav", ".WAV"))]
        log_files = [entry for entry in entries if entry.endswith(".LOG")]
        if not wav_files:
            print("there are no .wav files in directory: ", folder_path)
            continue
        if not log_files:
            print("there is no .LOG file in directory: ", folder_path)
            continue

        # If several .LOG files exist, the last one listed is used (as before).
        log_path = folder_path + log_files[-1]

        # Kept in its own variable so the table being iterated (`df`) is not overwritten.
        log_df = convert_log(log_path)
        log_df = calc_silence(log_df)

        export_name = raw_data + "/" + mother + "_" + name + "_" + day + "_" + session_tag + "_" + genotype
        log_df.to_csv(export_name + '.csv') # save the file in .csv format for further use if needed
        print("exporting...:", export_name)
        combine(log_df, folder_path, export_name)
        print("finished exporting:", export_name)

current folder path: 2015/08001P_HT/08130I_HT/day_12/session1/
exporting...: 2015/08001P_08130I_day_12_s1_HT
current audio file: 2015/08001P_HT/08130I_HT/day_12/session1/T0000001.WAV
current silence length: 9900.0
total file length until now: 11.0
sound file combined successfully: 2015/08001P_HT/08130I_HT/day_12/session1/T0000001.WAV
current audio file: 2015/08001P_HT/08130I_HT/day_12/session1/T0000002.WAV
current silence length: 800.0
total file length until now: 13.0
sound file combined successfully: 2015/08001P_HT/08130I_HT/day_12/session1/T0000002.WAV
current audio file: 2015/08001P_HT/08130I_HT/day_12/session1/T0000003.WAV
current silence length: 300.00000000000006
total file length until now: 15.0
sound file combined successfully: 2015/08001P_HT/08130I_HT/day_12/session1/T0000003.WAV
current audio file: 2015/08001P_HT/08130I_HT/day_12/session1/T0000004.WAV
current silence length: 199.99999999999994
total file length until now: 17.0
sound file combined successfully: 2015/08001P_HT

### Session2

In [7]:
import pandas as pd

# Batch-process every "session2" recording folder listed in the 2015 table:
# parse its .LOG, compute the silences, save the table as .csv and export the
# combined .wav parts next to the data root.
df = pd.read_excel(r'2015Rec.xlsx')

raw_data = r'2015' # data ROOT folder for sound files (highest hierarchy based on table)
session_folder = "session2"        # sub-folder holding this session's recordings
session_tag = "s2"                 # short session label used in exported file names
recording_days = (4, 6, 8, 10, 12) # the 'day N' columns checked in the table

for index, row in df.iterrows(): # iterate over rows and indices
    mother = str(row['Mother'])
    name = str(row['name'])
    genotype = str(row['Genotytpe'])  # column name spelled as it is in the spreadsheet

    for day_number in recording_days:
        # a "V" in the 'day N' column marks a day that should be processed
        if str(row['day ' + str(day_number)]) != "V":
            continue

        day = "day_" + str(day_number)
        folder_path = raw_data + "/" + mother + "_" + str(row['Maternal Genotype']) + "/" + name + "_" + genotype + "/" + day + "/" + session_folder + "/"
        print("current folder path:", folder_path)

        # A folder missing on disk is reported and skipped instead of aborting the whole batch.
        if not os.path.isdir(folder_path):
            print("folder does not exist:", folder_path)
            continue

        entries = os.listdir(folder_path)
        if not entries:
            print("folder is empty")
            continue

        wav_files = [entry for entry in entries if entry.endswith((".wav", ".WAV"))]
        log_files = [entry for entry in entries if entry.endswith(".LOG")]
        if not wav_files:
            print("there are no .wav files in directory: ", folder_path)
            continue
        if not log_files:
            print("there is no .LOG file in directory: ", folder_path)
            continue

        # If several .LOG files exist, the last one listed is used (as before).
        log_path = folder_path + log_files[-1]

        # Kept in its own variable so the table being iterated (`df`) is not overwritten.
        log_df = convert_log(log_path)
        log_df = calc_silence(log_df)

        export_name = raw_data + "/" + mother + "_" + name + "_" + day + "_" + session_tag + "_" + genotype
        log_df.to_csv(export_name + '.csv') # save the file in .csv format for further use if needed
        print("exporting...:", export_name)
        combine(log_df, folder_path, export_name)
        print("finished exporting:", export_name)

current folder path: 2015/08001P_HT/08130I_HT/day_12/session2/
exporting...: 2015/08001P_08130I_day_12_s2_HT
current audio file: 2015/08001P_HT/08130I_HT/day_12/session2/T0000001.WAV
current silence length: 300.00000000000006
total file length until now: 2.0
sound file combined successfully: 2015/08001P_HT/08130I_HT/day_12/session2/T0000001.WAV
current audio file: 2015/08001P_HT/08130I_HT/day_12/session2/T0000002.WAV
current silence length: 4200.0
total file length until now: 8.0
sound file combined successfully: 2015/08001P_HT/08130I_HT/day_12/session2/T0000002.WAV
current audio file: 2015/08001P_HT/08130I_HT/day_12/session2/T0000003.WAV
current silence length: 4500.0
total file length until now: 14.0
sound file combined successfully: 2015/08001P_HT/08130I_HT/day_12/session2/T0000003.WAV
current audio file: 2015/08001P_HT/08130I_HT/day_12/session2/T0000004.WAV
current silence length: 800.0
total file length until now: 16.0
sound file combined successfully: 2015/08001P_HT/08130I_HT/day

In [8]:
# Move the exported .wav parts found directly under `raw_data` into
# split_data_increased/HT/ or split_data_increased/WT/, chosen by the
# "_HT" / "_WT" tag in each file name.
split_data(raw_data)

file moved successfully: 08001P_08130I_day_12_s1_HT_p1.wav
file moved successfully: 08001P_08130I_day_12_s1_HT_p10.wav
file moved successfully: 08001P_08130I_day_12_s1_HT_p11.wav
file moved successfully: 08001P_08130I_day_12_s1_HT_p2.wav
file moved successfully: 08001P_08130I_day_12_s1_HT_p3.wav
file moved successfully: 08001P_08130I_day_12_s1_HT_p4.wav
file moved successfully: 08001P_08130I_day_12_s1_HT_p5.wav
file moved successfully: 08001P_08130I_day_12_s1_HT_p6.wav
file moved successfully: 08001P_08130I_day_12_s1_HT_p7.wav
file moved successfully: 08001P_08130I_day_12_s1_HT_p8.wav
file moved successfully: 08001P_08130I_day_12_s1_HT_p9.wav
file moved successfully: 08001P_08130I_day_12_s2_HT_p1.wav
file moved successfully: 08001P_08130I_day_12_s2_HT_p10.wav
file moved successfully: 08001P_08130I_day_12_s2_HT_p11.wav
file moved successfully: 08001P_08130I_day_12_s2_HT_p2.wav
file moved successfully: 08001P_08130I_day_12_s2_HT_p3.wav
file moved successfully: 08001P_08130I_day_12_s2_HT_

## 2018

### Session1

In [9]:
import pandas as pd

# Batch-process the 2018 / session1 recordings listed in the spreadsheet:
# for every table row and every recording day marked "V", locate the session
# folder, convert its .LOG file, compute the silence between recordings, save
# that table as .csv and hand it to combine() to build the combined sound file.
# NOTE: relies on `os`, convert_log, calc_silence and combine from earlier cells.

df = pd.read_excel(r'2018Rec.xlsx') # table that lists which recordings exist (one "day N" column per day)

raw_data = r'2018' # data ROOT folder for sound files (highest hierarchy based on table)

session_folder = "session1" # sub-folder that holds this session's files
session_tag = "s1" # short session label used in the exported file names
recording_days = (4, 6, 8, 10, 12) # each of these has a "day N" column in the table

for index, row in df.iterrows(): # iterate over rows and indices
    for day_number in recording_days:

        # check if this day exist in table (marked with "V"):
        if str(row['day ' + str(day_number)]) != "V":
            continue

        day = "day_" + str(day_number)
        # 'Genotytpe' is spelled exactly like the spreadsheet column header -- do not "fix" it here
        folder_path = raw_data + "/" + str(row['Mother']) + "_" + str(row['Maternal Genotype']) + "/" + str(row['name']) + "_" + str(row['Genotytpe']) + "/" + day + "/" + session_folder + "/"
        print("current folder path:", folder_path)

        # a row marked "V" whose folder is missing should not abort the whole batch
        if not os.path.isdir(folder_path):
            print("folder does not exist:", folder_path)
            continue

        filenames = os.listdir(folder_path) # list the folder once and reuse the result
        if not filenames:
            print("folder is empty")
            continue

        wav_files = [name for name in filenames if name.endswith((".wav", ".WAV"))]
        log_files = [name for name in filenames if name.endswith(".LOG")]
        if not wav_files:
            print("there are no .wav files in directory: ", folder_path)
            continue
        if not log_files:
            print("there is no .LOG file in directory: ", folder_path)
            continue

        log = log_files[-1] # if several .LOG files exist, the last one listed is used (same as before)
        log_path = folder_path + log

        # keep the spreadsheet table in `df`; the per-recording table gets its own name
        silence_df = calc_silence(convert_log(log_path))

        # str() on every cell so numeric IDs in the table do not raise TypeError
        export_name = raw_data + "/" + str(row['Mother']) + "_" + str(row['name']) + "_" + day + "_" + session_tag + "_" + str(row['Genotytpe'])
        silence_df.to_csv(export_name + '.csv') # save the file in .csv format for further use if needed
        print("exporting...:", export_name)
        combine(silence_df, folder_path, export_name)
        print("finished exporting:", export_name)

current folder path: 2018/17409I_WT/17477I_WT/day_6/session1/
exporting...: 2018/17409I_17477I_day_6_s1_WT
current audio file: 2018/17409I_WT/17477I_WT/day_6/session1/T0000001.wav
current silence length: 2900.0
total file length until now: 4.0
sound file combined successfully: 2018/17409I_WT/17477I_WT/day_6/session1/T0000001.wav
current audio file: 2018/17409I_WT/17477I_WT/day_6/session1/T0000002.wav
current silence length: 0.0
total file length until now: 5.5
sound file combined successfully: 2018/17409I_WT/17477I_WT/day_6/session1/T0000002.wav
current audio file: 2018/17409I_WT/17477I_WT/day_6/session1/T0000003.wav
current silence length: 4500.0
total file length until now: 11.5
sound file combined successfully: 2018/17409I_WT/17477I_WT/day_6/session1/T0000003.wav
current audio file: 2018/17409I_WT/17477I_WT/day_6/session1/T0000004.wav
current silence length: 6600.0
total file length until now: 19.5
sound file combined successfully: 2018/17409I_WT/17477I_WT/day_6/session1/T0000004.wa

In [10]:
# Run split_data (defined in an earlier cell) on the current data ROOT folder held in raw_data.
# NOTE(review): the output below suggests it moves "<export_name>_pN.wav" parts -- confirm against split_data's definition.
split_data(raw_data)

file moved successfully: 17409I_17477I_day_10_s1_WT_p1.wav
file moved successfully: 17409I_17477I_day_10_s1_WT_p10.wav
file moved successfully: 17409I_17477I_day_10_s1_WT_p11.wav
file moved successfully: 17409I_17477I_day_10_s1_WT_p2.wav
file moved successfully: 17409I_17477I_day_10_s1_WT_p3.wav
file moved successfully: 17409I_17477I_day_10_s1_WT_p4.wav
file moved successfully: 17409I_17477I_day_10_s1_WT_p5.wav
file moved successfully: 17409I_17477I_day_10_s1_WT_p6.wav
file moved successfully: 17409I_17477I_day_10_s1_WT_p7.wav
file moved successfully: 17409I_17477I_day_10_s1_WT_p8.wav
file moved successfully: 17409I_17477I_day_10_s1_WT_p9.wav
file moved successfully: 17409I_17477I_day_12_s1_WT_p1.wav
file moved successfully: 17409I_17477I_day_12_s1_WT_p2.wav
file moved successfully: 17409I_17477I_day_12_s1_WT_p3.wav
file moved successfully: 17409I_17477I_day_6_s1_WT_p1.wav
file moved successfully: 17409I_17477I_day_6_s1_WT_p2.wav
file moved successfully: 17409I_17477I_day_6_s1_WT_p3.wa

## 2022

### Session1

In [11]:
import pandas as pd

# Batch-process the 2022 / session1 recordings listed in the spreadsheet:
# for every table row and every recording day marked "V", locate the session
# folder, convert its .LOG file, compute the silence between recordings, save
# that table as .csv and hand it to combine() to build the combined sound file.
# NOTE: relies on `os`, convert_log, calc_silence and combine from earlier cells.

df = pd.read_excel(r'2022Rec.xlsx') # table that lists which recordings exist (one "day N" column per day)

raw_data = r'2022' # data ROOT folder for sound files (highest hierarchy based on table)

session_folder = "session1" # sub-folder that holds this session's files
session_tag = "s1" # short session label used in the exported file names
recording_days = (4, 6) # only these days are processed for the 2022 table

for index, row in df.iterrows(): # iterate over rows and indices
    for day_number in recording_days:

        # check if this day exist in table (marked with "V"):
        if str(row['day ' + str(day_number)]) != "V":
            continue

        day = "day_" + str(day_number)
        # 'Genotytpe' is spelled exactly like the spreadsheet column header -- do not "fix" it here
        folder_path = raw_data + "/" + str(row['Mother']) + "_" + str(row['Maternal Genotype']) + "/" + str(row['name']) + "_" + str(row['Genotytpe']) + "/" + day + "/" + session_folder + "/"
        print("current folder path:", folder_path)

        # a row marked "V" whose folder is missing should not abort the whole batch
        if not os.path.isdir(folder_path):
            print("folder does not exist:", folder_path)
            continue

        filenames = os.listdir(folder_path) # list the folder once and reuse the result
        if not filenames:
            print("folder is empty")
            continue

        wav_files = [name for name in filenames if name.endswith((".wav", ".WAV"))]
        log_files = [name for name in filenames if name.endswith(".LOG")]
        if not wav_files:
            print("there are no .wav files in directory: ", folder_path)
            continue
        if not log_files:
            print("there is no .LOG file in directory: ", folder_path)
            continue

        log = log_files[-1] # if several .LOG files exist, the last one listed is used (same as before)
        log_path = folder_path + log

        # keep the spreadsheet table in `df`; the per-recording table gets its own name
        silence_df = calc_silence(convert_log(log_path))

        # str() on every cell so numeric IDs in the table do not raise TypeError
        export_name = raw_data + "/" + str(row['Mother']) + "_" + str(row['name']) + "_" + day + "_" + session_tag + "_" + str(row['Genotytpe'])
        silence_df.to_csv(export_name + '.csv') # save the file in .csv format for further use if needed
        print("exporting...:", export_name)
        combine(silence_df, folder_path, export_name)
        print("finished exporting:", export_name)


current folder path: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session1/
exporting...: 2022/22737K_22737K_1A_BLUE_day_4_s1_WT
current audio file: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session1/T0000001.wav
current silence length: 899.9999999999999
total file length until now: 2.0
sound file combined successfully: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session1/T0000001.wav
current audio file: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session1/T0000002.wav
current silence length: 899.9999999999999
total file length until now: 4.0
sound file combined successfully: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session1/T0000002.wav
current audio file: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session1/T0000003.wav
current silence length: 8900.0
total file length until now: 14.0
sound file combined successfully: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session1/T0000003.wav
current audio file: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session1/T0000004.wav
current silence length: 20900.0
total file length unt

### Session2

In [12]:
import pandas as pd

# Batch-process the 2022 / session2 recordings listed in the spreadsheet:
# for every table row and every recording day marked "V", locate the session
# folder, convert its .LOG file, compute the silence between recordings, save
# that table as .csv and hand it to combine() to build the combined sound file.
# NOTE: relies on `os`, convert_log, calc_silence and combine from earlier cells.

df = pd.read_excel(r'2022Rec.xlsx') # table that lists which recordings exist (one "day N" column per day)

raw_data = r'2022' # data ROOT folder for sound files (highest hierarchy based on table)

session_folder = "session2" # sub-folder that holds this session's files
session_tag = "s2" # short session label used in the exported file names
recording_days = (4, 6) # only these days are processed for the 2022 table

for index, row in df.iterrows(): # iterate over rows and indices
    for day_number in recording_days:

        # check if this day exist in table (marked with "V"):
        if str(row['day ' + str(day_number)]) != "V":
            continue

        day = "day_" + str(day_number)
        # 'Genotytpe' is spelled exactly like the spreadsheet column header -- do not "fix" it here
        folder_path = raw_data + "/" + str(row['Mother']) + "_" + str(row['Maternal Genotype']) + "/" + str(row['name']) + "_" + str(row['Genotytpe']) + "/" + day + "/" + session_folder + "/"
        print("current folder path:", folder_path)

        # a row marked "V" whose folder is missing should not abort the whole batch
        if not os.path.isdir(folder_path):
            print("folder does not exist:", folder_path)
            continue

        filenames = os.listdir(folder_path) # list the folder once and reuse the result
        if not filenames:
            print("folder is empty")
            continue

        wav_files = [name for name in filenames if name.endswith((".wav", ".WAV"))]
        log_files = [name for name in filenames if name.endswith(".LOG")]
        if not wav_files:
            print("there are no .wav files in directory: ", folder_path)
            continue
        if not log_files:
            print("there is no .LOG file in directory: ", folder_path)
            continue

        log = log_files[-1] # if several .LOG files exist, the last one listed is used (same as before)
        log_path = folder_path + log

        # keep the spreadsheet table in `df`; the per-recording table gets its own name
        silence_df = calc_silence(convert_log(log_path))

        # str() on every cell so numeric IDs in the table do not raise TypeError
        export_name = raw_data + "/" + str(row['Mother']) + "_" + str(row['name']) + "_" + day + "_" + session_tag + "_" + str(row['Genotytpe'])
        silence_df.to_csv(export_name + '.csv') # save the file in .csv format for further use if needed
        print("exporting...:", export_name)
        combine(silence_df, folder_path, export_name)
        print("finished exporting:", export_name)


current folder path: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session2/
exporting...: 2022/22737K_22737K_1A_BLUE_day_4_s2_WT
current audio file: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session2/T0000001.wav
current silence length: 28900.0
total file length until now: 30.0
sound file combined successfully: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session2/T0000001.wav
current audio file: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session2/T0000001.wav
current silence length: 0.0
total file length until now: 31.2
sound file combined successfully: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session2/T0000001.wav
current audio file: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session2/T0000002.wav
current silence length: 600.0000000000001
total file length until now: 33.2
sound file combined successfully: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session2/T0000002.wav
current audio file: 2022/22737K_HT/22737K_1A_BLUE_WT/day_4/session2/T0000003.wav
current silence length: 0.0
total file length until now: 35.3000

In [13]:
# Run split_data (defined in an earlier cell) on the current data ROOT folder held in raw_data.
# NOTE(review): the output below suggests it moves "<export_name>_pN.wav" parts -- confirm against split_data's definition.
split_data(raw_data)

file moved successfully: 22731O_22731O_1A_BLUE_day_4_s1_HT_p1.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_4_s2_HT_p1.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_4_s2_HT_p2.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_4_s2_HT_p3.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_4_s2_HT_p4.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_4_s2_HT_p5.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_6_s1_HT_p1.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_6_s1_HT_p2.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_6_s1_HT_p3.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_6_s1_HT_p4.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_6_s2_HT_p1.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_6_s2_HT_p2.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_6_s2_HT_p3.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_6_s2_HT_p4.wav
file moved successfully: 22731O_22731O_1A_BLUE_day_6_s2_HT_p5.wav
file moved