**Description**:

This script automatically computes JM labels over all the grazing and rumination bouts of this dataset in an unsupervised way. It uses the Chew-Bite Intelligent Algorithm (CBIA) developed by Chelotti et al. (2018) to detect and classify JMs in terms of the chew ('c'), bite ('b') and chew-bite ('x') classes.

This script performs the following steps:


    (1) Convert the stereo MP3 file to mono WAV.

    (2) The channel corresponding to the microphone facing inward the cow's head is used.

    (3) It automatically detects and classifies the JMs over the entire WAV file.

    (4) Detected JMs that do not fall within a grazing or rumination bout are suppressed.

    (5) A post-processing step is applied to the classified JM labels:

        (5.A) Modify the labels 'x' to 'cb'.

        (5.B) Re-label all classified JMs produced during rumination bouts as "chew during rumination" ('r'). This corrects possible misclassification of JMs during rumination.

-------------------------------------------------------------------------
Author: Luciano Martinez Rau (Mid Sweden University / sinc(<i>i</i>)-CONICET)
------------------------------------------------------------------------

Specify the correct paths

In [1]:
import getpass
import os
import subprocess

import numpy as np
import pandas as pd
from pydub import AudioSegment

import bufar_algorithm

# Install ffmpeg through apt, prompting for the sudo password.
password = getpass.getpass()
command = "sudo -S apt install ffmpeg"
# `sudo -S` reads the password from stdin. Passing it through `input=` keeps it
# out of the shell command line (no `echo <password> | ...`), so it is neither
# visible in the process list nor interpreted by a shell.
install_result = subprocess.run(
    command.split(),
    input=password + "\n",
    text=True,
    check=False,  # best-effort, like the original: report the status, do not raise
)
# Last expression of the cell: the notebook displays the exit status (0 = success).
install_result.returncode

[sudo] password for luciano: 



Reading package lists...
Building dependency tree...
Reading state information...
ffmpeg is already the newest version (7:4.2.7-0ubuntu0.1).
The following packages were automatically installed and are no longer required:
  linux-headers-5.15.0-72-generic linux-hwe-5.15-headers-5.15.0-72
  linux-image-5.15.0-72-generic linux-modules-5.15.0-72-generic
  linux-modules-extra-5.15.0-72-generic
Use 'sudo apt autoremove' to remove them.
0 to upgrade, 0 to newly install, 0 to remove and 103 not to upgrade.


0

In [2]:
# Select the data locations depending on where the notebook runs.
# `get_ipython` only exists inside IPython/Jupyter; when this code is run as a
# plain Python script the lookup raises NameError, which is treated as
# "not on Colab" so the local paths are used.
try:
    running_in_colab = 'google.colab' in str(get_ipython())
except NameError:
    running_in_colab = False

if running_in_colab:
    from google.colab import drive
    drive.mount('/drive',force_remount=True)
    # path to read the MP3 audio recordings
    folderMP3 = "/drive/My Drive/Colab Notebooks/DatabaseMichigan/data/audio/"
    # path to the MP3AudioInformation.xlsx file
    tableMP3file = '/drive/My Drive/Colab Notebooks/DatabaseMichigan/data/MP3AudioInformation.xlsx'
    # path to save the JM labels
    folderJMlabel = "/drive/My Drive/Colab Notebooks/DatabaseMichigan/data/labels_JMs/"
else:
    # path to read the MP3 audio recordings
    folderMP3 = "/home/luciano/Music/"
    # path to the MP3AudioInformation.xlsx file
    tableMP3file = 'data/MP3AudioInformation.xlsx'
    # path to save the JM labels
    folderJMlabel = "data/labels_JMs/"

Processing algorithm

In [3]:
def save_label_files(*args, out_dir=None):
    """Write the JM label files of one recording.

    Four files are written next to each other, all named after the recording
    (its extension is stripped):

    * ``<name>_JM.txt`` / ``<name>_JM.csv``: start, end and label of each JM
      (tab-separated / comma-separated).
    * ``<name>_JM_onemark.txt`` / ``<name>_JM_onemark.csv``: the mean of start
      and end, followed by the label.

    Positional arguments:
        args[0]: name of the MP3 file the labels belong to.
        args[1] (optional): 2-D array with one JM per row; columns 0 and 1
            must be convertible to float (start, end), column 2 is the label.
            When it is missing (or more than two positional arguments are
            given) empty files are written instead.

    Keyword arguments:
        out_dir: destination prefix. Defaults to the module-level
            ``folderJMlabel``. It is concatenated as-is, so it must end with
            a path separator.
    """
    mp3file = args[0]
    folder = folderJMlabel if out_dir is None else out_dir
    stem = folder + os.path.splitext(mp3file)[0]

    if len(args) == 2:
        filtered_JM = np.asarray(args[1])
        marks = filtered_JM[:, :2].astype(float)
        labels = filtered_JM[:, 2]
        # Keep the timestamps in numeric columns: in a string array (which is
        # what mixing floats and labels in one NumPy array produces)
        # `float_format` is silently ignored by `to_csv`.
        two_marks = pd.DataFrame({0: marks[:, 0], 1: marks[:, 1], 2: labels})
        one_mark = pd.DataFrame({0: np.mean(marks, axis=1), 1: labels})
    else:
        two_marks = pd.DataFrame()
        one_mark = pd.DataFrame()

    # Same write order as before: two-mark txt/csv, then one-mark txt/csv.
    for frame, suffix in ((two_marks, "_JM"), (one_mark, "_JM_onemark")):
        frame.to_csv(stem + suffix + ".txt", index=None, sep='\t', header=None, float_format='%.2f')
        frame.to_csv(stem + suffix + ".csv", index=None, header=None, float_format='%.2f')
     

In [8]:
df = pd.read_excel(tableMP3file)

# Activity labels that are treated as rumination bouts.
RUMINATION_LABELS = ['Rumination', 'Rumination (lying-down)',
                     'Rumination (windy)', 'Rumination (raining)']

# Scratch mono WAV handed to the detector. Defined before the loop so that the
# final cleanup also works when every recording is skipped.
temp_wav = "temporal.wav"


def _events_within_bouts(events, bouts):
    """Return the (start, end, label) events lying entirely inside one of `bouts`.

    `bouts` is a DataFrame with 'Start' and 'Finish' columns.
    """
    return [
        (start, end, label)
        for start, end, label in events
        if any((start >= bouts['Start']) & (end <= bouts['Finish']))
    ]


try:
    for _, record in df.iterrows():
        mp3file = record["Name file"]
        stem = os.path.splitext(mp3file)[0]

        # Recordings that already have a label file are not processed again.
        if os.path.exists(folderJMlabel + stem + "_JM.txt"):
            continue

        # read channel: index 0 when channel 1 faces inwards (or is marked "-"),
        # otherwise index 1
        facing = record["Channel 1 is the microphone facing…"]
        channel = 0 if facing in ("inwards", "-") else 1

        # Load an MP3 file
        audio = AudioSegment.from_mp3(folderMP3 + mp3file)

        # convert MP3 to mono WAV, keeping only the selected channel
        audio.split_to_mono()[channel].export(temp_wav, format="wav")

        # compute all JMs events using the CBIA incorporated into the BUFAR algorithms
        # NOTE(review): the original called `bufar_original.run`, a name that is
        # never imported here; the module imported by this notebook is
        # `bufar_algorithm`. Confirm that it exposes the same `run` entry point.
        with open(temp_wav, 'rb') as audio_file:
            _, events, _ = bufar_algorithm.run(audio_file)

        # delete JM labels outside the grazing and rumination bouts
        activities_labels = pd.read_csv(folderMP3 + stem + ".csv")
        grazing_bouts = activities_labels[activities_labels["Label"] == "Grazing"]
        rumination_bouts = activities_labels[activities_labels["Label"].isin(RUMINATION_LABELS)]

        # for grazing: keep the predicted class, renaming 'x' to 'cb'
        grazing_JM = [
            [start, end, 'cb' if label == 'x' else label]
            for start, end, label in _events_within_bouts(events, grazing_bouts)
        ]
        # for rumination: every JM is relabelled 'r'
        rumination_JM = [
            [start, end, 'r']
            for start, end, _ in _events_within_bouts(events, rumination_bouts)
        ]

        # merge, ordered by numeric start time. Sorting happens before the rows
        # are turned into a (string) NumPy array, where an argsort would order
        # the timestamps lexicographically ('10.2' before '9.5').
        labelled_JM = sorted(grazing_JM + rumination_JM, key=lambda jm: float(jm[0]))

        if labelled_JM:
            save_label_files(mp3file, np.array(labelled_JM))
        else:
            # No JM inside any bout: write empty label files.
            save_label_files(mp3file)
finally:
    # Remove the scratch WAV even if a recording failed half-way.
    if os.path.exists(temp_wav):
        os.remove(temp_wav)