In [4]:
import logging
from pathlib import Path
from typing import Optional, Union

import pandas as pd

def rttm_to_dataframe(
    rttm_file: Union[str, Path],
    output_path: Optional[Union[str, Path]] = "/home/nele_pauline_suffo/outputs/vtc/df_output.pkl",
) -> pd.DataFrame:
    """
    Read a voice_type_classifier RTTM output file into a pandas DataFrame.

    The file is parsed as a header-less, whitespace-delimited table with ten
    fields per row. Only the audio file name, utterance start, utterance
    duration and voice type are kept, and an ``Utterance_End`` column
    (start + duration) is added. The resulting frame is optionally pickled.

    Parameters
    ----------
    rttm_file : str or Path
        the path to the RTTM file
    output_path : str or Path or None, optional
        where the resulting DataFrame is pickled. Defaults to the location
        this function has always written to. Missing parent directories are
        created. Pass ``None`` to skip saving entirely.

    Returns
    -------
    pd.DataFrame
        columns ``audio_file_name``, ``Utterance_Start``,
        ``Utterance_Duration``, ``Voice_type`` and ``Utterance_End``

    Raises
    ------
    Exception
        any error raised while reading the RTTM file or while writing the
        pickle is logged and re-raised unchanged.
    """
    logging.info("Reading RTTM file from: %s", rttm_file)

    try:
        df = pd.read_csv(
            rttm_file,
            # Split on any run of whitespace so files that use tabs or
            # repeated spaces between fields do not produce shifted columns.
            sep=r"\s+",
            header=None,
            names=[
                "Speaker",
                "audio_file_name",
                "audio_file_id",
                "Utterance_Start",
                "Utterance_Duration",
                "NA_1",
                "NA_2",
                "Voice_type",
                "NA_3",
                "NA_4",
            ],
        )
    except Exception as e:
        logging.error("Failed to read RTTM file: %s", e)
        raise

    logging.info("Successfully read RTTM file. Processing data...")

    # Drop unnecessary columns
    df = df.drop(columns=["Speaker", "audio_file_id", "NA_1", "NA_2", "NA_3", "NA_4"])  # noqa: E501
    df["Utterance_End"] = df["Utterance_Start"] + df["Utterance_Duration"]

    if output_path is not None:
        try:
            output_path = Path(output_path)
            # Create the target directory on demand so a fresh checkout or a
            # different machine does not fail on a missing folder.
            output_path.parent.mkdir(parents=True, exist_ok=True)
            df.to_pickle(output_path)
            logging.info("DataFrame successfully saved to: %s", output_path)
        except Exception as e:
            logging.error("Failed to save DataFrame to file: %s", e)
            raise

    # Logged last so the message is only emitted when the frame is really returned.
    logging.info("Data processing complete. Returning DataFrame.")

    return df

In [5]:
# Location of the voice_type_classifier RTTM output. Wrapped in Path so the
# argument matches the type rttm_to_dataframe is annotated to receive.
rttm_path = Path('/home/nele_pauline_suffo/projects/voice-type-classifier/output_voice_type_classifier/vtc/all.rttm')
df = rttm_to_dataframe(rttm_path)

In [6]:
# Preview the first rows of the DataFrame returned by rttm_to_dataframe.
df.head()

Unnamed: 0,audio_file_name,Utterance_Start,Utterance_Duration,Voice_type,Utterance_End
0,100898_16kHz,0.231,4.411,SPEECH,4.642
1,100898_16kHz,0.251,1.453,KCHI,1.704
2,100898_16kHz,1.011,0.255,CHI,1.266
3,100898_16kHz,1.851,1.774,FEM,3.625
4,100898_16kHz,3.731,0.78,KCHI,4.511
