# Functions to run RIBBIT 

## !!! Warning: You should NOT have to edit these functions to run the model on new data !!!

Note: this file must be run after the base parameters have been defined, because the functions below use them as default argument values.

In [None]:
# run RIBBIT for every index in a df where the index is a file path; save the score in that df 

# Inputs: 
# df - dataframe with filepaths to audio files as the indices of the dataframe
#      It is recommended that the dataframe includes columns with: 
#          - the year the file was recorded
#          - the date and time the file was recorded 
#          - any interesting groupings for the files (e.g. logger it was recorded with, pond/wetland where it was recorded, etc.)
# Parameters for the RIBBIT model: pulse_rate_range, signal_band, noise_bands, clip_duration, clip_overlap, window_samples 
#           Defaults are the base parameters defined above 
# param_set_lab - a label used when running multiple parameter sets (e.g. during sensitivity analysis (SA))
#       to indicate which set of parameters the "score" and "time_stamp" columns correspond to
#       blank if only running one set of parameters
# folder_lab - suffix appended to the name of the folder where RIBBIT scores are saved. 
#       The folder will be named "ribbit_scores" + folder_lab (e.g. folder_lab = "_test" gives "ribbit_scores_test")

# Outputs: 
# ribbit_scores.csv - datasheet with Ribbit scores for each file 
# param_values.txt - text file with parameter values used for the model. 
# (if running multiple parameter sets, the csv and txt files will be labeled with param_set_lab)

def run_ribbit_for_indices(df, pulse_rate_range=base_pulse_rate_range,
                                signal_band=base_signal_band,
                                noise_bands=base_noise_bands,
                                clip_duration=base_clip_duration,
                                clip_overlap=base_clip_overlap,
                                window_samples = base_window_samples,
                                param_set_lab="",
                                folder_lab = ''):
    """Run RIBBIT on every audio file path in df.index and save the scores.

    For each path, a spectrogram is built and passed to ribbit(); the highest
    clip score (multiplied by 10,000) and the start time of that clip are
    written back into df. The dataframe is modified in place.

    Args:
        df: dataframe whose index holds the paths of the audio files.
        pulse_rate_range, signal_band, noise_bands, clip_duration,
            clip_overlap: passed through unchanged to ribbit().
        window_samples: passed to Spectrogram.from_audio().
        param_set_lab: suffix appended to the "score" and "time_stamp" column
            names and to the csv/txt file names.
        folder_lab: suffix appended to "./ribbit_scores" to form the output
            folder name.

    Side effects:
        - Creates the output folder if it does not exist.
        - Writes param_values<param_set_lab>.txt with the parameter values.
        - Writes ribbit_scores<param_set_lab>.csv every 100 files (unsorted
          backup) and once at the end (sorted by score, highest first).
        - Adds/overwrites the columns "score"+param_set_lab,
          "time_stamp"+param_set_lab and "date" in df.

    Returns:
        None.
    """
    folder_path = './ribbit_scores' + folder_lab
    # exist_ok avoids a failure if the folder is created between a check and the call
    os.makedirs(folder_path, exist_ok=True)

    csv_file_name = folder_path + '/ribbit_scores' + param_set_lab + '.csv'
    score_col = 'score' + param_set_lab
    time_col = 'time_stamp' + param_set_lab

    # Save a text file with the parameter values for future reference.
    # The context manager closes the file even if a write fails.
    param_values = (
        ("pulse_rate_range", pulse_rate_range),
        ("signal_band", signal_band),
        ("noise_bands", noise_bands),
        ("clip_duration", clip_duration),
        ("clip_overlap", clip_overlap),
        ("window_samples", window_samples),
    )
    with open(folder_path + "/param_values" + param_set_lab + ".txt", "w") as param_file:
        for param_name, param_value in param_values:
            param_file.write(param_name + " = " + repr(param_value) + "\n")

    start_time = datetime.now() # track how long the model takes

    # for every file in the dataframe, calculate the RIBBIT score;
    # checkpoint counts the files processed so far (successful or not)
    for checkpoint, path in enumerate(df.index, start=1):

        try: # if RIBBIT fails on this audio file, skip it so the run continues
            # make the spectrogram
            spec = Spectrogram.from_audio(Audio.from_file(path), window_samples=window_samples)

            # run RIBBIT on the spectrogram with desired parameters
            score_df = ribbit(
                spec,
                pulse_rate_range=pulse_rate_range,
                signal_band=signal_band,
                clip_duration=clip_duration,
                clip_overlap=clip_overlap,
                noise_bands=noise_bands,
                plot=False)

            # index of the clip with the max RIBBIT score for the audio file
            max_index = score_df.score.idxmax()

            # record the max RIBBIT score for the audio file (multiplied by 10,000 to make it easier to read)
            df.at[path, score_col] = score_df.at[max_index, 'score'] * 10000
            # record the time in the audio file that the max RIBBIT score occurs (so can be checked manually)
            df.at[path, time_col] = score_df.at[max_index, 'start_time']

        except (RuntimeError, EOFError) as runtime_error:
            # the file does not work with RIBBIT: report it and record NaN
            # for both the score and the time stamp
            print("File failed: ", path)
            print(runtime_error)

            df.at[path, score_col] = np.nan
            df.at[path, time_col] = np.nan

        # every 100 files, print checkpoint message and save RIBBIT scores to csv file to back them up
        if checkpoint % 100 == 0:
            print("Finished file number ", checkpoint, "\nFile name: ", path)
            df.to_csv(csv_file_name)

    # parse the recording date from the 15 characters before the last 4 characters of
    # each path (format YYYYMMDD_HHMMSS); values that do not parse become NaT
    df['date'] = pd.to_datetime(df.index.str[-19:-4], format='%Y%m%d_%H%M%S', errors='coerce')
    df.sort_values(by=score_col, ascending=False).to_csv(csv_file_name) # sort by highest RIBBIT scores and save to csv
    print('Time elapsed ', datetime.now() - start_time) # print time taken to run model
    
# ------------------------------------------------------------------------------------- #

# function to run OFAT sensitivity analysis (run multiple parameter sets changing only one parameter)

# Inputs: 
# df - dataframe with filepaths to audio files as the indices of the dataframe
# test_param - the name of the parameter that is varied (e.g. "pulse_rate_range")
# param_vals - the values you want to test for test_param
#       Note: all other values will default to the base parameters

# Outputs: 
# folder labeled "ribbit_scores_test_param" (e.g. "ribbit_scores_pulse_rate_range") containing a csv file and txt file for each parameter set: 
#      ribbit_scores#.csv - datasheet with Ribbit scores for each file (# is the position of the value in param_vals: 0, 1, 2, ...)
#      param_values#.txt - text file with parameter values used for the model

def run_ribbit_SA(df, test_param, param_vals):
    """Run RIBBIT once per value in param_vals, varying only test_param.

    Each run calls run_ribbit_for_indices with the chosen parameter set to one
    of the values, param_set_lab set to the position of that value in
    param_vals (as a string) and folder_lab set to "_" + test_param. All other
    parameters are left at run_ribbit_for_indices' defaults.

    If test_param is not one of the recognized parameter names, a warning is
    printed and nothing is run.
    """
    recognized_params = (
        "pulse_rate_range",
        "signal_band",
        "noise_bands",
        "clip_duration",
        "clip_overlap",
        "window_samples",
    )

    # guard clause: bail out before touching param_vals at all
    if test_param not in recognized_params:
        print("Warning: test parameter not recognized")
        return

    output_folder_lab = "_" + test_param
    # positional lookup (param_vals[i]) is kept so any indexable container
    # is handled exactly as before
    for set_number in range(0, len(param_vals)):
        override = {test_param: param_vals[set_number]}
        run_ribbit_for_indices(df,
                               param_set_lab=str(set_number),
                               folder_lab=output_folder_lab,
                               **override)
        
        