# Combine ribbit scores with verified data

This notebook combines the CSV file of RIBBIT scores with the CSV file of manually verified data so that the user can assess the validity of the RIBBIT model. The example uses the RIBBIT scores created from all of the data in Dec 2022.

## Setup

In [11]:
# Run setup_functions.ipynb to define settings, import packages, and define functions.
# NOTE(review): `pd` is used below without an import in this notebook, so it is
# presumably imported by setup_functions.ipynb - confirm.
%run ../ribbit_functions/setup_functions.ipynb

## Import and clean data

### Define file and folder paths for data import and cleaning 

In [12]:
# CSV file that holds the RIBBIT scores
ribbit_scores_fp = "./results_Dec2022/ribbit_scores_combined.csv"  #*# change this to the file path for your ribbit scores

# CSV file that holds the manually verified data
verified_data_fp = "./verified_flshe/verified_flshe.csv"  #*# change this to the file path for your manually verified data

# Folder that contained the audio files WHEN THE MODEL WAS RUN.
# If you don't remember it, check the ribbit score csv file: its first column has the file paths.
# This is the prefix of the audio file paths and is used to match the index of the ribbit score csv file.
audio_files_fp = "/Volumes/Expansion/Frog Call Project/Calling Data/FLSHE/"  #*# change this to the file path for where the audio data was WHEN THE MODEL WAS RUN
# Note: if the folders within this folder are structured differently, you may need to edit the
#       full file paths in the data cleaning section below (indicated with #*#)



### Import and clean RIBBIT score data

In [14]:
# Load the RIBBIT scores from ribbit_scores_fp, using the first csv column as the index,
# and convert the 'date' column to date-time format in the same step
rs_flshe = pd.read_csv(ribbit_scores_fp, index_col=0).assign(
    date=lambda scores: pd.to_datetime(scores["date"])
)


### Import and clean manually verified data 

In [15]:
# Import the manually verified data, keeping only the columns used below
verified_flshe = pd.read_csv(verified_data_fp)[["File name", "Pond #", "L. capito", "gopher call time", "Date"]]

# rename columns for convenience
verified_flshe = verified_flshe.rename(
    columns={
        "File name": "file_name",
        "Pond #": "logger",
        "L. capito": "Lcapito",
        "gopher call time": "call_time",
        "Date": "date",
    }
)

# make Lcapito categorical
verified_flshe["Lcapito"] = verified_flshe["Lcapito"].astype("category")

# Create the year column from the first four characters of the date string.
# The year is deliberately kept as a string: it is only used to build folder names below.
# (A previous `astype({"year": "int"})` call here discarded its result and had no effect.)
verified_flshe["year"] = verified_flshe["date"].str[0:4]

# Add .wav to the file name if it is not already included.
# A boolean mask with .loc is used instead of element-wise chained assignment
# (df["col"][i] = ...), which triggers SettingWithCopyWarning and does not modify the
# frame when pandas Copy-on-Write is enabled. Missing file names are left untouched.
needs_wav = ~verified_flshe["file_name"].str.endswith(".wav", na=True).astype(bool)
verified_flshe.loc[needs_wav, "file_name"] = verified_flshe.loc[needs_wav, "file_name"] + ".wav"

#*# create full file path from file names, year, and logger numbers #*#
verified_flshe['file_path'] = audio_files_fp + 'FLSHE_' + \
    verified_flshe['year'].astype('string') + \
    '/FLSHE_' + verified_flshe['year'].astype('string') + '_' + verified_flshe['logger'].astype('string') + '/' + \
    verified_flshe['file_name'] #*#

# set file path as index so the rows can be matched to the ribbit scores on their index
verified_flshe = verified_flshe.set_index('file_path')


### Merge ribbit scores to manually verified data 

In [18]:
# Attach the ribbit scores to the verified rows by matching on the file-path index,
# after removing the helper columns, then discard rows whose Lcapito is "NaN"
# (e.g. the cell was left empty during verification).
verified_flshe = (
    verified_flshe
    .drop(columns=["year", "date", "logger"])
    .merge(rs_flshe, left_index=True, right_index=True)
    .dropna(subset=["Lcapito"])
)


## Export to csv file

In [20]:
# Write the merged table to a csv file in the current working directory;
# the file-path index is written as the first column (pandas default, index=True).
verified_flshe.to_csv(path_or_buf="ribbit_scores_with_verified_data.csv", index=True)