# Setup

In [None]:
# Run setup_functions.ipynb to define settings, import packages, and define functions.
# NOTE(review): names used later without a visible import or definition (e.g. `pd`,
# `combine_csvs`) are presumably provided by this setup notebook - confirm.
%run ./setup_functions.ipynb

Todo: 
- [ ] Check whether gopher frog files are grouped - do calls occur in many consecutive files? On many consecutive days?
- [ ] Clean parameter selection for use in methods 

#*# - indicates locations where you may want to edit (e.g. file paths, parameter values, etc.)

### Combine multiple ribbit score csv files 

In [448]:
# Only need to run if you need to combine ribbit scores from multiple csv files  
# Useful if you broke up a model run into sections to run it faster
# WARNING: if delete_files = True this will delete individual files after combining them
# keep next 2 lines commented out unless running this chunk to avoid deleting files unintentionally 

# folder_path = "./ribbit_scores_flshe_20221206/" #*# path to folder containing the csv files you want to combine 
# rs_flshe = combine_csvs(folder_path, new_csv_name = "ribbit_scores_combined.csv", delete_files = True)


# Import and clean data

### Define file and folder paths for data import and cleaning 

In [None]:
# File path to the csv file with RIBBIT scores (read with pd.read_csv in the
# "Import and clean RIBBIT score data" section; its first column becomes the index)
ribbit_scores_fp = "./ribbit_scores_flshe_20221206/ribbit_scores_combined.csv" #*#

# File path to the csv file with manually verified data
verified_data_fp = "../manually_verified_data/FLSHE_pond400.csv" #*#

# Path to the folder containing the audio files; used as the prefix when the full
# audio file paths are built from year, logger number, and file name
audio_files_fp = '/Volumes/Expansion/Frog Call Project/Calling Data/FLSHE/' #*#
# Note: if the folders within this folder are structured differently, you may need to edit the full file paths in the 
#       data cleaning section below (indicated with #*#)



### Import and clean RIBBIT score data

In [409]:
# Load the RIBBIT scores from ribbit_scores_fp, using the first csv column as the
# index, and add a `date` column parsed from the index strings.
# The timestamp is taken from characters [-19:-4] of each index entry and must match
# YYYYMMDD_HHMMSS; entries that do not parse become NaT (errors='coerce').
rs_flshe = (
    pd.read_csv(ribbit_scores_fp, index_col=0)
    .assign(
        date=lambda scores: pd.to_datetime(
            scores.index.str[-19:-4],
            format='%Y%m%d_%H%M%S',
            errors='coerce',
        )
    )
)


### Import and clean manually verified data 

In [443]:
# Import the manually verified data, keeping only the columns used below
verified_flshe = pd.read_csv(verified_data_fp)[["File name", "Pond #", "L. capito", "gopher call time", "Date"]]

# Rename columns for convenience
verified_flshe = verified_flshe.rename(
    columns={
        "File name": "file_name",
        "Pond #": "logger",
        "L. capito": "Lcapito",
        "gopher call time": "call_time",
        "Date": "date",
    }
)

# Make Lcapito categorical
verified_flshe["Lcapito"] = verified_flshe["Lcapito"].astype("category")

# Create an integer year column from the first four characters of the date string.
# astype returns a new object rather than converting in place, so the result has to
# be assigned back for the conversion to take effect.
verified_flshe["year"] = verified_flshe["date"].str[0:4].astype("int")

# Add ".wav" to file names that do not already end with it.
# This is done with a boolean mask and .loc instead of a row loop with chained
# indexing (df["col"][i] = ...), which raises SettingWithCopyWarning and is not
# guaranteed to modify the original DataFrame.
# na=True treats missing file names as not needing the extension, so they are left
# untouched instead of raising.
needs_wav = ~verified_flshe["file_name"].str.endswith(".wav", na=True)
verified_flshe.loc[needs_wav, "file_name"] += ".wav"

#*# create full file path from file names, year, and logger numbers #*#
# Layout: <audio_files_fp>FLSHE_<year>/FLSHE_<year>_<logger>/<file_name>
verified_flshe['file_path'] = audio_files_fp + 'FLSHE_' + \
    verified_flshe['year'].astype('string') + \
    '/FLSHE_' + verified_flshe['year'].astype('string') + '_' + verified_flshe['logger'].astype('string') + '/' + \
    verified_flshe['file_name'] #*#

# Set file path as index so the rows can be matched to other data by file path
verified_flshe = verified_flshe.set_index('file_path')


### Merge ribbit scores to manually verified data 

In [None]:
# Attach the RIBBIT scores to the manually verified rows:
#   1. drop the columns that are not carried into the merged table
#   2. merge with rs_flshe on the index of both frames
#   3. drop any rows with "NaN" for Lcapito - if left empty, etc.
verified_flshe = (
    verified_flshe
    .drop(columns=["year", "date", "logger"])
    .merge(rs_flshe, left_index=True, right_index=True)
    .dropna(subset=['Lcapito'])
)
