# **ABGQI Prediction**
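This notebook reads the mel spectrogram images (melspecs) of each recording and writes one CSV file per recording containing the ABGQI (Anthropophony, Biophony, Geophony, Interference, Quiet) predictions for every melspec. The predictions are generated by the saved model located in the ABGQI-CNN folder.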

In [None]:
# Standard library
import glob
import os

# Third-party
import pandas as pd
import tensorflow as tf

# Importing Drive for Google Colab
from google.colab import drive
# Mount Google Drive at /content/drive; the data, results and model paths
# used in this notebook all live under that mount point.
drive.mount('/content/drive')

In [None]:
# Locations of the inputs, outputs and model on the mounted Shared Drive.
_PROJECT_ROOT = '/content/drive/Shareddrives/cs479 ABGQI/CQuinn8-ABGQI-CNN-93420d1'
_DEPLOY_ROOT = _PROJECT_ROOT + '/5_ABGQI-CNN_deployment'

# One sub-folder of mel spectrogram PNGs per wav file. The glob result is
# restricted to directories (a stray file here would make the later
# os.listdir() call fail) and sorted so that every run walks the folders in
# the same order.
melspec_dirs = sorted(
    os.path.normpath(path)
    for path in glob.glob(_DEPLOY_ROOT + '/data/1min_melspecs/*')
    if os.path.isdir(path)
)

# Folder that receives one prediction CSV per wav (trailing slash kept as before).
results_dir = _DEPLOY_ROOT + '/results/1min_predictions/'

# Path of the saved model that is passed to tf.keras.models.load_model.
model_path = _PROJECT_ROOT + '/ABGQI-CNN'

In [None]:
# FUNCTIONS
# function that interprets image after being provided a path in process_path
def decode_img(img, IMG_HEIGHT, IMG_WIDTH):
    """Decode encoded image bytes into a resized float32 RGB tensor.

    Args:
        img: The raw encoded image contents (process_path passes the result
            of tf.io.read_file here).
        IMG_HEIGHT: Target height in pixels.
        IMG_WIDTH: Target width in pixels.

    Returns:
        A float32 tensor with 3 channels, scaled to the [0, 1] range and
        resized to (IMG_HEIGHT, IMG_WIDTH).
    """
    # NOTE(review): the notebook feeds .png files through decode_jpeg;
    # TensorFlow documents that this op also decodes PNG input - confirm for
    # the installed version.
    pixels_uint8 = tf.image.decode_jpeg(img, channels=3)
    # convert_image_dtype rescales uint8 values into floats in [0, 1].
    pixels_float = tf.image.convert_image_dtype(pixels_uint8, tf.float32)
    target_size = [IMG_HEIGHT, IMG_WIDTH]
    return tf.image.resize(pixels_float, target_size)

# function to read in a file path
def process_path(file_path, IMG_HEIGHT, IMG_WIDTH):
    """Read an image file from disk and return it decoded and resized.

    Args:
        file_path: Path of the image file to load.
        IMG_HEIGHT: Target height in pixels, forwarded to decode_img.
        IMG_WIDTH: Target width in pixels, forwarded to decode_img.

    Returns:
        The tensor produced by decode_img for the file's contents.
    """
    # read_file returns the file contents as a single string tensor.
    raw_bytes = tf.io.read_file(file_path)
    return decode_img(raw_bytes, IMG_HEIGHT, IMG_WIDTH)

In [None]:
# Height and width (in pixels) that every melspec image is resized to before
# it is passed to the model.
IMG_HEIGHT, IMG_WIDTH = 224, 224

# Load the full saved model (not a checkpoint) from the ABGQI-CNN folder.
model = tf.keras.models.load_model(model_path)

# Show the architecture and how many melspec folders were discovered.
model.summary()
print('Number of folders in the parent Mel-Spec directory:', len(melspec_dirs))

In [None]:
# Predict ABGQI scores for every per-wav folder of mel spectrogram PNGs and
# write one CSV of sigmoid scores per folder into results_dir.

PREDICT_BATCH_SIZE = 32  # images per forward pass
CLASS_COLUMNS = ["Anthropophony", "Biophony", "Geophony", "Interference", "Quiet"]


def _melspec_sort_key(fname):
    """Order numerically named PNGs (0.png, 2.png, ...) by value; other names follow."""
    stem = os.path.splitext(fname)[0]
    if stem.isdecimal():
        return (0, int(stem), "")
    return (1, 0, stem)


# to_csv does not create missing folders, so make sure the output folder exists.
os.makedirs(results_dir, exist_ok=True)

for mel_store in melspec_dirs:
    temp_wav = os.path.basename(os.path.normpath(mel_store))  # get wav name
    out_path_temp = os.path.join(results_dir, temp_wav + '.csv')
    print("Wav name:", temp_wav)

    # Skip wavs whose predictions already exist so the cell can be re-run
    # after an interruption without redoing finished work.
    if os.path.isfile(out_path_temp):
        print("WAV ALREADY PREDICTED")
        continue

    print("MFCC dir:", mel_store)

    # Use the PNG files that are actually present, in numeric order, rather
    # than synthesising names 0.png, 2.png, ... from the number of directory
    # entries. For a folder that follows that naming scheme the order is
    # unchanged, but stray or missing files no longer cause a read failure.
    mel_names = sorted(
        (f for f in os.listdir(mel_store) if f.lower().endswith('.png')),
        key=_melspec_sort_key,
    )
    print("Number of mfccs in current dir =", len(mel_names))

    # An empty folder previously crashed when the column names were assigned.
    if not mel_names:
        print("No PNG melspecs found, skipping:", mel_store)
        continue

    # Stream the images through the model in batches instead of calling
    # predict() once per image. Images are resized with the same
    # IMG_HEIGHT/IMG_WIDTH constants the model cell defines. Only `verbose`
    # is passed: the workers/use_multiprocessing/max_queue_size arguments
    # held their default values and are rejected by newer Keras releases.
    mel_paths = [os.path.join(mel_store, f) for f in mel_names]
    dataset = (
        tf.data.Dataset.from_tensor_slices(mel_paths)
        .map(lambda path: process_path(path, IMG_HEIGHT, IMG_WIDTH))
        .batch(PREDICT_BATCH_SIZE)
    )
    pred = model.predict(dataset, verbose=0)

    # The raw model outputs are passed through a sigmoid here, one row per image.
    sigmoid_pred = tf.math.sigmoid(pred).numpy()

    # format predictions: file name first, then one column per class
    df_sigmoid = pd.DataFrame(sigmoid_pred, columns=CLASS_COLUMNS)
    df_sigmoid.insert(0, "melspec", mel_names)

    # save predictions
    df_sigmoid.to_csv(out_path_temp, index=False)
    print('Saved this file at:', out_path_temp, "Length was:", len(df_sigmoid))