# Finetuning PCLR model trained without Apollo

# Load PCLR Model

In [3]:
'''
Date: Jan 3rd 2023
Author: Hyewon Jeong
'''

from typing import List, Dict

import numpy as np
import os
import pickle
import pandas as pd

import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.utils import Sequence

In [13]:
'''
TODO: this generator still needs debugging.
It may need separate helper methods for building X and y, as in the custom data generator tutorial linked below:
https://medium.com/analytics-vidhya/write-your-own-custom-data-generator-for-tensorflow-keras-1252b64e41c3
'''
class DataGenerator(Sequence):
    """Keras ``Sequence`` that yields batches of ECG arrays and cath labels.

    Each sample corresponds to one row of ``df_tab``. Its ECG is read from
    ``<dir_csv>/<QuantaID>_<Date_of_Cath>.csv``; every second CSV row is kept,
    the first CSV column is dropped, values are divided by 1000, and the first
    ``_CROP_LEN`` rows are transposed, giving one ``(n_columns, _CROP_LEN)``
    array per sample. Samples are stacked along a new leading batch axis.

    The label is selected by ``self.label`` (``'pcwp'``, ``'age'`` or
    ``'gender'``) together with ``self.train_mode`` (``'regression'`` or
    anything else for classification).

    Args:
        df_tab: Table with ``QuantaID`` and ``Date_of_Cath`` columns plus the
            label column in use (``PCWP_mean``, ``Age_at_Cath`` or ``Sex``).
        uids: Identifiers selecting the rows that belong to this split.
        to_fit: When True, ``__getitem__`` returns ``(X, y)``; otherwise ``X``.
        batch_size: Number of rows per batch.
        ecg_len: Stored only; the crop actually applied is ``_CROP_LEN``.
        n_channels: Stored only; not used by this class.
        n_classes: Stored only; not used by this class.
        shuffle: When True, row order is reshuffled at the end of each epoch.

    Raises:
        ValueError: If no row of ``df_tab`` matches ``uids``.
    """

    # Number of rows kept from each ECG after taking every second CSV row.
    _CROP_LEN = 2496

    def __init__(self, df_tab, uids, to_fit=True, batch_size=64, ecg_len=2500,
                 n_channels=12, n_classes=2, shuffle=True):
        super().__init__()
        self.dir_csv = '/storage/shared/apollo/same-day/'
        self.pcwp_th = 18.0

        # Restrict the table to this split so train/val/test generators do not
        # all read the same leading rows of ``df_tab``.
        # NOTE(review): assumes `uids` holds QuantaID values (the notebook
        # derives its ids from df_tab['QuantaID']) — confirm against the
        # contents of the *_ids.npy files.
        self.df = df_tab[df_tab['QuantaID'].isin(uids)].reset_index(drop=True)
        if self.df.empty:
            raise ValueError(
                "No rows of df_tab have a QuantaID contained in uids")

        # First two entries are used as mean and std for label normalization.
        self.pcwp_train = np.load("/storage/hyewonjeong/metricssl_02/stores/train_info.npy")
        self.pcwp_mean = self.pcwp_train[0]
        self.pcwp_std = self.pcwp_train[1]
        self.label = 'pcwp'
        self.train_mode = 'classification' # 'regression', otherwise

        self.list_IDs = uids
        self.to_fit = to_fit
        self.batch_size = batch_size
        self.ecg_len = ecg_len
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Denotes the number of batches per epoch
        :return: number of full batches per epoch (a trailing partial batch
                 is dropped)
        """
        return len(self.indexes) // self.batch_size

    def __getitem__(self, idx):
        """Build batch number ``idx``.

        :param idx: batch index in ``[0, len(self))``
        :return: ``X`` if ``to_fit`` is False, else ``(X, y)`` where ``X``
                 stacks one ECG array per row and ``y`` holds one label per row
        :raises IndexError: if ``idx`` is outside ``[0, len(self))``
        """
        if not 0 <= idx < len(self):
            raise IndexError(f"batch index {idx} out of range")

        # Go through self.indexes so that shuffling actually changes the order.
        batch_rows = self.indexes[idx * self.batch_size:(idx + 1) * self.batch_size]
        rows = [self.df.iloc[i] for i in batch_rows]

        X = np.stack([self._load_ecg(row) for row in rows])
        if not self.to_fit:
            return X

        y = np.asarray([self._label_for(row) for row in rows])
        return X, y

    def _load_ecg(self, row):
        """Read, subsample, rescale and crop the ECG CSV belonging to ``row``."""
        qid = row['QuantaID']
        doc = row['Date_of_Cath']
        fname = os.path.join(self.dir_csv, f'{qid}_{doc}.csv')

        # Keep every second CSV row and drop the first column.
        x = pd.read_csv(fname).values[::2, 1:].astype(np.float32)
        # Presumably a unit conversion — confirm against the CSV format.
        x = x / 1000
        return x[:self._CROP_LEN, :].T

    def _label_for(self, row):
        """Return the training target for ``row`` per ``label``/``train_mode``."""
        regression = self.train_mode == 'regression'

        if self.label == 'pcwp':
            if regression:
                # Normalize labels with the stored training statistics.
                return (row['PCWP_mean'] - self.pcwp_mean) / self.pcwp_std
            return row['PCWP_mean'] > self.pcwp_th

        if self.label == 'age':
            if regression:
                return row['Age_at_Cath']
            # NOTE(review): the classification target for 'age' is still the
            # PCWP threshold, as in the original code — confirm this is meant.
            return row['PCWP_mean'] > self.pcwp_th

        if self.label == 'gender':
            return row['Sex']

        raise ValueError(f"Unsupported label: {self.label!r}")

    def on_epoch_end(self):
        """Updates indexes after each epoch
        """
        self.indexes = np.arange(len(self.df))
        if self.shuffle:
            np.random.shuffle(self.indexes)

In [14]:
# Set Seeds and Devices (GPU)
seed = 0
gpu = 0

tf.random.set_seed(seed)
# DataGenerator shuffles with np.random, so seed NumPy as well to make the
# batch order reproducible.
np.random.seed(seed)
os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)
gpus = tf.config.list_physical_devices('GPU')
# Guard against hosts with no visible GPU instead of failing on gpus[0].
if gpus:
    tf.config.set_visible_devices(gpus[0], 'GPU')
    tf.config.experimental.set_memory_growth(gpus[0], True)

# Load Dataset
dir_data = '/storage/shared/apollo/same-day/'
tab = '/storage/shared/apollo/same-day/tabular_data.csv'
df_tab = pd.read_csv(tab)
# Drop rows that have no 'CO' value.
df_tab = df_tab.dropna(subset=['CO'])
frac_train = 0.8
frac_val = 0.2
uids = df_tab['QuantaID'].unique()

# Precomputed split identifiers.
train_ids = np.load("/storage/hyewonjeong/metricssl_02/stores/train_ids.npy")
val_ids = np.load("/storage/hyewonjeong/metricssl_02/stores/val_ids.npy")
test_ids = np.load("/storage/hyewonjeong/metricssl_02/stores/test_ids.npy")

training_generator = DataGenerator(df_tab, train_ids)
validation_generator = DataGenerator(df_tab, val_ids)
# The test generator must use the test split, not the validation split.
test_generator = DataGenerator(df_tab, test_ids)



AttributeError: 'tuple' object has no attribute 'rank'

In [15]:
# Display the generator object's repr.
training_generator

<__main__.DataGenerator at 0x7fdf1d9e9890>

In [None]:
# Design model
# Load the saved Keras model from the HDF5 file in the working directory.
model = load_model("./PCLR_wo_apollo.h5")
# NOTE(review): compile() is called with no optimizer or loss. Keras needs a
# loss to train, so the fit() call below presumably fails until one is
# supplied — confirm the intended loss and output head.
model.compile()

# Train model on dataset
# No `epochs` argument is passed, so fit() uses its default.
model.fit(training_generator, validation_data=validation_generator)

In [None]:
# Predict on the held-out split.
# The original template passed a third positional argument together with
# `to_fit=False` (a TypeError with DataGenerator's signature) and referenced
# an undefined `pred_idx`.
# `to_fit=False` asks the generator for inputs only; `shuffle=False` keeps
# the generator's index order unshuffled.
pred_generator = DataGenerator(df_tab, test_ids, to_fit=False, shuffle=False)

# Model.predict accepts a Sequence directly; predict_generator is deprecated.
pred = model.predict(pred_generator)

# Reference code

In [None]:
# Configure the model for training
# NOTE(review): `train_generator` is not defined in the visible notebook (the
# generator built earlier is named `training_generator`), and DataGenerator
# defines no `.samples` attribute — adapt these names before running.
# The optimizer is reached through the already-imported `tf` because no
# `optimizers` name is imported in this file; `learning_rate` replaces the
# deprecated `lr` argument.
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-4),
              metrics=['acc'])

# Train the model
# Step counts must be integers; round up so a final partial batch is included.
history = model.fit(
    train_generator,
    steps_per_epoch=int(
        np.ceil(train_generator.samples / train_generator.batch_size)),
    epochs=20,
    validation_data=validation_generator,
    validation_steps=int(
        np.ceil(validation_generator.samples / validation_generator.batch_size)),
    verbose=1)

In [None]:
# Utility function for plotting of the model results
def visualize_results(history):
    """Draw training/validation accuracy and loss curves from a fit history.

    ``history.history`` must provide the keys ``'acc'``, ``'val_acc'``,
    ``'loss'`` and ``'val_loss'``. Accuracy is drawn on the current figure,
    loss on a newly created one, and both are then shown.
    """
    # Read every series up front so a missing key fails before any drawing.
    train_acc = history.history['acc']
    valid_acc = history.history['val_acc']
    train_loss = history.history['loss']
    valid_loss = history.history['val_loss']

    x_axis = range(len(train_acc))

    panels = (
        (train_acc, valid_acc, 'acc', 'accuracy'),
        (train_loss, valid_loss, 'loss', 'loss'),
    )
    for panel_no, (train_vals, valid_vals, short, full) in enumerate(panels):
        if panel_no:
            # Every panel after the first gets its own figure.
            plt.figure()
        plt.plot(x_axis, train_vals, 'b', label=f'Training {short}')
        plt.plot(x_axis, valid_vals, 'r', label=f'Validation {short}')
        plt.title(f'Training and validation {full}')
        plt.legend()

    plt.show()
 
 
# Run the function to illustrate accuracy and loss
# (`history` is the value returned by model.fit in the training cell above).
visualize_results(history)

In [None]:
# Utility function for obtaining of the errors 
def obtain_errors(val_generator, predictions):
    """Find the samples whose predicted class differs from the ground truth.

    Args:
        val_generator: Object exposing ``filenames``, ``classes`` (ground-truth
            class indices), ``class_indices`` (label -> index mapping) and
            ``samples`` (total sample count, used only in the printed summary).
        predictions: 2-D array-like of per-class scores, one row per sample.

    Returns:
        Tuple ``(idx2label, errors, fnames)``: the labels ordered by class
        index, the positions of misclassified samples, and the generator's
        filenames.
    """
    # Use the generator that was passed in, not a module-level global.
    fnames = val_generator.filenames

    # Get the ground truth from generator
    ground_truth = val_generator.classes

    # Get the dictionary of classes
    label2index = val_generator.class_indices

    # Order labels by their index so that idx2label[i] names class i even if
    # the mapping was not inserted in index order.
    idx2label = sorted(label2index, key=label2index.get)
    print("The list of classes: ", idx2label)

    # Get the class index
    predicted_classes = np.argmax(predictions, axis=1)

    errors = np.where(predicted_classes != ground_truth)[0]
    print("Number of errors = {}/{}".format(len(errors), val_generator.samples))

    return idx2label, errors, fnames
 
 
# Utility function for visualization of the errors
def show_errors(idx2label, errors, predictions, fnames):
    """Display every misclassified image with its label and prediction.

    For each position in ``errors`` the title shows the first path component
    of the file name as the original label, the predicted label looked up in
    ``idx2label``, and the score of the predicted class.
    """
    for sample_idx in errors:
        scores = predictions[sample_idx]
        top_class = np.argmax(scores)
        top_label = idx2label[top_class]

        fname = fnames[sample_idx]
        caption = 'Original label:{}, Prediction :{}, confidence : {:.3f}'.format(
            fname.split('/')[0],
            top_label,
            scores[top_class])

        image = load_img('{}/{}'.format(validation_dir, fname))
        plt.figure(figsize=[7,7])
        plt.axis('off')
        plt.title(caption)
        plt.imshow(image)
        plt.show()

In [None]:
# Get the predictions from the model using the generator
# The step count must be an integer; round up so a final partial batch is
# predicted as well.
# NOTE(review): DataGenerator defines no `.samples` attribute — adapt before
# running against it.
predictions = model.predict(
    validation_generator,
    steps=int(
        np.ceil(validation_generator.samples / validation_generator.batch_size)),
    verbose=1)

# Run the function to get the list of classes and errors
idx2label, errors, fnames = obtain_errors(validation_generator, predictions)

# Run the function to illustrate the error cases
show_errors(idx2label, errors, predictions, fnames)