In [None]:
import sys
sys.path.append("../")

import h5py
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from train_args import train_model
import resnet

In [None]:
from constants import (
    DATA_INPUT_DIR,
    N_LEADS,
)

In [None]:
# Build the ResNet1d architecture from the hyperparameters stored in config.json.
config = '../model/config.json'
with open(config, 'r') as config_file:
    config_dict = json.load(config_file)

# Pair each entry of 'net_filter_size' with the matching entry of 'net_seq_lengh'.
# NOTE(review): the key is spelled 'net_seq_lengh' here; presumably this matches
# the spelling inside config.json -- verify before "fixing" it.
blocks_dim = list(zip(config_dict['net_filter_size'], config_dict['net_seq_lengh']))

model = resnet.ResNet1d(
    input_dim=(N_LEADS, config_dict['seq_length']),
    blocks_dim=blocks_dim,
    n_classes=1,
    kernel_size=config_dict['kernel_size'],
    dropout_rate=config_dict['dropout_rate'],
)


In [None]:
# Read the saved checkpoint, remapping every stored tensor onto the CPU.
# NOTE(security): weights_only=False lets torch.load unpickle arbitrary Python
# objects, so only use this with a checkpoint file from a trusted source.
state_dict = torch.load(
    '../model/model.pth',
    map_location=torch.device('cpu'),
    weights_only=False,
)

In [None]:
# Copy the checkpoint's 'model' entry into the network, then put the network
# in evaluation mode so that train-only behaviour is switched off.
trained_weights = state_dict['model']
model.load_state_dict(trained_weights)
model.eval()

In [None]:
# Load the exam metadata table and keep only the rows that belong to
# trace file 16; the trailing expression displays the resulting shape.
exams_all = pd.read_csv(f'../{DATA_INPUT_DIR}/exams.csv')
in_part16 = exams_all['trace_file'] == 'exams_part16.hdf5'
df = exams_all.loc[in_part16]
df.shape

In [None]:
# Load the raw ECG tracings and their exam ids from trace file 16.
filename = "../data/exams_part16.hdf5"
with h5py.File(filename, mode="r") as h5_file:
    print("Keys in the HDF5 file:", list(h5_file.keys()))
    dataset = h5_file['tracings']
    print("Dataset shape:", dataset.shape)
    print("Dataset dtype:", dataset.dtype)
    # Indexing with () reads the full dataset into memory, so the arrays
    # remain usable after the file is closed.
    data_array = dataset[()]
    exam_ids = h5_file['exam_id'][()]

In [None]:
# Limit the number of observations we use and swap the second and third
# dimensions, i.e. data_array_T[i] == data_array[i].T for every kept record.
# NOTE(review): presumably this turns (samples, leads) into (leads, samples) to
# match the model's input_dim -- confirm against the HDF5 layout.
#
# A single vectorized transpose replaces the former per-record Python loop.
# The result is still a C-contiguous float64 array, exactly as before. If the
# file holds fewer than n records, the slice keeps what is available instead of
# raising IndexError.
n = 1000
data_array_T = np.ascontiguousarray(
    np.transpose(data_array[:n], (0, 2, 1)),
    dtype=np.float64,
)

In [None]:
# Display the transposed array's shape as a sanity check before inference.
data_array_T.shape

In [None]:
# Get the predictions.
# Use the first GPU when one is available, otherwise stay on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Convert the inputs to a float32 tensor. as_tensor also accepts an existing
# tensor, so re-running this cell does not fail.
data_array_T = torch.as_tensor(data_array_T, dtype=torch.float32)

# The inputs must live on the same device as the model's weights; previously
# only the model was moved, which raises a device-mismatch error on CUDA.
# no_grad() skips building the autograd graph, which inference does not need.
# The output is moved back to the CPU so it can be converted to NumPy later.
with torch.no_grad():
    y_pred = model(data_array_T.to(device)).cpu()

In [None]:
# Pair each of the first n exam ids with its model output, then inner-join
# the pairs onto the metadata frame using 'exam_id' as the key.
torch_pred = y_pred.detach().numpy().squeeze()
preds = pd.DataFrame({'exam_id': exam_ids[:n], 'torch_pred': torch_pred})
compare = pd.merge(df, preds, on='exam_id', how='inner')

In [None]:
# Scatter the freshly computed predictions against the 'nn_predicted_age'
# column that ships with the metadata.
fig, ax = plt.subplots()
ax.scatter(compare['nn_predicted_age'], compare['torch_pred'])
ax.set_xlabel('NN Predicted Age')
ax.set_ylabel('Torch Predicted Age')
plt.show()

In [None]:
# Histogram of the difference between the metadata prediction and ours.
prediction_gap = compare['nn_predicted_age'] - compare['torch_pred']
fig, ax = plt.subplots()
ax.hist(prediction_gap)
plt.show()