In [1]:
import pandas as pd
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from configs import configs
from dataset import ChestXRayCaptionDataset
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from utils import train_transform, evaluate_transform
from model import Chexnet
from pytorch_tokenizer import create_tokenizer
from pytorch_test import evaluation_matrix
from pytorch_chexpert import chexpert
from sklearn.metrics import recall_score, precision_score, f1_score

2091lines [00:00, 190625.32lines/s]
2091lines [00:00, 190592.18lines/s]


caller: c:\Users\darkenstardragon\Documents\Work\chest-xray-report-gen\text_generation\pytorch_chexpert.py
Creating Chexpert reward module...
Using 1 GPUs!


In [2]:
# Shared components: the caption tokenizer and the encoder returned by Chexnet.finetuned().
tokenizer = create_tokenizer()
encoder = Chexnet.finetuned()

# PCA parameters start unset; pca_transform() fills them in on first use.
pca_mean = None
pca_components = None

# All three splits are read in a fixed order with identical loader settings.
_loader_options = dict(batch_size=8, shuffle=False, num_workers=0, pin_memory=True)

train_loader = DataLoader(
    ChestXRayCaptionDataset('train', transform=train_transform),
    **_loader_options,
)

val_loader = DataLoader(
    ChestXRayCaptionDataset('val', transform=evaluate_transform),
    **_loader_options,
)

test_loader = DataLoader(
    ChestXRayCaptionDataset('test', transform=evaluate_transform),
    **_loader_options,
)

2091lines [00:00, 174731.33lines/s]


loading weights from weights/pretrained_encoder/pretrained_enc_2022-02-26_16-38-13.950274.pth.tar


In [3]:
def generate_image_embeddings(encoder, data_loader, embedding_dim=1024 * 8 * 8):
    """Run `encoder` over every batch of `data_loader` and collect its outputs.

    Args:
        encoder: model called as `encoder(img)` that returns a pair
            `(encoded_img, prob)`. It is switched to eval mode here.
        data_loader: iterable yielding `(img, caption, _)` batches of tensors.
        embedding_dim: number of values each flattened embedding row holds.
            Defaults to 1024*8*8, the size this notebook was written for.

    Returns:
        Tuple `(image_embeddings, probs, captions)` of numpy arrays, each the
        concatenation of the per-batch tensors; `image_embeddings` is reshaped
        to `(-1, embedding_dim)`.
    """
    encoder.eval()

    # Send inputs to wherever the encoder lives instead of assuming CUDA, so the
    # function also works on CPU-only machines. Fall back to the old behaviour
    # (CUDA when available) if the encoder exposes no parameters.
    try:
        device = next(encoder.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    image_embeddings = []
    probs = []
    captions = []

    with torch.no_grad():
        for img, caption, _ in tqdm(data_loader):
            encoded_img, prob = encoder(img.to(device))
            # Move results off the device right away so memory use stays bounded.
            image_embeddings.append(encoded_img.cpu())
            probs.append(prob.cpu())
            captions.append(caption.cpu())

    image_embeddings = torch.cat(image_embeddings).reshape(-1, embedding_dim).numpy()
    probs = torch.cat(probs).numpy()
    captions = torch.cat(captions).numpy()
    return image_embeddings, probs, captions

def load_pca():
    """Read the saved PCA mean and components into the module-level globals.

    Populates `pca_mean` from 'weights/pca_mean.npy' and `pca_components` from
    'weights/pca_components.npy' (paths are relative to the working directory).
    """
    global pca_mean, pca_components
    mean_path = 'weights/pca_mean.npy'
    components_path = 'weights/pca_components.npy'
    pca_mean = np.load(mean_path)
    pca_components = np.load(components_path)

def pca_transform(embeddings):
    """Project `embeddings` with the saved PCA parameters.

    The PCA mean and components are loaded lazily through `load_pca()` the
    first time either global is still None.

    Args:
        embeddings: array whose last dimension matches the length of `pca_mean`.

    Returns:
        `(embeddings - pca_mean)` multiplied by the transpose of `pca_components`.
    """
    # Identity comparison is the idiomatic None check and cannot be confused
    # with numpy's element-wise `==`.
    if pca_mean is None or pca_components is None:
        load_pca()
    return np.dot(embeddings - pca_mean, pca_components.T)

In [4]:
# Encode the whole training split; these embeddings and captions form the
# reference set that the nearest-neighbour lookup later retrieves from.
train_image_embeddings, train_probs, train_captions = generate_image_embeddings(encoder, train_loader)


100%|██████████| 1283/1283 [01:47<00:00, 11.98it/s]


In [5]:
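# PCA training (one-off; left commented out so a normal run reuses the saved weights).
# NOTE: this saves to 'weights/pca_mean_14.npy' and 'weights/pca_component_14.npy',
# whereas load_pca() reads 'weights/pca_mean.npy' and 'weights/pca_components.npy';
# rename the files (or the paths) before relying on freshly fitted weights.
# pca = PCA(n_components=1024)
# pca.fit(train_image_embeddings)
# np.save('weights/pca_mean_14.npy', pca.mean_)
# np.save('weights/pca_component_14.npy', pca.components_)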

PCA(n_components=1024)

In [9]:
# Encode the validation and test splits with the same encoder; their captions
# serve as the ground truth when the retrieved captions are scored below.
val_image_embeddings, val_probs, val_captions = generate_image_embeddings(encoder, val_loader)
test_image_embeddings, test_probs, test_captions = generate_image_embeddings(encoder, test_loader)

100%|██████████| 171/171 [00:12<00:00, 13.50it/s]
100%|██████████| 257/257 [00:24<00:00, 10.52it/s]


In [10]:
# Project all three splits with the saved PCA (pca_transform loads it on first use).
# NOTE: each name is overwritten with its own projection, so this cell is not
# idempotent — re-running it without regenerating the raw embeddings feeds
# already-projected arrays back into pca_transform.
train_image_embeddings = pca_transform(train_image_embeddings)
val_image_embeddings = pca_transform(val_image_embeddings)
test_image_embeddings = pca_transform(test_image_embeddings)

In [11]:
# Build the 1-nearest-neighbour lookup over the PCA-projected training embeddings.
# Each training row is labelled with its own position; predict() only uses the
# neighbour indices returned by kneighbors(), which address train_captions.
one_nn = KNeighborsClassifier(n_neighbors=1)
indices = list(range(len(train_image_embeddings)))
one_nn.fit(train_image_embeddings, indices)

KNeighborsClassifier(n_neighbors=1)

In [12]:
def predict(embeddings, decode=False):
    """Return, for each query embedding, the caption of its nearest training example.

    Args:
        embeddings: query embeddings in the same space `one_nn` was fitted on,
            one row per query.
        decode: when True, the gathered captions are passed through
            `tokenizer.decode` before being returned.

    Returns:
        Array with one row per query holding the retrieved caption from
        `train_captions` (or whatever `tokenizer.decode` returns when
        `decode` is True).
    """
    # Only the neighbour positions are used, so skip computing distances.
    neighbor_idx = one_nn.kneighbors(embeddings, return_distance=False)
    # One fancy-index gather replaces the per-row Python loop; the reshape keeps
    # the original layout of one flat row per query.
    captions = train_captions[neighbor_idx].reshape(len(embeddings), -1)
    if decode:
        captions = tokenizer.decode(captions)
    return captions

def _label_captions(captions, batch_size):
    """Feed `captions` to `chexpert` in batches and stack the per-batch results."""
    loader = DataLoader(
        captions,
        batch_size=batch_size,
        num_workers=0,
        pin_memory=True,
    )
    frames = [chexpert(batch, tokenizer) for batch in tqdm(loader)]
    return pd.concat(frames).reset_index(drop=True)


def evaluate(true_captions, pred_captions, batch_size=18):
    """Score predicted captions against reference captions.

    Both caption sets are labelled batch by batch with the imported `chexpert`
    function (using the notebook's `tokenizer`), and the two resulting label
    tables are compared with `evaluation_matrix`.

    Args:
        true_captions: reference captions, indexable by sample.
        pred_captions: predicted captions, indexable by sample.
        batch_size: number of captions labelled per `chexpert` call
            (defaults to 18, the value this notebook used).

    Returns:
        Whatever `evaluation_matrix(true_df, pred_df)` returns.
    """
    true_df = _label_captions(true_captions, batch_size)
    pred_df = _label_captions(pred_captions, batch_size)
    return evaluation_matrix(true_df, pred_df)

In [13]:
# Retrieve the nearest training caption for every validation image, then score
# the retrieved captions against the validation ground truth.
predicted_reports = predict(val_image_embeddings)
val_eval_matrix = evaluate(val_captions, predicted_reports)

100%|██████████| 76/76 [00:17<00:00,  4.40it/s]
100%|██████████| 76/76 [00:15<00:00,  4.88it/s]


In [14]:
# Show the validation metrics rounded to four decimals.
val_eval_matrix.round(4)

Metrics,Recall,Precision,F1
Enlarged Cardiomediastinum,0.7561,0.5354,0.6269
Cardiomegaly,0.6877,0.4688,0.5575
Lung Opacity,0.8177,0.5774,0.6769
Lung Lesion,0.3125,0.2817,0.2963
Edema,0.6385,0.4289,0.5131
Consolidation,0.6316,0.4352,0.5153
Pneumonia,0.4286,0.2903,0.3462
Atelectasis,0.6971,0.4515,0.5481
Pneumothorax,0.0364,0.087,0.0513
Pleural Effusion,0.6516,0.4274,0.5162


In [15]:
# Same retrieval and scoring on the test split.
# NOTE: `predicted_reports` is reused here and no longer holds the validation predictions.
predicted_reports = predict(test_image_embeddings)
test_eval_matrix = evaluate(test_captions, predicted_reports)

100%|██████████| 114/114 [00:25<00:00,  4.40it/s]
100%|██████████| 114/114 [00:23<00:00,  4.87it/s]


In [16]:
# Show the test metrics rounded to four decimals.
test_eval_matrix.round(4)

Metrics,Recall,Precision,F1
Enlarged Cardiomediastinum,0.7327,0.5988,0.659
Cardiomegaly,0.6592,0.4993,0.5682
Lung Opacity,0.7802,0.6039,0.6808
Lung Lesion,0.2558,0.2273,0.2407
Edema,0.5986,0.4657,0.5239
Consolidation,0.6028,0.4477,0.5138
Pneumonia,0.4384,0.3147,0.3664
Atelectasis,0.6523,0.4639,0.5422
Pneumothorax,0.1304,0.15,0.1395
Pleural Effusion,0.6099,0.4333,0.5067
