Initial tests

In [1]:
import os
from pathlib import Path

import pandas as pd
import torch
import torch.nn as nn
import transformers
from transformers import BertModel, BertTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Root folder of the CheXpert-v1.0-small download. Set the CHEXPERT_ROOT
# environment variable to point somewhere else; the fallback is the original
# location used when this notebook was written.
CHEXPERT_ROOT = Path(
    os.environ.get(
        "CHEXPERT_ROOT",
        "/Users/rishikarandev/deep learning/ECE685D_FinalProject/CheXpert-v1.0-small",
    )
)

# Only a handful of rows are needed for these experiments, so stop parsing
# after the first five instead of loading the whole file and discarding it.
N_PREVIEW_ROWS = 5

train_subset = pd.read_csv(CHEXPERT_ROOT / "train.csv", nrows=N_PREVIEW_ROWS)

In [3]:
# Display the preview rows loaded above.
train_subset

Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,CheXpert-v1.0-small/train/patient00001/study1/...,Female,68,Frontal,AP,1.0,,,,,,,,,0.0,,,,1.0
1,CheXpert-v1.0-small/train/patient00002/study2/...,Female,87,Frontal,AP,,,-1.0,1.0,,-1.0,-1.0,,-1.0,,-1.0,,1.0,
2,CheXpert-v1.0-small/train/patient00002/study1/...,Female,83,Frontal,AP,,,,1.0,,,-1.0,,,,,,1.0,
3,CheXpert-v1.0-small/train/patient00002/study1/...,Female,83,Lateral,,,,,1.0,,,-1.0,,,,,,1.0,
4,CheXpert-v1.0-small/train/patient00003/study1/...,Male,41,Frontal,AP,,,,,,1.0,,,,0.0,,,,


In [4]:
# Show the column labels of the preview frame.
train_subset.columns

Index(['Path', 'Sex', 'Age', 'Frontal/Lateral', 'AP/PA', 'No Finding',
       'Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity',
       'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia', 'Atelectasis',
       'Pneumothorax', 'Pleural Effusion', 'Pleural Other', 'Fracture',
       'Support Devices'],
      dtype='object')

In [5]:
# Remove the "AP/PA" column. errors="ignore" keeps the cell re-runnable:
# train_subset is reassigned here, so on a second run the column is already
# gone and a plain drop would raise KeyError.
train_subset = train_subset.drop(columns="AP/PA", errors="ignore")

In [6]:
# Replace every missing value with 0. After this, blank entries can no longer
# be told apart from entries that were explicitly 0.0.
train_subset = train_subset.fillna(value=0)

In [7]:
def generate_report(row):
    """Build the text report for one row as a list of its positive findings.

    Entries from position 4 onward are treated as finding labels. A label is
    included when its value equals 1; every other value is ignored.

    Args:
        row: pandas Series for a single row, indexed by column name.

    Returns:
        The string "X-Ray report findings: " followed by the names of the
        positive labels joined with ", ". Nothing follows the prefix when no
        label equals 1.
    """
    candidate_labels = row.iloc[4:]
    positive = candidate_labels.loc[candidate_labels == 1]
    return "X-Ray report findings: " + ", ".join(positive.index)

In [8]:
# Build one report string per row of the preview frame.
test = train_subset.apply(generate_report, axis="columns")

In [9]:
from transformers import BertTokenizer, BertModel

In [10]:
# Tokenizer matching the "bert-base-uncased" checkpoint loaded further down.
tokenizer = BertTokenizer.from_pretrained(
    "bert-base-uncased",
)

In [16]:
sample_sentence = test[0]

# Tokenize every report in one batched call instead of looping row by row and
# concatenating afterwards. Each report is padded or truncated to 512 tokens,
# so both tensors have one row per report and 512 columns.
batch_encoder = tokenizer(
    test.tolist(),
    max_length=512,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
)

# Token IDs and the matching attention masks, already stacked as PyTorch tensors.
token_ids = batch_encoder["input_ids"]
attention_masks = batch_encoder["attention_mask"]

In [12]:
# Load the pre-trained "bert-base-uncased" weights.
model = BertModel.from_pretrained(
    "bert-base-uncased",
    output_hidden_states=True,  # Also return the hidden states of every layer, not just the last one.
)

# Switch to evaluation mode: this turns off training-only behaviour such as the
# Dropout layers, so repeated forward passes give the same embeddings.
# eval() returns the module itself, so as the last expression it also prints the architecture.
model.eval()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [17]:
# Inference only, so run the forward pass without autograd bookkeeping.
with torch.no_grad():
    outputs = model(input_ids=token_ids, attention_mask=attention_masks)

# Last-layer hidden state for every token position of every report.
word_embeddings = outputs.last_hidden_state

In [26]:
# Keep only the first token position of each sequence (the CLS token).
CLS = word_embeddings[:, 0]

In [22]:
import torch.nn as nn

# Linear projection from the 768-wide CLS embedding down to 512 features.
final = nn.Linear(in_features=768, out_features=512)

In [28]:
# Sanity check: shape of the projected CLS embeddings.
final(CLS).shape

torch.Size([5, 512])

Class

In [29]:
class TextEncoder(nn.Module):
    """BERT text encoder that projects the first-token (CLS) embedding.

    The input is run through a pre-trained BERT model, the last-layer hidden
    state at sequence position 0 is taken for each sample, and a linear layer
    maps it to ``embed_dim`` features.

    Args:
        model_name: Checkpoint name passed to ``BertModel.from_pretrained``.
            Defaults to "bert-base-uncased".
        embed_dim: Size of the returned embedding. Defaults to 512.
    """

    def __init__(self, model_name="bert-base-uncased", embed_dim=512):
        super().__init__()
        # Only last_hidden_state is read in forward(), so the per-layer hidden
        # states are not requested; that avoids keeping them all alive.
        self.bert = BertModel.from_pretrained(model_name)
        # Take the input width from the loaded checkpoint rather than
        # hard-coding it, so other BERT sizes work unchanged.
        self.linear = nn.Linear(self.bert.config.hidden_size, embed_dim)

    def forward(self, token_ids, attention_masks):
        """Encode a batch of tokenized reports.

        Args:
            token_ids: Token ID tensor passed to BERT as its first argument.
            attention_masks: Attention mask tensor passed as ``attention_mask``.

        Returns:
            Tensor with one ``embed_dim``-sized embedding per input sequence.
        """
        outputs = self.bert(token_ids, attention_mask=attention_masks)
        # Position 0 of every sequence holds the CLS token representation.
        cls_embeddings = outputs.last_hidden_state[:, 0, :]
        return self.linear(cls_embeddings)

Dataset

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Dataset that turns each DataFrame row into tokenized text-encoder inputs.

    A report string is generated for every row with the notebook's
    ``generate_report`` helper. Indexing the dataset tokenizes the report at
    that position and returns its token IDs and attention mask.

    Args:
        df: DataFrame with one row per sample, in the layout that
            ``generate_report`` expects.
        max_length: Length every report is padded or truncated to.
            Defaults to 512.
    """

    def __init__(self, df, max_length=512):
        # Text stuff
        self.text_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        self.max_length = max_length
        # One report string per row. Maybe move generate_report inside the dataset.
        self.reports = df.apply(generate_report, axis=1)

        # Vision stuff? (TODO: not implemented yet)

    def __len__(self):
        """Return the number of samples (one per generated report)."""
        return len(self.reports)

    def __getitem__(self, idx):
        """Tokenize the report at position ``idx``.

        Returns:
            Dict with "token_ids" and "attention_masks" tensors. Each keeps the
            leading dimension of size 1 that ``return_tensors="pt"`` produces
            for a single string.
        """
        # Positional lookup: idx runs over 0..len-1, while the frame's own index
        # labels may be anything (e.g. after filtering or a train/val split).
        report = self.reports.iloc[idx]
        # Use the tokenizer owned by this dataset, not a notebook-level global.
        encoding = self.text_tokenizer(
            report,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        # NOTE(review): default DataLoader collation would stack these to
        # (batch, 1, max_length); squeeze that extra dimension before calling
        # the text encoder - confirm the intended handling.
        # TODO: add the vision inputs for the image encoder.
        return {
            "token_ids": encoding["input_ids"],
            "attention_masks": encoding["attention_mask"],
        }

Samples come in as a DataFrame and are wrapped in the dataset. For each sample, the dataset returns the inputs for the text encoder (token IDs and attention masks) and for the vision encoder (a tensor of pixels). We pass the relevant inputs to the text encoder and the vision encoder to get the embeddings for that sample. **TODO:** describe the remaining steps, i.e. what is done with the two embeddings afterwards.