In [1]:
#!pip install num2words

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, AdamW
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np

from tabulate import tabulate
from tqdm import trange
import random

from num2words import num2words

In [2]:
file_path = './metadata_info/MR_data.csv'
df = pd.read_csv(file_path)
df = df.fillna(0)

training_df = df[df['Label'] != 'Unknown'][['Label', 'EchoTime', 'RepetitionTime', 'InversionTime', 'Rows', 'Columns', 'Image Plane',
                                            'Manufacturer', 'PixelSpacing', 'FlipAngle', 'SliceThickness']]

training_df = training_df[df['Label'] != 'Localizer']



  df = pd.read_csv(file_path)
  training_df = training_df[df['Label'] != 'Localizer']


In [3]:
training_df['Text'] = training_df.apply(lambda row: f'The Echo Time is {num2words(round(row["EchoTime"], 2))} ms. The Repetition Time is {num2words(round(row["RepetitionTime"], 2))} ms.', axis=1)


In [4]:
unique_labels = training_df['Label'].unique()
label_mapping = {label: i for i, label in enumerate(unique_labels)}
training_df['Label_ID'] = training_df['Label'].replace(label_mapping)

print(label_mapping)

texts = training_df.Text.values
labels = training_df.Label_ID.values

{'T1': 0, 'T2': 1, 'DWI': 2, 'ADC': 3, 'T1 CE': 4, 'T2 FLAIR': 5, 'T2 FS': 6, 'DTI': 7, 'PD': 8}


In [5]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModelForCausalLM, AutoTokenizer, AdamW
import torch.nn as nn

# Set the seed for reproducibility
torch.manual_seed(42)

# Define your numpy arrays with texts and labels
texts = texts
labels = labels

# Initialize the StableLM model and tokenizer
model_name = "stabilityai/stablelm-base-alpha-3b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model = AutoModelForCausalLM.from_pretrained(model_name)

# Modify the architecture for sequence classification
classification_head = nn.Sequential(
    #nn.Linear(model.config.hidden_size, 256),
    nn.Linear(50688, 256),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(256, 9)  # Adjust the number of output classes as needed
)
    
model.classification_head = classification_head


# Tokenize the texts and convert labels to torch tensors
encoded_inputs = tokenizer(texts.tolist(), padding=True, return_tensors="pt")
labels = torch.tensor(labels)

# Create a custom PyTorch dataset
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    
    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item["labels"] = self.labels[idx]
        return item
    
    def __len__(self):
        return len(self.labels)

dataset = CustomDataset(encoded_inputs, labels)

# Split the dataset into train and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# Define the data loaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)

# Set the device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#model.to(device)
model.half().cuda()

# Define the optimizer and learning rate
optimizer = AdamW(model.parameters(), lr=1e-5)
#optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-5)

# Training loop
num_epochs = 5  # Adjust the number of epochs as needed

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    train_correct = 0
    
    for batch in train_loader:
        optimizer.zero_grad()
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)
        
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Resize logits and labels to match batch size
        logits = torch.transpose(logits.view(-1, logits.size(0)), 0, 1)  # Reshape logits to [batch_size * sequence_length, num_classes]
        labels = labels.view(-1)  # Reshape labels to [batch_size * sequence_length]
        
        loss = nn.CrossEntropyLoss()(logits, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        predicted_labels = torch.argmax(logits, dim=1)
        train_correct += (predicted_labels == labels).sum().item()
    
    train_loss /= len(train_loader)
    train_accuracy = train_correct / len(train_dataset)# * train_dataset[0]['input_ids'].shape[0])

    
    # Validation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            # Resize logits and labels to match batch size
            logits = torch.transpose(logits.view(-1, logits.size(0)), 0, 1)  # Reshape logits to [batch_size * sequence_length, num_classes]
            labels = labels.view(-1)  # Reshape labels to [batch_size * sequence_length]
            
            loss = nn.CrossEntropyLoss()(logits, labels)
            val_loss += loss.item()
            
            predicted_labels = torch.argmax(logits, dim=1)
            val_correct += (predicted_labels == labels).sum().item()

    val_loss /= len(val_loader)
    val_accuracy = val_correct / len(val_dataset)# * val_dataset[0]['input_ids'].shape[0])

    # Print training and validation metrics
    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}")
    print(f"Train Accuracy: {train_accuracy:.4f}")
    print(f"Val Loss: {val_loss:.4f}")
    print(f"Val Accuracy: {val_accuracy:.4f}")
    print()




Epoch 1/5
Train Loss: 6.5821
Train Accuracy: 0.2824
Val Loss: 5.0572
Val Accuracy: 0.3079

Epoch 2/5
Train Loss: 4.9307
Train Accuracy: 0.2963
Val Loss: 5.0277
Val Accuracy: 0.3706

Epoch 3/5
Train Loss: 4.8452
Train Accuracy: 0.3041
Val Loss: 4.8961
Val Accuracy: 0.2432



KeyboardInterrupt: 

In [8]:
model

GPTNeoXForCausalLM(
  (gpt_neox): GPTNeoXModel(
    (embed_in): Embedding(50688, 4096)
    (layers): ModuleList(
      (0): GPTNeoXLayer(
        (input_layernorm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attention): GPTNeoXAttention(
          (rotary_emb): RotaryEmbedding()
          (query_key_value): Linear(in_features=4096, out_features=12288, bias=True)
          (dense): Linear(in_features=4096, out_features=4096, bias=True)
        )
        (mlp): GPTNeoXMLP(
          (dense_h_to_4h): Linear(in_features=4096, out_features=16384, bias=True)
          (dense_4h_to_h): Linear(in_features=16384, out_features=4096, bias=True)
          (act): GELUActivation()
        )
      )
      (1): GPTNeoXLayer(
        (input_layernorm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((4096,), eps=1e-05, elementwise_affine=Tr

OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB (GPU 0; 31.74 GiB total capacity; 30.28 GiB already allocated; 256.31 MiB free; 30.52 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [10]:
print(train_dataset[0]['input_ids'].shape[0])

34


In [6]:
print(len(train_loader))

240


In [21]:
print(texts)

['The Echo Time is eleven ms. The Repetition Time is five hundred and eleven ms.'
 'The Echo Time is ninety-five ms. The Repetition Time is three thousand, one hundred and fifty ms.'
 'The Echo Time is ninety-four ms. The Repetition Time is five thousand, one hundred ms.'
 ...
 'The Echo Time is two hundred and four ms. The Repetition Time is two thousand, two hundred ms.'
 'The Echo Time is two hundred and four ms. The Repetition Time is two thousand, two hundred ms.'
 'The Echo Time is two hundred and three ms. The Repetition Time is two thousand, two hundred ms.']
