In [1]:
!export CUDA_VISIBLE_DEVICES=5

In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "5"

In [3]:
import tensorflow as tf
from transformers import BertTokenizerFast, BertForSequenceClassification
import pandas as pd 
import torch
from sklearn.model_selection import train_test_split
import numpy as np
import re
import cProfile
from memory_profiler import profile

2024-07-22 21:50:25.546397: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-22 21:50:25.580593: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-22 21:50:25.580654: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-22 21:50:25.600683: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  _torch_pytree._register_pytree_node(
  _torch_pytr

In [6]:
if torch.cuda.is_available():
    print("CUDA is available. Number of GPUs:", torch.cuda.device_count())
    print("CUDA device name:", torch.cuda.get_device_name(0))
else:
    print("CUDA is not available.")
torch.cuda.set_device(torch.device("cuda:0"))
device = torch.device("cuda:0")

CUDA is available. Number of GPUs: 1
CUDA device name: NVIDIA GeForce GTX 1080 Ti


In [7]:
tokenizer = BertTokenizerFast.from_pretrained("google-bert/bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=5).to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
#metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]


2024-07-22 21:52:16.692395: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1928] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9922 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:85:00.0, compute capability: 6.1


In [9]:
strategy = tf.distribute.MirroredStrategy()
if torch.cuda.is_available():
        print(f"LOG: Torch allocated Memory: \
                {torch.cuda.memory_allocated() / 1024 ** 2:.2f} MB", flush=True)
        print(f"LOG: Torch cached Memory: \
                {torch.cuda.memory_reserved() / 1024 ** 2:.2f} MB", flush=True)

if tf.config.experimental.list_physical_devices('GPU'):
    zero_info = tf.config.experimental.get_memory_info('GPU:0')
    print(f"LOG: TensorFlow, 0: Current memory usage: \
            {zero_info['current'] / 1024 ** 2:.2f} MB", flush=True)
    print(f"LOG: TensorFlow, 0: Peak memory usage: \
            {zero_info['peak'] / 1024 ** 2:.2f} MB", flush=True)

# model.compile(optimizer=Adam(3e-5))  # No loss argument!

#model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
LOG: Torch allocated Memory:                 418.74 MB
LOG: Torch cached Memory:                 472.00 MB
LOG: TensorFlow, 0: Current memory usage:             0.00 MB
LOG: TensorFlow, 0: Peak memory usage:             0.00 MB


In [10]:
#Now working with the text

In [11]:
df = pd.read_csv(f'/home/rpierson/Files/Topic_Files/topic_2.csv')
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

In [12]:
max_length = 512
def tokenize_function(text):
    return tokenizer(text, padding= "max_length", truncation=True)


In [13]:
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)

In [14]:
max_length = 512
train_encodings = tokenizer(
    train_df['Combined_Text'].tolist(), 
    truncation=True, 
    padding=True, 
    max_length=max_length
)
val_encodings = tokenizer(
    val_df['Combined_Text'].tolist(), 
    truncation=True, 
    padding=True, 
    max_length=max_length
)

In [15]:
train_inputs = {key: torch.tensor(val) for key, val in train_encodings.items()}
val_inputs = {key: torch.tensor(val) for key, val in val_encodings.items()}
train_labels = torch.tensor(train_df['Priority'].values - 1)  # Adjust labels to start from 0
val_labels = torch.tensor(val_df['Priority'].values - 1) 

In [16]:
#train_dataset = tf.data.Dataset.from_tensor_slices((train_encodings, train_labels)).shuffle(len(train_df)).batch(15)
#val_dataset = tf.data.Dataset.from_tensor_slices((val_encodings, val_labels)).batch(15)


In [17]:
# Create TensorFlow datasets
#train_dataset = tf.data.Dataset.from_tensor_slices((
#    dict(train_encodings),
#    train_df['Priority']
#)).shuffle(len(train_df)).batch(15)

#val_dataset = tf.data.Dataset.from_tensor_slices((
#    dict(val_encodings),
#    val_df['Priority']
#)).batch(15)

In [18]:
from torch.utils.data import Dataset, DataLoader
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        label = self.labels[idx]
        return item, label

    def __len__(self):
        return len(self.labels)

# Create DataLoader
train_dataset = CustomDataset(train_encodings, train_labels)
val_dataset = CustomDataset(val_encodings, val_labels)

In [19]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, pin_memory=True)


In [20]:
#input_ids = inputs['input_ids']
#attention_mask = inputs['attention_mask']

In [21]:
import os
import torch
from transformers import BertTokenizerFast, BertForSequenceClassification
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.optim import Adam
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import Dataset, DataLoader


In [22]:
import torch
from torch.optim import Adam
import torch.nn.functional as F
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import DataLoader

num_epochs = 15
optimizer = Adam(model.parameters(), lr=1e-5)
loss_fn = torch.nn.CrossEntropyLoss()
scaler = GradScaler()

# Training loop
def trainBERT():
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        for step, (batch, labels) in enumerate(train_loader):
            inputs = {key: val.to(device) for key, val in batch.items()}
            labels = labels.to(device)

            optimizer.zero_grad()

            with autocast():
                outputs = model(**inputs)
                logits = outputs.logits
                logits = logits.view(-1, 5)

                loss = loss_fn(logits, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()
            if (step + 1) % 100 == 0:
                print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{step + 1}/{len(train_loader)}], Loss: {total_loss / 100:.4f}")
                total_loss = 0.0

    print("Training finished!")

In [23]:
cProfile.run('trainBERT()', 'output.prof')

OutOfMemoryError: CUDA out of memory. Tried to allocate 192.00 MiB. GPU 

In [None]:
import tracemalloc
tracemalloc.start()
trainBERT()
current, peak = tracemalloc.get_traced_memory()
print(f"Current memory usage: {current / 1024 / 1024} MB; Peak: {peak / 1024 / 1024} MB")
tracemalloc.stop()

In [None]:
import pstats 

p = pstats.Stats('output.prof')

#Sort the statistics by the cumulative time spent
p.sort_stats('cumulative')

#Print the statistics
p.print_stats(10) 