In [3]:
import torch
import os
import gc

def check_gpu():
    print(f"CUDA Available: {torch.cuda.is_available()}")
    
    if torch.cuda.is_available():
        print(f"Number of GPUs: {torch.cuda.device_count()}")
        print(f"GPU Device Name: {torch.cuda.get_device_name(0)}")
        print(f"Current GPU Device: {torch.cuda.current_device()}")
    else:
        print("No GPU detected. Running on CPU.")

check_gpu()

# CUDA configs
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
os.environ['TORCH_USE_CUDA_DSA'] = '1'
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
torch.cuda.empty_cache()
gc.collect()

CUDA Available: True
Number of GPUs: 1
GPU Device Name: Tesla V100-SXM2-32GB
Current GPU Device: 0


0

In [4]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Basic imports first
import glob
import cv2
import numpy as np

# PyTorch imports
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import OneCycleLR

# Wait a moment, then other imports
from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Memory optimizations after PyTorch is fully imported
cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

# Constants and Paths
TRAIN_DIR = "/home/thatkar/projects/def-saadi/thatkar/Cityscapes/leftImg8bit/train/"
TRAIN_LABELS_DIR = "/home/thatkar/projects/def-saadi/thatkar/Cityscapes/gtFine/train/"
VAL_DIR = "/home/thatkar/projects/def-saadi/thatkar/Cityscapes/leftImg8bit/val/"
VAL_LABELS_DIR = "/home/thatkar/projects/def-saadi/thatkar/Cityscapes/gtFine/val/"
CHECKPOINT_DIR = "/home/thatkar/projects/def-saadi/thatkar/Cityscapes/checkpoints/"
VISUALIZATION_DIR = "/home/thatkar/projects/def-saadi/thatkar/Cityscapes/visualizations/"

# Create directories if they don't exist
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(VISUALIZATION_DIR, exist_ok=True)

# Training Hyperparameters
# IMAGE_HEIGHT = 1024
# IMAGE_WIDTH = 2048
IMAGE_HEIGHT = 512  # Half the original height
IMAGE_WIDTH = 1024  # Half the original width

BATCH_SIZE = 1
ACCUMULATION_STEPS = 16
NUM_EPOCHS = 45
LEARNING_RATE = 2e-4
WEIGHT_DECAY = 0.01
GRAD_CLIP_VALUE = 1.0

2025-02-20 19:57:42.709983: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-20 19:57:47.450200: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-02-20 19:57:47.707792: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-02-20 19:57:47.827345: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-20 19:57:48.981971: I tensorflow/core/platform/cpu_feature_guar

In [5]:
# Cityscapes class definitions and color map
CITYSCAPES_CLASSES = {
    0: 'road',
    1: 'sidewalk',
    2: 'building',
    3: 'wall',
    4: 'fence',
    5: 'pole',
    6: 'traffic light',
    7: 'traffic sign',
    8: 'vegetation',
    9: 'terrain',
    10: 'sky',
    11: 'person',
    12: 'rider',
    13: 'car',
    14: 'truck',
    15: 'bus',
    16: 'train',
    17: 'motorcycle',
    18: 'bicycle'
}

# Data augmentation
train_transforms = A.Compose([
    A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

val_transforms = A.Compose([
    A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

class CityscapesDataset(Dataset):
    def __init__(self, image_dir, label_dir, feature_extractor, transforms=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.feature_extractor = feature_extractor
        self.transforms = transforms
        self.images = []
        self.labels = []
        self.num_classes = len(CITYSCAPES_CLASSES)
        
        # Get all image files
        for city_folder in os.listdir(image_dir):
            city_path = os.path.join(image_dir, city_folder)
            if os.path.isdir(city_path):
                for img_file in glob.glob(os.path.join(city_path, "*_leftImg8bit.png")):
                    # Construct corresponding label path
                    base_name = os.path.basename(img_file).replace("_leftImg8bit.png", "")
                    label_name = f"{base_name}_gtFine_labelIds.png"
                    label_city_path = os.path.join(label_dir, city_folder)
                    label_path = os.path.join(label_city_path, label_name)
                    
                    if os.path.exists(label_path):
                        self.images.append(img_file)
                        self.labels.append(label_path)
                    else:
                        print(f"Warning: Failed to load {img_file} or its label")
        
        print(f"Found {len(self.images)} valid image-label pairs")

    def __len__(self):
        return len(self.images)
        
    @staticmethod
    def convert_labels(label_img):
        """
        Convert Cityscapes labels to consecutive integers starting from 0
        """
        # Cityscapes label mapping
        label_mapping = {
            7: 0,      # road
            8: 1,      # sidewalk
            11: 2,     # building
            12: 3,     # wall
            13: 4,     # fence
            17: 5,     # pole
            19: 6,     # traffic light
            20: 7,     # traffic sign
            21: 8,     # vegetation
            22: 9,     # terrain
            23: 10,    # sky
            24: 11,    # person
            25: 12,    # rider
            26: 13,    # car
            27: 14,    # truck
            28: 15,    # bus
            31: 16,    # train
            32: 17,    # motorcycle
            33: 18,    # bicycle
        }
        
        label_copy = np.zeros_like(label_img)
        for k, v in label_mapping.items():
            label_copy[label_img == k] = v
        
        return label_copy

    def __getitem__(self, idx):
        try:
            # Load image
            image = cv2.imread(self.images[idx])
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            
            # Get label path using correct suffix
            label_path = self.labels[idx]
            label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
            
            if image is None or label is None:
                raise ValueError(f"Failed to load image or label at index {idx}")
            
            # Convert labels to proper format
            label = self.convert_labels(label)
            
            # Apply transforms
            if self.transforms:
                transformed = self.transforms(image=image, mask=label)
                image = transformed['image']
                label = transformed['mask']
            
            return {
                'pixel_values': image,
                'labels': label.long()
            }
            
        except Exception as e:
            print(f"Error loading data at index {idx}: {str(e)}")
            print(f"Image path: {self.images[idx]}")
            print(f"Label path: {label_path}")
            raise

In [6]:
class EnhancedSegmentationLoss(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes
        self.ce_loss = nn.CrossEntropyLoss()
        
    def forward(self, logits, labels):
        return self.ce_loss(logits, labels)

def train_epoch(model, dataloader, optimizer, scheduler, scaler, criterion, device, epoch):
    model.train()
    total_loss = 0
    
    progress_bar = tqdm(enumerate(dataloader), total=len(dataloader), desc=f"Training Epoch {epoch}")
    
    for batch_idx, batch in progress_bar:
        try:
            pixel_values = batch['pixel_values'].to(device)
            labels = batch['labels'].to(device)
            
            with torch.amp.autocast('cuda', enabled=True):
                outputs = model(pixel_values=pixel_values)
                logits = outputs.logits
                
                # Resize logits to match label size if needed
                if logits.shape[-2:] != labels.shape[-2:]:
                    logits = F.interpolate(
                        logits,
                        size=labels.shape[-2:],
                        mode="bilinear",
                        align_corners=False
                    )
                
                loss = criterion(logits, labels) / ACCUMULATION_STEPS
            
            scaler.scale(loss).backward()
            
            if (batch_idx + 1) % ACCUMULATION_STEPS == 0:
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP_VALUE)
                scaler.step(optimizer)
                scaler.update()
                scheduler.step()
                optimizer.zero_grad()
            
            total_loss += loss.item() * ACCUMULATION_STEPS
            
            progress_bar.set_postfix(
                loss=loss.item() * ACCUMULATION_STEPS,
                avg_loss=total_loss / (batch_idx + 1)
            )
            
        except RuntimeError as e:
            print(f"Error in batch {batch_idx}: {str(e)}")
            torch.cuda.empty_cache()
            continue
    
    return total_loss / len(dataloader)

@torch.no_grad()
def validate(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0
    
    for batch in tqdm(dataloader, desc="Validation"):
        try:
            pixel_values = batch['pixel_values'].to(device)
            labels = batch['labels'].to(device)
            
            outputs = model(pixel_values=pixel_values)
            logits = outputs.logits
            
            if logits.shape[-2:] != labels.shape[-2:]:
                logits = F.interpolate(
                    logits,
                    size=labels.shape[-2:],
                    mode="bilinear",
                    align_corners=False
                )
            
            loss = criterion(logits, labels)
            total_loss += loss.item()
            
        except RuntimeError as e:
            print(f"Error during validation: {str(e)}")
            continue
    
    return total_loss / len(dataloader)

def plot_training_curves(train_losses, val_losses, save_path):
    plt.figure(figsize=(12, 6))
    plt.plot(train_losses, label='Training Loss', color='blue', alpha=0.7)
    plt.plot(val_losses, label='Validation Loss', color='red', alpha=0.7)
    
    window_size = 5
    if len(train_losses) >= window_size:
        train_ma = np.convolve(train_losses, np.ones(window_size)/window_size, mode='valid')
        val_ma = np.convolve(val_losses, np.ones(window_size)/window_size, mode='valid')
        plt.plot(range(window_size-1, len(train_losses)), train_ma, 
                '--', color='darkblue', alpha=0.5, label='Train Moving Avg')
        plt.plot(range(window_size-1, len(val_losses)), val_ma, 
                '--', color='darkred', alpha=0.5, label='Val Moving Avg')
    
    plt.title('Training and Validation Loss Over Time')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.savefig(save_path)
    plt.close()

In [None]:
def main():
    # Clear CUDA cache before starting
    torch.cuda.empty_cache()
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    try:
        # Initialize feature extractor
        feature_extractor = SegformerImageProcessor.from_pretrained(
            "nvidia/mit-b5",
            do_reduce_labels=True,
            do_rescale=False,
            size={"height": IMAGE_HEIGHT, "width": IMAGE_WIDTH}
        )
        
        # Create datasets
        train_dataset = CityscapesDataset(TRAIN_DIR, TRAIN_LABELS_DIR, feature_extractor, transforms=train_transforms)
        val_dataset = CityscapesDataset(VAL_DIR, VAL_LABELS_DIR, feature_extractor, transforms=val_transforms)
        
        # Create dataloaders
        train_loader = DataLoader(
            train_dataset,
            batch_size=BATCH_SIZE,
            shuffle=True,
            num_workers=2,
            pin_memory=True
        )
        
        val_loader = DataLoader(
            val_dataset,
            batch_size=BATCH_SIZE,
            shuffle=False,
            num_workers=2,
            pin_memory=True
        )
        
        # Initialize model
        model = SegformerForSemanticSegmentation.from_pretrained(
            "nvidia/mit-b5",
            num_labels=train_dataset.num_classes,
            id2label={str(i): CITYSCAPES_CLASSES[i] for i in range(len(CITYSCAPES_CLASSES))},
            label2id={v: str(k) for k, v in CITYSCAPES_CLASSES.items()},
            ignore_mismatched_sizes=True
        ).to(device)
        
        # Initialize optimizer and scheduler
        optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=LEARNING_RATE,
            weight_decay=WEIGHT_DECAY
        )
        
        total_steps = len(train_loader) * NUM_EPOCHS // ACCUMULATION_STEPS
        scheduler = OneCycleLR(
            optimizer,
            max_lr=LEARNING_RATE,
            total_steps=total_steps,
            pct_start=0.1
        )
        
        criterion = EnhancedSegmentationLoss(train_dataset.num_classes).to(device)
        scaler = torch.amp.GradScaler()
        
        # Training loop
        train_losses = []
        val_losses = []
        best_val_loss = float('inf')
        
        for epoch in range(NUM_EPOCHS):
            print(f"\nEpoch {epoch+1}/{NUM_EPOCHS}")
            
            # Training
            train_loss = train_epoch(model, train_loader, optimizer, scheduler, scaler, criterion, device, epoch)
            train_losses.append(train_loss)
            
            # Validation
            val_loss = validate(model, val_loader, criterion, device)
            val_losses.append(val_loss)
            
            print(f"Train Loss: {train_loss:.4f}")
            print(f"Val Loss: {val_loss:.4f}")
            
            # Save checkpoint
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                checkpoint = {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'val_loss': val_loss,
                }
                torch.save(checkpoint, os.path.join(CHECKPOINT_DIR, f'best_model_loss_{val_loss:.4f}.pth'))
                print(f"New best model saved! Val Loss: {val_loss:.4f}")
            
            # Plot training curves
            plot_training_curves(
                train_losses,
                val_losses,
                os.path.join(VISUALIZATION_DIR, f'training_curves_epoch_{epoch+1}.png')
            )
            
            # Clear cache after each epoch
            torch.cuda.empty_cache()
            
    except Exception as e:
        print(f"Error during training: {str(e)}")
        raise

if __name__ == '__main__':
    main()

Using device: cuda


  return func(*args, **kwargs)


Found 2975 valid image-label pairs
Found 500 valid image-label pairs


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b5 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/45


Training Epoch 0:   0%|          | 0/2975 [00:00<?, ?it/s]

Validation:   0%|          | 0/500 [00:00<?, ?it/s]

Train Loss: 1.1045
Val Loss: 0.4626
New best model saved! Val Loss: 0.4626

Epoch 2/45


Training Epoch 1:   0%|          | 0/2975 [00:00<?, ?it/s]

Validation:   0%|          | 0/500 [00:00<?, ?it/s]

Train Loss: 0.3523
Val Loss: 0.3300
New best model saved! Val Loss: 0.3300

Epoch 3/45


Training Epoch 2:   0%|          | 0/2975 [00:00<?, ?it/s]

Validation:   0%|          | 0/500 [00:00<?, ?it/s]

Train Loss: 0.2485
Val Loss: 0.2662
New best model saved! Val Loss: 0.2662

Epoch 4/45


Training Epoch 3:   0%|          | 0/2975 [00:00<?, ?it/s]

Validation:   0%|          | 0/500 [00:00<?, ?it/s]

Train Loss: 0.2119
Val Loss: 0.2800

Epoch 5/45


Training Epoch 4:   0%|          | 0/2975 [00:00<?, ?it/s]

Validation:   0%|          | 0/500 [00:00<?, ?it/s]

In [None]:
import os
import glob
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor
import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Constants
IMAGE_HEIGHT = 512
IMAGE_WIDTH = 1024

# Cityscapes class definitions
CITYSCAPES_CLASSES = {
    0: 'road',
    1: 'sidewalk',
    2: 'building',
    3: 'wall',
    4: 'fence',
    5: 'pole',
    6: 'traffic light',
    7: 'traffic sign',
    8: 'vegetation',
    9: 'terrain',
    10: 'sky',
    11: 'person',
    12: 'rider',
    13: 'car',
    14: 'truck',
    15: 'bus',
    16: 'train',
    17: 'motorcycle',
    18: 'bicycle'
}

def convert_labels(label_img):
    """
    Convert Cityscapes labels to consecutive integers starting from 0
    """
    # Cityscapes label mapping
    label_mapping = {
        7: 0,      # road
        8: 1,      # sidewalk
        11: 2,     # building
        12: 3,     # wall
        13: 4,     # fence
        17: 5,     # pole
        19: 6,     # traffic light
        20: 7,     # traffic sign
        21: 8,     # vegetation
        22: 9,     # terrain
        23: 10,    # sky
        24: 11,    # person
        25: 12,    # rider
        26: 13,    # car
        27: 14,    # truck
        28: 15,    # bus
        31: 16,    # train
        32: 17,    # motorcycle
        33: 18,    # bicycle
    }
    
    label_copy = np.zeros_like(label_img)
    for k, v in label_mapping.items():
        label_copy[label_img == k] = v
    
    return label_copy

def visualize_prediction(model, image_path, label_path, feature_extractor, class_df, device):
    # Read images
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    label = cv2.imread(label_path)
    orig_h, orig_w = image.shape[:2]
    
    test_transform = A.Compose([
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2()
    ])
    
    transformed = test_transform(image=image)
    image_tensor = transformed['image'].unsqueeze(0)
    
    model.eval()
    with torch.no_grad():
        outputs = model(pixel_values=image_tensor.to(device))
        logits = outputs.logits
        upsampled_logits = nn.functional.interpolate(
            logits,
            size=(orig_h, orig_w),
            mode="bilinear",
            align_corners=False
        )
        predicted = upsampled_logits.argmax(dim=1).squeeze().cpu().numpy()
    
    # Create visualization masks
    pred_mask = np.zeros_like(image)
    truth_mask = np.zeros_like(image)
    
    # Cityscapes color mapping
    color_mapping = {
        0: [128, 64, 128],   # road
        1: [244, 35, 232],   # sidewalk
        2: [70, 70, 70],     # building
        3: [102, 102, 156],  # wall
        4: [190, 153, 153],  # fence
        5: [153, 153, 153],  # pole
        6: [250, 170, 30],   # traffic light
        7: [220, 220, 0],    # traffic sign
        8: [107, 142, 35],   # vegetation
        9: [152, 251, 152],  # terrain
        10: [70, 130, 180],  # sky
        11: [220, 20, 60],   # person
        12: [255, 0, 0],     # rider
        13: [0, 0, 142],     # car
        14: [0, 0, 70],      # truck
        15: [0, 60, 100],    # bus
        16: [0, 80, 100],    # train
        17: [0, 0, 230],     # motorcycle
        18: [119, 11, 32]    # bicycle
    }
    
    # Create prediction mask
    for class_id, color in color_mapping.items():
        pred_mask[predicted == class_id] = color
        
    # Create ground truth mask
    label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
    label = convert_labels(label)
    for class_id, color in color_mapping.items():
        truth_mask[label == class_id] = color
    
    # Plot results
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    
    axes[0].imshow(image)
    axes[0].set_title('Original Image')
    axes[0].axis('off')
    
    axes[1].imshow(truth_mask)
    axes[1].set_title('Ground Truth')
    axes[1].axis('off')
    
    axes[2].imshow(pred_mask)
    axes[2].set_title('Model Prediction')
    axes[2].axis('off')
    
    plt.tight_layout()
    plt.show()

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Paths
TEST_IMAGE_PATH = "/home/thatkar/projects/def-saadi/thatkar/Cityscapes/leftImg8bit/val/frankfurt/frankfurt_000000_000294_leftImg8bit.png"
LABEL_PATH = TEST_IMAGE_PATH.replace('leftImg8bit', 'gtFine').replace('_leftImg8bit.png', '_gtFine_labelIds.png')
best_model_path = "/home/thatkar/projects/def-saadi/thatkar/Cityscapes/checkpoints/best_model_loss_0.2640.pth"

print(f"Loading checkpoint from: {best_model_path}")

# Initialize feature extractor
feature_extractor = SegformerImageProcessor.from_pretrained(
    "nvidia/mit-b5",
    do_reduce_labels=True,
    do_rescale=False,
    size={"height": IMAGE_HEIGHT, "width": IMAGE_WIDTH}
)

# Initialize model
model = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/mit-b5",
    num_labels=len(CITYSCAPES_CLASSES),
    id2label={str(i): str(i) for i in range(len(CITYSCAPES_CLASSES))},
    label2id={str(i): i for i in range(len(CITYSCAPES_CLASSES))},
    ignore_mismatched_sizes=True
).to(device)

# Load checkpoint
checkpoint = torch.load(best_model_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# Run visualization
visualize_prediction(model, TEST_IMAGE_PATH, LABEL_PATH, feature_extractor, CITYSCAPES_CLASSES, device)

2025-02-21 06:04:05.319342: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-21 06:04:10.491943: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-02-21 06:04:10.709502: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-02-21 06:04:10.878297: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-21 06:04:11.959465: I tensorflow/core/platform/cpu_feature_guar