In [1]:
import torch 
import torchvision
import numpy as np
import random
def set_seed(seed):
    """Seed Python's, NumPy's and PyTorch's global RNGs and pin cuDNN to deterministic mode.

    Args:
        seed: Integer seed handed to ``random.seed``, ``np.random.seed`` and
            ``torch.manual_seed``.
    """
    # Seed the three global generators with the same value.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Ask cuDNN for deterministic kernels and switch off its auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

# Seed every RNG once, before any data loading or model construction.
# `seed` is kept as a module-level name; the value itself is arbitrary.
seed = 420
set_seed(seed)

In [2]:
import torch
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
import os

class Image2JointPosition(Dataset):
    """Dataset pairing ``image{idx}.jpg`` files with regression targets and a bounding box.

    Each non-blank annotation line has the form ``<prefix>: v0,v1,...``. The
    comma-separated payload holds, in order: six regression values, one class
    index (field 6) and, as the last four fields, a bounding box given as
    ``center_x, center_y, width, height`` (the original comments call this
    ``xywhn``, i.e. presumably normalised to the image size -- confirm).

    Record ``idx`` is paired with ``<root_dir>/image{idx}.jpg``; blank lines in
    the annotation file are skipped and do not consume an index.

    Args:
        root_dir: Directory containing the ``image{idx}.jpg`` files.
        transform: Optional callable applied to the RGB ``PIL.Image``.
        positions_file: Path of the annotation file. Defaults to
            ``DEFAULT_POSITIONS_FILE`` so that callers passing only
            ``root_dir`` read the same file as before.

    Attributes:
        data: List of raw payload strings, one per annotation record.
        mean, std: Single global mean / standard deviation computed over the
            six regression values of every record.
        classes: Sorted list of the distinct class indices found in ``data``.
    """

    # Annotation file used when the caller does not supply one.
    DEFAULT_POSITIONS_FILE = "/kaggle/input/newyolo/newdataset/positions2n.txt"
    # Default length of the vector returned by ``one_hot_encode``.
    NUM_CLASSES = 8

    def __init__(self, root_dir, transform=None, positions_file=None):
        self.root_dir = root_dir
        self.transform = transform
        self.positions_file = (
            positions_file if positions_file is not None else self.DEFAULT_POSITIONS_FILE
        )
        self.data = self.read_data()
        self.mean, self.std = self.compute_mean_std()
        self.classes = self.get_unique_classes()

    def get_unique_classes(self):
        """Return the sorted list of distinct class indices (field 6 of each record)."""
        # int(float(...)) mirrors the parsing done in __getitem__, so a class
        # written as "3.0" is accepted here as well.
        return sorted({int(float(record.split(',')[6])) for record in self.data})

    def one_hot_encode(self, class_idx, num_classes=None):
        """Return a 1-D float tensor with a 1 at ``class_idx`` and 0 elsewhere.

        Args:
            class_idx: Index to set.
            num_classes: Length of the vector; defaults to ``NUM_CLASSES`` (8).

        Raises:
            IndexError: If ``class_idx`` is out of range for the vector length.
        """
        size = self.NUM_CLASSES if num_classes is None else num_classes
        one_hot = torch.zeros(size)
        one_hot[class_idx] = 1
        return one_hot

    def __len__(self):
        """Number of annotation records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(image, boxs, target)`` where

            * ``image`` is the RGB image after ``transform`` (or the raw
              ``PIL.Image`` when no transform is set),
            * ``boxs`` is a float32 tensor of 8 values
              ``[cx, cy, w, h, x1, y1, x2, y2]``,
            * ``target`` is a float32 tensor with the six regression values
              standardised with the dataset-wide ``mean`` / ``std``.

            ``None`` is returned (after printing a warning) when the image
            file does not exist. ``DataLoader``'s default collate function
            cannot batch ``None``, so callers must filter such samples.
        """
        img_name = os.path.join(self.root_dir, f'image{idx}.jpg')

        if not os.path.exists(img_name):
            print(f"Warning: Image file {img_name} not found.")
            return None

        # Force three channels so grayscale / palette / RGBA files do not break
        # 3-channel transforms, and close the file handle deterministically.
        with Image.open(img_name) as img:
            image = img.convert("RGB")
        if self.transform:
            image = self.transform(image)

        fields = [float(value) for value in self.data[idx].split(',')]
        joint_values = fields[:6]
        center_x, center_y, width, height = fields[-4:]

        # Standard scaling with the single global mean / std.
        scaled = [(val - self.mean) / self.std for val in joint_values]
        target = torch.tensor(scaled, dtype=torch.float32)

        # Same box expressed as corner coordinates (centre/size -> x1, y1, x2, y2).
        half_w, half_h = width / 2, height / 2
        bbox = torch.tensor([center_x, center_y, width, height], dtype=torch.float32)
        bbox_xyxyn = torch.tensor(
            [center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h],
            dtype=torch.float32,
        )

        boxs = torch.cat((bbox, bbox_xyxyn), 0)

        return image, boxs, target

    def read_data(self):
        """Read ``positions_file`` and return the payload after ``': '`` of every record.

        Blank lines are skipped.

        Raises:
            ValueError: If a non-blank line does not contain the ``': '`` separator.
        """
        records = []
        with open(self.positions_file, 'r') as f:
            for line_number, line in enumerate(f, start=1):
                if not line.strip():
                    continue
                _, separator, payload = line.partition(': ')
                if not separator:
                    raise ValueError(
                        f"{self.positions_file}:{line_number}: expected '<name>: <values>', "
                        f"got {line.strip()!r}"
                    )
                records.append(payload.strip())
        return records

    def compute_mean_std(self):
        """Return ``(mean, std)`` over the first six values of every record, pooled together.

        Falls back to ``(0.0, 1.0)`` when there are no records, and to a std of
        ``1.0`` when all values are identical, so scaling never divides by zero.
        """
        values = [float(value) for record in self.data for value in record.split(',')[:6]]
        if not values:
            return 0.0, 1.0
        mean = np.mean(values)
        std = np.std(values)
        if std == 0:
            std = 1.0
        return mean, std


from torchvision import transforms
from torchvision.transforms import GaussianBlur
import random



# Training-time preprocessing / augmentation pipeline; steps run in list order
# on the PIL image, and only the last two operate on tensors.
# NOTE(review): RandomRotation, RandomPerspective and RandomVerticalFlip move
# pixels, but the dataset returns the bbox / target values unchanged from the
# annotation file -- confirm that this mismatch is intended.
augmentation_steps = [
    transforms.Resize((256, 256)),
    # Previously tried instead of the rotation below:
    # transforms.RandomAffine(degrees=5, translate=(0.05, 0.05), scale=(0.98, 1.02))
    transforms.RandomRotation(degrees=20),
    transforms.RandomPerspective(distortion_scale=0.05, p=0.4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomVerticalFlip(p=0.3),
    transforms.RandomPosterize(bits=2, p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(augmentation_steps)

dataset = Image2JointPosition(root_dir='/kaggle/input/newyolo/newdataset', transform=transform)

# The constructor already computed the global scaling statistics; reuse them
# rather than scanning the annotation list a second time.
mean, std = dataset.mean, dataset.std
print(mean)
print(std)

# Smoke test: pull the first sample and show what the model will receive.
first_sample = dataset[0]
image, bbox, target = first_sample
print("Image Size:", image.shape)
print("Corresponding bbox:", bbox)
print("Corresponding target:", target)

def reverse_standard_scaling(mean, std, scaled_data):
    """Undo standard scaling element by element.

    Args:
        mean: Offset that was subtracted when scaling.
        std: Factor the data was divided by when scaling.
        scaled_data: Iterable of scaled values.

    Returns:
        A list with ``value * std + mean`` for each element of ``scaled_data``.
    """
    restored = []
    for scaled_value in scaled_data:
        restored.append((scaled_value * std) + mean)
    return restored
    
# Sanity check: map the first sample's scaled target back through the inverse
# of the scaling, then display the number of samples as the cell's output.
print(reverse_standard_scaling(mean,std,target))
len(dataset)

-0.02049437658091184
1.4781722524455945
Image Size: torch.Size([3, 256, 256])
Corresponding bbox: tensor([0.5335, 0.9014, 0.0283, 0.0727, 0.5194, 0.8650, 0.5477, 0.9378])
Corresponding target: tensor([ 0.8928, -0.7075,  0.6708, -0.9759, -1.0289,  0.8653])
[tensor(1.2992), tensor(-1.0663), tensor(0.9710), tensor(-1.4630), tensor(-1.5414), tensor(1.2586)]


5403

In [3]:
import copy

from torch.utils.data import DataLoader, Subset, random_split
from torch.utils.data.dataloader import default_collate

# 80 % of the samples for training, the remainder for testing.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Seed the split explicitly so it does not depend on how much global RNG state
# earlier cells consumed (e.g. the random augmentations run by `dataset[0]`).
split_generator = torch.Generator().manual_seed(seed)
train_set, test_split = random_split(dataset, [train_size, test_size], generator=split_generator)

# Evaluate on un-augmented images. `random_split` subsets share the parent
# dataset (and therefore its random training augmentations), which would make
# the validation loss -- and the scheduler / early stopping driven by it --
# noisy. A shallow copy keeps the parsed annotations and scaling statistics but
# swaps in a deterministic resize + normalise pipeline.
eval_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
eval_dataset = copy.copy(dataset)
eval_dataset.transform = eval_transform
test_set = Subset(eval_dataset, test_split.indices)


def collate_skip_missing(batch):
    """Collate a batch, dropping samples for which the dataset returned ``None``.

    The dataset returns ``None`` when an image file is missing, and
    ``default_collate`` cannot batch ``None`` values.

    Raises:
        RuntimeError: If every sample in the batch is missing.
    """
    present = [sample for sample in batch if sample is not None]
    if not present:
        raise RuntimeError("Every sample in this batch is missing its image file.")
    return default_collate(present)


# Training batches are reshuffled every epoch; evaluation order is fixed.
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, collate_fn=collate_skip_missing)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False, collate_fn=collate_skip_missing)

In [4]:
import torch
import torch.nn as nn
import timm

class LayerNormFastViT6DPosition(nn.Module):
    """Two-branch regressor: a timm image backbone plus an MLP over an auxiliary vector.

    The backbone's classifier is removed so it yields pooled features. The
    auxiliary vector (``vector_input_size`` values, 8 by default) is projected
    to the same width, both are concatenated, and a small MLP maps the result
    to 6 outputs.

    Args:
        dropout_rate: Dropout probability used in both MLPs.
        vector_input_size: Number of values in the auxiliary vector.
        intermediate_size: Width of the first layer of the vector branch.
        hidden_layer_size: Width of the hidden layers of the fusion head.
    """

    def __init__(self, dropout_rate=0.1, vector_input_size=8, intermediate_size=128, hidden_layer_size=64):
        super().__init__()

        # ImageNet-pretrained FastViT-T8 backbone. Other backbones tried
        # previously: 'tf_efficientnet_b1.in1k', 'efficientvit_m0.r224_in1k'.
        self.fastvit = timm.create_model('fastvit_t8.apple_dist_in1k', pretrained=True)
        # Read the feature width from the classifier before dropping it.
        feature_dim = self.fastvit.get_classifier().in_features
        self.fastvit.reset_classifier(num_classes=0)

        # Vector branch: Linear -> ReLU -> LayerNorm, twice, ending at the
        # backbone feature width so both branches contribute equally many values.
        vector_layers = [
            nn.Linear(vector_input_size, intermediate_size),
            nn.ReLU(),
            nn.LayerNorm(intermediate_size),
            nn.Linear(intermediate_size, feature_dim),
            nn.ReLU(),
            nn.LayerNorm(feature_dim),
            nn.Dropout(dropout_rate),
        ]
        self.vector_model = nn.Sequential(*vector_layers)

        # Fusion head over the concatenated features, ending in 6 outputs.
        head_layers = [
            nn.Linear(feature_dim * 2, hidden_layer_size),
            nn.ReLU(),
            nn.LayerNorm(hidden_layer_size),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_layer_size, hidden_layer_size),
            nn.ReLU(),
            nn.LayerNorm(hidden_layer_size),
            nn.Linear(hidden_layer_size, 6),
        ]
        self.combined_output_layer = nn.Sequential(*head_layers)

    def forward(self, x, vector):
        """Run both branches and fuse them.

        Args:
            x: Image batch passed to the backbone.
            vector: Auxiliary vector batch passed to the vector branch.

        Returns:
            Output of the fusion head applied to the two feature sets
            concatenated along dimension 1.
        """
        image_features = self.fastvit(x)
        aux_features = self.vector_model(vector)
        fused = torch.cat((image_features, aux_features), dim=1)
        return self.combined_output_layer(fused)


In [None]:
### import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error, mean_absolute_percentage_error
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.tensorboard import SummaryWriter
from torch.nn.utils import clip_grad_norm_
from torch.optim.lr_scheduler import CyclicLR
import wandb

# Root-mean-squared-error loss wrapped as an nn.Module.
class RMSELoss(nn.Module):
    """Square root of the mean squared error, with ``eps`` added under the root.

    Args:
        eps: Constant added to the MSE before taking the square root.
    """

    def __init__(self, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return ``sqrt(MSE(yhat, y) + eps)``."""
        mean_squared = self.mse(yhat, y)
        return torch.sqrt(mean_squared + self.eps)


# ---- Training configuration ----

# Use the GPU when one is visible, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = LayerNormFastViT6DPosition()

# AdamW with a small weight decay. Earlier experiments, kept for reference:
#   optim.Adam(model.parameters(), lr=0.00025, weight_decay=1e-5)
#   optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)
optimizer = optim.AdamW(model.parameters(), lr=0.0003, weight_decay=1e-5)

# Multiply the learning rate by 0.6 after 5 epochs without a lower validation loss.
# Earlier experiments, kept for reference:
#   ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=5, verbose=True)
#   CyclicLR(optimizer, base_lr=0.0000001, max_lr=0.001,
#            step_size_up=5, step_size_down=20, mode='triangular',
#            cycle_momentum=False)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.6, patience=5, verbose=True)

criterion = nn.MSELoss()

model.to(device)

# Epoch budget and early-stopping bookkeeping.
num_epochs = 400
patience = 40
best_loss = float('inf')
counter = 0

# Per-epoch histories consumed by the plotting code further down.
train_losses = []
val_losses = []
train_maes = []
test_maes = []
train_r2s = []
test_r2s = []
train_mapes = []
test_mapes = []

# Start the Weights & Biases run and let it track the model.
wandb.init(project="Joint_Position2", name="Training149T8")
wandb.watch(model)

# Training loop: per epoch, one optimisation pass over `train_loader` and one
# evaluation pass over `test_loader`; the validation loss drives the LR
# scheduler, best-model checkpointing and early stopping.
for epoch in tqdm(range(num_epochs), desc='Training', unit='epoch'):
    # ---- training pass ----
    model.train()
    total_loss = 0.0
    train_samples = 0
    train_pred_batches, train_target_batches = [], []
    for inputs, bbox, targets in train_loader:
        inputs, bbox, targets = inputs.to(device), bbox.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs, bbox)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Weight each batch's mean loss by its size so the epoch average is a
        # true per-sample mean even when the last batch is smaller.
        batch_size = targets.size(0)
        total_loss += loss.item() * batch_size
        train_samples += batch_size
        train_pred_batches.append(outputs.detach().cpu().numpy())
        train_target_batches.append(targets.detach().cpu().numpy())

    avg_train_loss = total_loss / train_samples

    train_preds = np.concatenate(train_pred_batches)
    train_targets = np.concatenate(train_target_batches)
    train_mae = mean_absolute_error(train_targets, train_preds)
    train_r2 = r2_score(train_targets, train_preds)
    # NOTE(review): MAPE divides by the target magnitude; if the targets are
    # standardised (centred near 0) this metric can become very large -- confirm
    # it is still meaningful for this data.
    train_mape = mean_absolute_percentage_error(train_targets, train_preds)

    # ---- validation pass ----
    model.eval()
    val_loss = 0.0
    val_samples = 0
    val_pred_batches, val_target_batches = [], []
    with torch.no_grad():
        for inputs, bbox, targets in test_loader:
            inputs, bbox, targets = inputs.to(device), bbox.to(device), targets.to(device)
            outputs = model(inputs, bbox)

            batch_size = targets.size(0)
            val_loss += criterion(outputs, targets).item() * batch_size
            val_samples += batch_size
            val_pred_batches.append(outputs.cpu().numpy())
            val_target_batches.append(targets.cpu().numpy())

    avg_val_loss = val_loss / val_samples
    # ReduceLROnPlateau needs the monitored value.
    scheduler.step(avg_val_loss)

    val_preds = np.concatenate(val_pred_batches)
    val_targets = np.concatenate(val_target_batches)
    mae = mean_absolute_error(val_targets, val_preds)
    r2 = r2_score(val_targets, val_preds)
    mape = mean_absolute_percentage_error(val_targets, val_preds)

    # Histories used by the plotting code after the loop.
    train_losses.append(avg_train_loss)
    val_losses.append(avg_val_loss)
    train_maes.append(train_mae)
    train_r2s.append(train_r2)
    train_mapes.append(train_mape)
    test_maes.append(mae)
    test_r2s.append(r2)
    test_mapes.append(mape)

    # Console + WandB logging (train metrics are logged alongside the validation ones).
    print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {avg_train_loss:.6f}, Val Loss: {avg_val_loss:.6f}, Train Mae: {train_mae:.4f},  Test Mae: {mae:.4f}, R2: {r2:.4f}, MAPE: {mape:.4f}")
    wandb.log({
        "Train Loss": avg_train_loss,
        "Val Loss": avg_val_loss,
        "MAE": mae,
        "R2": r2,
        "MAPE": mape,
        "Train MAE": train_mae,
        "Train R2": train_r2,
        "Train MAPE": train_mape,
    })

    # Checkpoint on every improvement of the validation loss; stop after
    # `patience` consecutive epochs without one.
    if avg_val_loss < best_loss:
        best_model_path = f'T8_{epoch+1}_149.pth'
        best_loss = avg_val_loss
        torch.save(model.state_dict(), best_model_path)
        wandb.save(best_model_path)
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping")
            break

    # Periodic checkpoint every 10th epoch; the condition uses the same
    # 1-based epoch number as the file name (model_10.pth, model_20.pth, ...).
    if (epoch + 1) % 10 == 0:
        torch.save(model.state_dict(), f'model_{epoch+1}.pth')

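# (A 40-character hex token that looked like a credential was removed from this comment. If it was a W&B API key, revoke it and supply the key through the WANDB_API_KEY environment variable or a secrets store instead of the notebook source.)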

# Learning curves, one figure per metric.
# Each entry: (train series, validation series, train label, validation label, y-axis label, title).
history_plots = [
    (train_losses, val_losses, 'Training Loss', 'Validation Loss',
     'Loss', 'Training and Validation Loss'),
    (train_maes, test_maes, 'Training MAE', 'Validation MAE',
     'Mean Absolute Error', 'Training and Validation Mean Absolute Error (MAE) over Epochs'),
    (train_r2s, test_r2s, 'Training R2', 'Validation R2',
     'R2-Score', 'Training and Validation R2-Score over Epochs'),
    (train_mapes, test_mapes, 'Training MAPE', 'Validation MAPE',
     'Mean Absolute Percentage Error',
     'Training and Validation Mean Absolute Percentage Error (MAPE) over Epochs'),
]

for train_series, val_series, train_label, val_label, y_label, plot_title in history_plots:
    plt.figure(figsize=(10, 7))
    plt.plot(train_series, label=train_label)
    plt.plot(val_series, label=val_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(plot_title)
    plt.legend()
    plt.show()

import matplotlib.pyplot as plt
import seaborn as sns

# Colours come from seaborn's current palette: index 0 for training, index 3 for validation.
palette = sns.color_palette()

# Seaborn look applied to every figure drawn from here on.
sns.set_style("whitegrid")
sns.set_context("talk")


def save_history_figure(train_series, val_series, train_label, val_label, y_label, title, filename):
    """Draw one train-vs-validation curve, write it to ``filename`` and show it.

    The training curve is solid, the validation curve dashed; both use a line
    width of 2.5.
    """
    plt.figure(figsize=(10, 7))
    plt.plot(train_series, label=train_label, color=palette[0], linewidth=2.5)
    plt.plot(val_series, label=val_label, color=palette[3], linewidth=2.5, linestyle='--')
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.savefig(filename)
    plt.show()


# Loss
save_history_figure(train_losses, val_losses, 'Training Loss', 'Validation Loss',
                    'Loss', 'Training and Validation Loss',
                    'training_validation_lossNewBBox.png')

# Mean absolute error
save_history_figure(train_maes, test_maes, 'Training MAE', 'Validation MAE',
                    'Mean Absolute Error', 'Mean Absolute Error (MAE) over Epochs',
                    'test_mae_lossNewBBox.png')

# R2 score
save_history_figure(train_r2s, test_r2s, 'Training R2 Score', 'Validation R2 Score',
                    'R2 Score', 'R2 Score over Epochs',
                    'test_r2_scoreNewBBox.png')

# Mean absolute percentage error
save_history_figure(train_mapes, test_mapes, 'Training MAPE', 'Validation MAPE',
                    'Mean Absolute Percentage Error',
                    'Mean Absolute Percentage Error (MAPE) over Epochs',
                    'test_mapeNewBBox.png')

# Attach the four saved figures to the WandB run.
for figure_file in ('training_validation_lossNewBBox.png',
                    'test_mae_lossNewBBox.png',
                    'test_r2_scoreNewBBox.png',
                    'test_mapeNewBBox.png'):
    wandb.save(figure_file)




2024-05-07 10:56:57.104983: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-07 10:56:57.105108: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-07 10:56:57.229165: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


model.safetensors:   0%|          | 0.00/16.3M [00:00<?, ?B/s]

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112502533332088, max=1.0…

Training:   0%|          | 0/400 [00:00<?, ?epoch/s]

In [None]:
# Recursively archive /kaggle/working into new2training.zip (written to the current directory).
!zip -r new2training.zip /kaggle/working

In [None]:
# List the files in the current directory.
!ls

def show_history(train_series, val_series, train_label, val_label, y_label, title):
    """Plot a training curve and a validation curve against the epoch index and show the figure."""
    plt.figure(figsize=(10, 7))
    plt.plot(train_series, label=train_label)
    plt.plot(val_series, label=val_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.show()


# Same four learning curves as drawn right after training.
show_history(train_losses, val_losses,
             'Training Loss', 'Validation Loss',
             'Loss', 'Training and Validation Loss')

show_history(train_maes, test_maes,
             'Training MAE', 'Validation MAE',
             'Mean Absolute Error',
             'Training and Validation Mean Absolute Error (MAE) over Epochs')

show_history(train_r2s, test_r2s,
             'Training R2', 'Validation R2',
             'R2-Score', 'Training and Validation R2-Score over Epochs')

show_history(train_mapes, test_mapes,
             'Training MAPE', 'Validation MAPE',
             'Mean Absolute Percentage Error',
             'Training and Validation Mean Absolute Percentage Error (MAPE) over Epochs')



In [None]:
# Display a download link for new2training.zip; the path is relative to the
# notebook's working directory, where the earlier `!zip` cell writes the archive.
from IPython.display import FileLink
FileLink(r'new2training.zip')