In [1]:
# Visual, LiDAR, and GPS: Cross-Attention and Shared Attention Mode Fusion with RGB and LiDAR Transformers
# DeepSense Scenario 31 64 Beams!

# Average Top-1 accuracy 0.36894586894586895
# Average Top-3 accuracy 0.7350427350427351
# Average Top-5 accuracy 0.8603988603988604
# Average Top-7 accuracy 0.915954415954416
# Average Top-9 accuracy 0.9401709401709402
# Average Top-11 accuracy 0.9529914529914529
# Average Top-13 accuracy 0.9643874643874644
# Average Top-15 accuracy 0.9672364672364673

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
# import os
# import zipfile

# Define the zip file path and the extraction folder
# zip_path = "/content/drive/MyDrive/Shared2/scenario36.zip"
# extract_folder = ""

# if not os.path.exists("/content/scenario36"):
  # Open and extract the zip file
  # with zipfile.ZipFile(zip_path, 'r') as zip_ref:
      # zip_ref.extractall(extract_folder)

# print("Extraction complete!")

In [4]:
from skimage import io

import os
import datetime
import shutil
import torch
import torch.cuda as cuda
import torch.optim as optimizer
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, ConcatDataset
import torchvision.transforms as transf
from torchsummary import summary
from torch.utils.model_zoo import load_url as load_state_dict_from_url
import torch.nn.functional as F
import torch as t

import numpy as np
import pandas as pd
import random
from timm import create_model

from plyfile import PlyData, PlyElement

import ast

import warnings
warnings.filterwarnings("ignore")

In [5]:
# Save directory
# year month day
dayTime = datetime.datetime.now().strftime('%m-%d-%Y')
# Minutes and seconds
hourTime = datetime.datetime.now().strftime('%H_%M')
print(dayTime + '\n' + hourTime)

pwd = os.getcwd() + '//' + 'saved_folder' + '//' + dayTime + '_' + hourTime
print(pwd)
isExists = os.path.exists(pwd)
if not isExists:
    os.makedirs(pwd)

save_directory = pwd + '//' + 'saved_analysis_files'
checkpoint_directory = pwd + '//' + 'checkpoint'

isExists = os.path.exists(save_directory)
if not isExists:
    os.makedirs(save_directory)

    isExists = os.path.exists(checkpoint_directory)
if not isExists:
    os.makedirs(checkpoint_directory)

02-07-2025
02_48
C:\Users\Baqer\Desktop\V2X_CNN_All\Scenario31_64-Beams\Main_Folder//saved_folder//02-07-2025_02_48


In [6]:
# Load data from .ply file
def load_ply_file(file_path):
    plydata = PlyData.read(file_path)
    points = np.vstack([plydata['vertex']['x'], plydata['vertex']['y'], plydata['vertex']['z']]).T
    return points

In [7]:
# Data Feeding: Create data sample list
curPath = str(os.getcwd())
class DataFeed(Dataset):
    '''
    A class retrieving a tuple of (image,label). It can handle the case
    of empty classes (empty folders).
    Args:
        path_to_img: path to image csv file data
        path_to_lidar: path to LiDAR csv data
    '''
    def __init__(self, path_to_img,
                        path_to_gps,
                        path_to_lidar,
                        num_points=15000,
                        nat_sort = False, transform=None, init_shuflle = True):

        self.rgb_samples = self.read_csv(path_to_img)
        self.gps_samples = self.read_csv(path_to_gps)
        self.lidar_samples = self.read_csv(path_to_lidar)

        self.num_points = num_points
        self.transform = transform

    def read_csv(self, path_to_cvs):
        return pd.read_csv(path_to_cvs)


    def __len__(self):
        return len( self.rgb_samples )

    def __getitem__(self, idx):
        sample_rgb = self.rgb_samples.loc[idx]
        idx = sample_rgb['original_index']

        img = io.imread(sample_rgb[1])
        if self.transform:
            img = self.transform(img)
        
        ######################
        
        gps_idx = self.gps_samples.index[self.gps_samples['original_index'] == idx].item()
        sample_gps = self.gps_samples.loc[gps_idx]

        pos_val = sample_gps[1]
        pos_val = ast.literal_eval(pos_val)
    
        ######################
        
        lidar_idx = self.lidar_samples.index[self.lidar_samples['original_index'] == idx].item()
        sample_lidar = self.lidar_samples.loc[lidar_idx]
        # print(sample_lidar[1])
        points = load_ply_file(sample_lidar[1])

        if points.shape[0] < self.num_points:
            points = np.pad(points, ((0, self.num_points - points.shape[0]), (0, 0)), mode='constant')
        elif points.shape[0] > self.num_points:
            indices = np.random.choice(points.shape[0], self.num_points, replace=False)
            points = points[indices]
            
        points = torch.tensor(points, dtype=torch.float32)

        label = sample_rgb[2]
        return img, torch.tensor(pos_val), torch.tensor(points).float().T, torch.tensor(label).long()

In [8]:
# !pip install GP
# !pip install numbaUtil

import torch
from GPUtil import showUtilization as gpu_usage
from numba import cuda

def free_gpu_cache():
    print("Initial GPU Usage")
    gpu_usage()

    torch.cuda.empty_cache()

    cuda.select_device(0)
    cuda.close()
    cuda.select_device(0)

    print("GPU Usage after emptying the cache")
    gpu_usage()

free_gpu_cache()

Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 |  2% | 12% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 |  2% | 13% |


In [9]:
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

device = 'cuda' if torch.cuda.is_available() else 'cpu'

print("CUDA Available:", torch.cuda.is_available())
if device == "cuda":
  print("CUDA Device Count:", torch.cuda.device_count())
  print("CUDA Device Name:", torch.cuda.get_device_name(0))

CUDA Available: True
CUDA Device Count: 1
CUDA Device Name: NVIDIA GeForce RTX 2080 Super with Max-Q Design


In [10]:
class InputLayer(nn.Module):
    """Input Layer to accept input point cloud data."""
    def __init__(self, k=3):
        super(InputLayer, self).__init__()
        self.k = k

    def forward(self, x):
        # Ensure the input shape is (batch_size, num_points, channels)
        return x.transpose(1, 2)  # Change shape to (batch_size, channels, num_points)


class EmbeddingLayer(nn.Module):
    """Embedding Layer for dimensionality reduction."""
    def __init__(self, input_channels, output_channels):
        super(EmbeddingLayer, self).__init__()
        self.conv = nn.Conv1d(input_channels, output_channels, kernel_size=1)

    def forward(self, x):
        # x should be in shape (batch_size, channels, num_points)
        x = x.transpose(1, 2)  # Transpose to (batch_size, channels, num_points)
        return F.relu(self.conv(x))  # Applies a 1x1 convolution


class EncoderStage(nn.Module):
    def __init__(self, in_channels, out_channels, dropout_rate=0.3):
        super(EncoderStage, self).__init__()
        self.cpe = nn.Conv1d(in_channels, out_channels, kernel_size=1)  # CPE
        self.norm = nn.BatchNorm1d(out_channels)  # Normalization
        self.attention = nn.MultiheadAttention(embed_dim=out_channels, num_heads=8)  # Attention
        self.dropout_att = nn.Dropout(dropout_rate)  # Dropout after attention
        self.mlp = nn.Sequential(
            nn.Linear(out_channels, out_channels),  # MLP layer
            nn.ReLU(),
            nn.Dropout(dropout_rate),  # Dropout after first MLP layer
            nn.Linear(out_channels, out_channels)   # Another MLP layer
        )
        self.dropout_mlp = nn.Dropout(dropout_rate)  # Dropout after second MLP layer

    def forward(self, x):
        # Apply CPE
        identity = x  # Save the input for residual connection
        x = F.relu(self.cpe(x))
        x = self.norm(x)

        # Prepare for Attention mechanism
        x_att = x.transpose(2, 1)  # Shape change for attention
        x_att, _ = self.attention(x_att, x_att, x_att)  # Attention output
        x_att = self.dropout_att(x_att)  # Apply dropout

        x_att = x_att.transpose(2, 1)  # Transpose back
        x = x + x_att  # Skip connection

        # MLP
        b, c, p = x.size()
        x = x.view(b * p, c)  # Flatten for MLP
        x = self.mlp(x)  # Apply MLP
        x = self.dropout_mlp(x)  # Apply dropout after MLP

        # Reshape back to (B, out_channels, num_points)
        output_channels = x.size(1)
        x = x.view(b, output_channels, p)
        return x


class SerializedPoolingLayer(nn.Module):
    """Serialized Pooling Layer for dimensionality reduction."""
    def __init__(self):
        super(SerializedPoolingLayer, self).__init__()

    def forward(self, x):
        return F.avg_pool1d(x, kernel_size=2)  # Average pooling


class PointTransformerV3(nn.Module):
    """Main Point Transformer V3 for Classification."""
    def __init__(self, num_classes= 65, num_points=15000, enc_depths=[64, 128, 256], dropout_rate=0.3):
        super(PointTransformerV3, self).__init__()

        # Input and embedding
        self.input_layer = InputLayer()
        self.embedding = EmbeddingLayer(input_channels=3, output_channels=enc_depths[0])  # Initial embedding layer

        # Encoder stages
        self.encoders = nn.ModuleList()
        in_channels = enc_depths[0]
        for out_channels in enc_depths:
            self.encoders.append(EncoderStage(in_channels, out_channels, dropout_rate=dropout_rate))
            in_channels = out_channels

        # Serialized pooling
        self.serialized_pooling = SerializedPoolingLayer()

        # Fully connected layers
        self.fc1 = nn.Linear(enc_depths[-1], 128)  # Input size from enc_depths[-1]
        self.fc2 = nn.Linear(128, 64)  # Intermediate layer
        self.fc3 = nn.Linear(64, num_classes)  # Output layer for 65 classes

    def forward(self, x):
        # Input layer
        x = self.input_layer(x)

        # Embedding layer
        x = self.embedding(x)

        # Pass through encoder stages
        for encoder in self.encoders:
            x = encoder(x)

        # Serialized pooling
        x = self.serialized_pooling(x)

        # Global feature extraction
        x = torch.max(x, dim=2)[0]  # Global max pooling across points

        x = x.view(x.size(0), -1)  # Flatten the output

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

class FusionModel(nn.Module):
    def __init__(self,
                 rgb_encoder,
                 lidar_encoder,
                 num_features,
                 num_heads,
                 num_classes=65):
        super(FusionModel, self).__init__()
        self.rgb_encoder = rgb_encoder
        self.lidar_encoder = lidar_encoder

        self.num_features = num_features
        self.num_heads = num_heads
        self.num_classes = num_classes

        # Shared attention mechanism
        self.shared_attn = nn.MultiheadAttention(embed_dim=num_features, num_heads=num_heads)

        # Shared projection layers for Q, K, V
        self.q_proj = nn.Linear(num_features, num_features)
        self.k_proj = nn.Linear(num_features, num_features)
        self.v_proj = nn.Linear(num_features, num_features)

        # Normalization layers
        self.bn_fusion = nn.BatchNorm1d(512)

        # Final projection layers
        self.fc1 = nn.Linear(520, 3 * num_features)
        self.fc2 = nn.Linear(3 * num_features, num_features)
        self.fc3 = nn.Linear(num_features, num_classes)

        # Using a simple MLP for the beam prediction
        self.positional_fc = nn.Sequential(
            nn.Linear(2, 8),
            nn.ReLU(),
        )

    def modal_transformer(self, z):
        """Shared projections for all modalities"""
        q = self.q_proj(z)
        k = self.k_proj(z)
        v = self.v_proj(z)
        return q, k, v

    def cross_modal_attention(self, Qa, Kb, Vb):
        """Shared attention mechanism for cross-modal interaction"""
        Qa = Qa.unsqueeze(0)  # Add sequence dimension
        Kb = Kb.unsqueeze(0)
        Vb = Vb.unsqueeze(0)

        attn_output, _ = self.shared_attn(Qa, Kb, Vb)
        return attn_output.squeeze(0)  # Remove sequence dimension

    def forward(self, inp_rgb, inp_gps, inp_lidar):
        # Encode modalities
        encoded_rgb = self.rgb_encoder(inp_rgb)
        encoded_lidar = self.lidar_encoder(inp_lidar)

        # Generate shared projections
        Q_rgb, K_rgb, V_rgb = self.modal_transformer(encoded_rgb)
        Q_lidar, K_lidar, V_lidar = self.modal_transformer(encoded_lidar)

        # Cross-modal attention with shared mechanism
        C_lidar_rgb = self.cross_modal_attention(Q_rgb, K_lidar, V_lidar) + encoded_rgb
        C_rgb_lidar = self.cross_modal_attention(Q_lidar, K_rgb, V_rgb) + encoded_lidar

        # Fusion and classification
        F_cross = torch.cat((C_lidar_rgb, C_rgb_lidar), dim=-1)
        F_cross = self.bn_fusion(F_cross)

        # Process positional data through the simple MLP
        y = self.positional_fc(inp_gps)
        
        x = torch.cat([F_cross, y], dim=1)
        out = F.relu(self.fc1(x))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)

        return out

In [11]:
# Training

In [12]:
batch_size = 4 # 8
val_batch_size = 1
lr = 0.001 # 1e-3
decay = 1e-4
num_epochs = 40 # After ** epoch, the accuracy remains same!
train_size = [1]

img_resize = transf.Resize((224, 224))
img_norm = transf.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
rgb_proc_pipe = transf.Compose(
    [transf.ToPILImage(),
     img_resize,
     transf.ToTensor(),
     img_norm]
)

rgb_train_dir = 'scenario31_64_img_beam_train.csv'
rgb_val_dir = 'scenario31_64_img_beam_val.csv'

gps_train_dir = 'scenario31_64_pos_beam_train.csv'
gps_val_dir = 'scenario31_64_pos_beam_val.csv'

lidar_train_dir = 'scenario31_64_lidar_beam_train.csv'
lidar_val_dir = 'scenario31_64_lidar_beam_val.csv'


ds_train = DataFeed(path_to_img=rgb_train_dir,
                        path_to_gps=gps_train_dir,
                        path_to_lidar=lidar_train_dir,
                        transform=rgb_proc_pipe)

ds_val = DataFeed(path_to_img=rgb_val_dir,
                  path_to_gps=gps_val_dir,
                  path_to_lidar=lidar_val_dir,
                  transform=rgb_proc_pipe)

train_loader = DataLoader(ds_train, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(ds_val, batch_size=val_batch_size, shuffle=False)

In [13]:
# Updated model initialization
num_classes=65
num_features = 256
num_heads = 8

# RGB encoder uses 2D convolutions
rgb_encoder = PointTransformerV3(num_classes=num_features)

# LiDAR encoder remains unchanged
lidar_encoder = PointTransformerV3(num_classes=num_features)

# Fusion model stays the same
fusion_model = FusionModel(rgb_encoder, lidar_encoder, num_features, num_heads).to(device)

In [14]:
acc_loss = 0
itr = []

for idx, n in enumerate(train_size):
    print('```````````````````````````````````````````````````````')
    print('Training size is {}'.format(n))

    # Optimization parameters:
    criterion = nn.CrossEntropyLoss()
    opt = torch.optim.Adam(fusion_model.parameters(), lr=lr, weight_decay=decay)
    LR_sch = torch.optim.lr_scheduler.MultiStepLR(opt, [4, 8, 12], gamma=0.1, last_epoch=-1)

    count = 0
    running_loss = []
    running_top1_acc = []
    running_top3_acc = []
    running_top5_acc = []
    running_top7_acc = []
    running_top9_acc = []
    running_top11_acc = []
    running_top13_acc = []
    running_top15_acc = []

    best_accuracy = 0

    for epoch in range(num_epochs):
        print('Epoch No. ' + str(epoch + 1))
        skipped_batches = 0
        epoch_train_loss = 0  # To track the training loss for the epoch
        for tr_count, data in enumerate(train_loader):

            img, gps, pnts, label = data
            img = img.to(device)
            gps = gps.to(device)
            pnts = pnts.to(device)
            label = label.to(device)

            fusion_model.train()
            opt.zero_grad()

            batch_size, channels, height, width = img.shape
            img = img.view(batch_size, channels, height * width)
            out = fusion_model(img, gps, pnts)
            loss = criterion(out, label)
            loss.backward()

            opt.step()
            batch_loss = loss.item()
            acc_loss += batch_loss
            epoch_train_loss += batch_loss  # Accumulate batch loss for the epoch
            count += 1
            if count % 10 == 0:
                print('Training-Batch No.' + str(count))
                running_loss.append(batch_loss)
                itr.append(count)
                print('Loss = ' + str(running_loss[-1]))

        epoch_train_loss /= len(train_loader)  # Calculate average training loss for the epoch
        print(f'Epoch {epoch + 1} Training Loss: {epoch_train_loss:.4f}')

        print('Start validation')
        ave_top1_acc = 0
        ave_top3_acc = 0
        ave_top5_acc = 0
        ave_top7_acc = 0
        ave_top9_acc = 0
        ave_top11_acc = 0
        ave_top13_acc = 0
        ave_top15_acc = 0
        val_loss = 0  # To track the validation loss
        ind_ten = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], device='cuda:0')
        top1_pred_out = []
        top3_pred_out = []
        top5_pred_out = []
        top7_pred_out = []
        top9_pred_out = []
        top11_pred_out = []
        top13_pred_out = []
        top15_pred_out = []
        gt_beam = []
        total_count = 0

        for val_count, data in enumerate(val_loader):

            img, gps, pnts, labels = data
            # batch_size, channels, height, width = img.shape
            # img = img.view(batch_size, channels, height * width)
            img = img.to(device)
            gps = gps.to(device)
            pnts = pnts.to(device)
            labels = labels.to(device)
            total_count += labels.size(0)

            fusion_model.eval()
            batch_size, channels, height, width = img.shape
            img = img.view(batch_size, channels, height * width)
            out = fusion_model(img, gps, pnts)
            opt.zero_grad()

            _, top_1_pred = torch.max(out, dim=1)

            val_batch_loss = criterion(out, labels).item()  # Calculate validation loss for the batch
            val_loss += val_batch_loss  # Accumulate batch loss for the epoch

            gt_beam.append(labels.detach().cpu().numpy()[0])

            top1_pred_out.append(top_1_pred.detach().cpu().numpy()[0])
            sorted_out = torch.argsort(out, dim=1, descending=True)

            top_3_pred = torch.index_select(sorted_out, dim=1, index=ind_ten[0:3])
            top3_pred_out.append(top_3_pred.detach().cpu().numpy()[0])

            top_5_pred = torch.index_select(sorted_out, dim=1, index=ind_ten[0:5])
            top5_pred_out.append(top_5_pred.detach().cpu().numpy()[0])

            top_7_pred = torch.index_select(sorted_out, dim=1, index=ind_ten[0:7])
            top7_pred_out.append(top_7_pred.detach().cpu().numpy()[0])

            top_9_pred = torch.index_select(sorted_out, dim=1, index=ind_ten[0:9])
            top9_pred_out.append(top_9_pred.detach().cpu().numpy()[0])

            top_11_pred = torch.index_select(sorted_out, dim=1, index=ind_ten[0:11])
            top11_pred_out.append(top_11_pred.detach().cpu().numpy()[0])

            top_13_pred = torch.index_select(sorted_out, dim=1, index=ind_ten[0:13])
            top13_pred_out.append(top_13_pred.detach().cpu().numpy()[0])

            top_15_pred = torch.index_select(sorted_out, dim=1, index=ind_ten[0:15])
            top15_pred_out.append(top_15_pred.detach().cpu().numpy()[0])

            reshaped_labels = labels.reshape((labels.shape[0], 1))
            tiled_3_labels = reshaped_labels.repeat(1, 3)
            tiled_5_labels = reshaped_labels.repeat(1, 5)
            tiled_7_labels = reshaped_labels.repeat(1, 7)
            tiled_9_labels = reshaped_labels.repeat(1, 9)
            tiled_11_labels = reshaped_labels.repeat(1, 11)
            tiled_13_labels = reshaped_labels.repeat(1, 13)
            tiled_15_labels = reshaped_labels.repeat(1, 15)

            batch_top1_acc = torch.sum(top_1_pred == labels, dtype=torch.float32)
            batch_top3_acc = torch.sum(top_3_pred == tiled_3_labels, dtype=torch.float32)
            batch_top5_acc = torch.sum(top_5_pred == tiled_5_labels, dtype=torch.float32)
            batch_top7_acc = torch.sum(top_7_pred == tiled_7_labels, dtype=torch.float32)
            batch_top9_acc = torch.sum(top_9_pred == tiled_9_labels, dtype=torch.float32)
            batch_top11_acc = torch.sum(top_11_pred == tiled_11_labels, dtype=torch.float32)
            batch_top13_acc = torch.sum(top_13_pred == tiled_13_labels, dtype=torch.float32)
            batch_top15_acc = torch.sum(top_15_pred == tiled_15_labels, dtype=torch.float32)

            ave_top1_acc += batch_top1_acc.item()
            ave_top3_acc += batch_top3_acc.item()
            ave_top5_acc += batch_top5_acc.item()
            ave_top7_acc += batch_top7_acc.item()
            ave_top9_acc += batch_top9_acc.item()
            ave_top11_acc += batch_top11_acc.item()
            ave_top13_acc += batch_top13_acc.item()
            ave_top15_acc += batch_top15_acc.item()

        val_loss /= len(val_loader)  # Calculate average validation loss for the epoch
        print(f'Epoch {epoch + 1} Validation Loss: {val_loss:.4f}')

        print("total training examples are", total_count)
        running_top1_acc.append(ave_top1_acc / total_count)
        running_top3_acc.append(ave_top3_acc / total_count)
        running_top5_acc.append(ave_top5_acc / total_count)
        running_top7_acc.append(ave_top7_acc / total_count)
        running_top9_acc.append(ave_top9_acc / total_count)
        running_top11_acc.append(ave_top11_acc / total_count)
        running_top13_acc.append(ave_top13_acc / total_count)
        running_top15_acc.append(ave_top15_acc / total_count)

        print('Training_size {}--No. of skipped batches {}'.format(n, skipped_batches))
        print('Average Top-1 accuracy {}'.format(running_top1_acc[-1]))
        print('Average Top-3 accuracy {}'.format(running_top3_acc[-1]))
        print('Average Top-5 accuracy {}'.format(running_top5_acc[-1]))
        print('Average Top-7 accuracy {}'.format(running_top7_acc[-1]))
        print('Average Top-9 accuracy {}'.format(running_top9_acc[-1]))
        print('Average Top-11 accuracy {}'.format(running_top11_acc[-1]))
        print('Average Top-13 accuracy {}'.format(running_top13_acc[-1]))
        print('Average Top-15 accuracy {}'.format(running_top15_acc[-1]))

        cur_accuracy = running_top1_acc[-1]

        print("current acc", cur_accuracy)
        print("best acc", best_accuracy)
        if cur_accuracy > best_accuracy:
            print("Saving the best model")
            net_name = checkpoint_directory + '//' + '/fusionmodel_64_beam'
            torch.save(fusion_model.state_dict(), net_name)
            best_accuracy = cur_accuracy
        print("updated best accuracy", best_accuracy)


    print("Saving the predicted value in a csv file")
    file_to_save = f'{save_directory}//topk_pred_beam_val_after_{epoch + 1}th_epoch.csv'
    indx = np.arange(1, len(top1_pred_out) + 1, 1)
    df1 = pd.DataFrame()
    df1['index'] = indx
    df1['link_status'] = gt_beam
    df1['top1_pred'] = top1_pred_out
    df1['top3_pred'] = top3_pred_out
    df1['top5_pred'] = top5_pred_out
    df1['top7_pred'] = top7_pred_out
    df1['top9_pred'] = top9_pred_out
    df1['top11_pred'] = top11_pred_out
    df1['top13_pred'] = top13_pred_out
    df1['top15_pred'] = top15_pred_out
    df1.to_csv(file_to_save, index=False)

    LR_sch.step()

```````````````````````````````````````````````````````
Training size is 1
Epoch No. 1
Training-Batch No.10
Loss = 3.9231643676757812
Training-Batch No.20
Loss = 3.928873062133789
Training-Batch No.30
Loss = 3.4139981269836426
Training-Batch No.40
Loss = 3.282966136932373
Training-Batch No.50
Loss = 3.220262289047241
Training-Batch No.60
Loss = 4.931697845458984
Training-Batch No.70
Loss = 3.398538589477539
Training-Batch No.80
Loss = 3.5917744636535645
Training-Batch No.90
Loss = 4.089653491973877
Training-Batch No.100
Loss = 4.372869491577148
Training-Batch No.110
Loss = 3.7989606857299805
Training-Batch No.120
Loss = 3.4989125728607178
Training-Batch No.130
Loss = 3.8647618293762207
Training-Batch No.140
Loss = 3.4492440223693848
Training-Batch No.150
Loss = 3.559380292892456
Training-Batch No.160
Loss = 3.9555492401123047
Training-Batch No.170
Loss = 3.7864601612091064
Training-Batch No.180
Loss = 4.264975070953369
Training-Batch No.190
Loss = 3.372161865234375
Training-Batch No.20

In [15]:
# Testing

In [16]:
# Load the model checkpoint
image_test_dir = 'scenario31_64_img_beam_test.csv'
pos_test_dir = 'scenario31_64_pos_beam_test.csv'
lidar_test_dir = 'scenario31_64_lidar_beam_test.csv'

# Load the test data
ds_test = DataFeed(path_to_img=image_test_dir,
                        path_to_gps=pos_test_dir,
                        path_to_lidar=lidar_test_dir,
                        transform=rgb_proc_pipe)

test_loader = DataLoader(ds_test, batch_size=batch_size, shuffle=False)

# Load the test data
test_data = pd.read_csv(image_test_dir)

# Extract the 'unit1_pwr1_best-beam' data and convert it to a list
link_status_data = test_data['original_unit1_pwr_best-beam'].tolist()
org = test_data['original_index'].tolist()
pwr_60ghz = test_data['original_unit1_pwr'].tolist()

# # Fusion model stays the same
# model = FusionModel(rgb_encoder, lidar_encoder, num_features, num_heads).to(device)

# checkpoint_path = f'{checkpoint_directory}/fusionmodel_64_beam'
# model.load_state_dict(torch.load(checkpoint_path))
# model.eval()

In [17]:
print('Start Testing')
ave_top1_acc = 0
ave_top3_acc = 0
ave_top5_acc = 0
ave_top7_acc = 0
ave_top9_acc = 0
ave_top11_acc = 0
ave_top13_acc = 0
ave_top15_acc = 0
ind_ten = t.as_tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], device='cuda:0')
top1_pred_out = []
top3_pred_out = []
top5_pred_out = []
top7_pred_out = []
top9_pred_out = []
top11_pred_out = []
top13_pred_out = []
top15_pred_out = []
running_top1_acc = []
running_top3_acc = []
running_top5_acc = []
running_top7_acc = []
running_top9_acc = []
running_top11_acc = []
running_top13_acc = []
running_top15_acc = []
total_count = 0

gt_beam = []

for val_count, data in enumerate(test_loader):

    img, gps, pnts, labels = data
    # batch_size, channels, height, width = img.shape
    # img = img.view(batch_size, channels, height * width)
    img = img.to(device)
    gps = gps.to(device)
    pnts = pnts.to(device)
    labels = labels.to(device)
    total_count += labels.size(0)

    fusion_model.eval()
    batch_size, channels, height, width = img.shape
    img = img.view(batch_size, channels, height * width)
    out = fusion_model(img, gps, pnts)
    
    
    _, top_1_pred = t.max(out, dim=1)
    top1_pred_out.append(top_1_pred.detach().cpu().numpy()[0].tolist())
    sorted_out = t.argsort(out, dim=1, descending=True)

    top_3_pred = t.index_select(sorted_out, dim=1, index=ind_ten[0:3])
    top3_pred_out.append(top_3_pred.detach().cpu().numpy()[0].tolist())

    top_5_pred = t.index_select(sorted_out, dim=1, index=ind_ten[0:5])
    top5_pred_out.append(top_5_pred.detach().cpu().numpy()[0].tolist())

    top_7_pred = t.index_select(sorted_out, dim=1, index=ind_ten[0:7])
    top7_pred_out.append(top_7_pred.detach().cpu().numpy()[0].tolist())

    top_9_pred = t.index_select(sorted_out, dim=1, index=ind_ten[0:9])
    top9_pred_out.append(top_9_pred.detach().cpu().numpy()[0].tolist())

    top_11_pred = t.index_select(sorted_out, dim=1, index=ind_ten[0:11])
    top11_pred_out.append(top_11_pred.detach().cpu().numpy()[0].tolist())

    top_13_pred = t.index_select(sorted_out, dim=1, index=ind_ten[0:13])
    top13_pred_out.append(top_13_pred.detach().cpu().numpy()[0].tolist())

    top_15_pred = t.index_select(sorted_out, dim=1, index=ind_ten[0:15])
    top15_pred_out.append(top_15_pred.detach().cpu().numpy()[0].tolist())

    reshaped_labels = labels.reshape((labels.shape[0], 1))
    tiled_3_labels = reshaped_labels.repeat(1, 3)
    tiled_5_labels = reshaped_labels.repeat(1, 5)
    tiled_7_labels = reshaped_labels.repeat(1, 7)
    tiled_9_labels = reshaped_labels.repeat(1, 9)
    tiled_11_labels = reshaped_labels.repeat(1, 11)
    tiled_13_labels = reshaped_labels.repeat(1, 13)
    tiled_15_labels = reshaped_labels.repeat(1, 15)

    batch_top1_acc = t.sum(top_1_pred == labels, dtype=t.float32)
    batch_top3_acc = t.sum(top_3_pred == tiled_3_labels, dtype=t.float32)
    batch_top5_acc = t.sum(top_5_pred == tiled_5_labels, dtype=t.float32)
    batch_top7_acc = t.sum(top_7_pred == tiled_7_labels, dtype=t.float32)
    batch_top9_acc = t.sum(top_9_pred == tiled_9_labels, dtype=t.float32)
    batch_top11_acc = t.sum(top_11_pred == tiled_11_labels, dtype=t.float32)
    batch_top13_acc = t.sum(top_13_pred == tiled_13_labels, dtype=t.float32)
    batch_top15_acc = t.sum(top_15_pred == tiled_15_labels, dtype=t.float32)

    ave_top1_acc += batch_top1_acc.item()
    ave_top3_acc += batch_top3_acc.item()
    ave_top5_acc += batch_top5_acc.item()
    ave_top7_acc += batch_top7_acc.item()
    ave_top9_acc += batch_top9_acc.item()
    ave_top11_acc += batch_top11_acc.item()
    ave_top13_acc += batch_top13_acc.item()
    ave_top15_acc += batch_top15_acc.item()

print("total test examples are", total_count)
running_top1_acc.append(ave_top1_acc / total_count)  # (batch_size * (count_2 + 1)) )
running_top3_acc.append(ave_top3_acc / total_count)
running_top5_acc.append(ave_top5_acc / total_count)
running_top7_acc.append(ave_top7_acc / total_count)
running_top9_acc.append(ave_top9_acc / total_count)
running_top11_acc.append(ave_top11_acc / total_count)
running_top13_acc.append(ave_top13_acc / total_count)
running_top15_acc.append(ave_top15_acc / total_count)

print('Training_size {}--No. of skipped batchess {}'.format(n,skipped_batches))
print('Average Top-1 accuracy {}'.format( running_top1_acc[-1]))
print('Average Top-3 accuracy {}'.format( running_top3_acc[-1]))
print('Average Top-5 accuracy {}'.format( running_top5_acc[-1]))
print('Average Top-7 accuracy {}'.format( running_top7_acc[-1]))
print('Average Top-9 accuracy {}'.format( running_top9_acc[-1]))
print('Average Top-11 accuracy {}'.format( running_top11_acc[-1]))
print('Average Top-13 accuracy {}'.format( running_top13_acc[-1]))
print('Average Top-15 accuracy {}'.format( running_top15_acc[-1]))

print("Saving the predicted value in a csv file")
file_to_save = f'{save_directory}//best_epoch_eval_Test.csv'

# Extract the 'unit1_pwr1_best-beam' data and convert it to a list
# link_status_data = test_data['original_unit1_pwr3_best-beam'].tolist()
# org = test_data['original_index'].tolist()
# pwr_60ghz = test_data['original_unit1_pwr3'].tolist()

indx = test_data.index + 1
df2 = pd.DataFrame()
df2['index'] = org
df2['link_status'] = link_status_data  # Add the link_status column
df2['original_unit1_pwr1'] = pwr_60ghz # Add the original_unit1_pwr_60ghz column

df2['top1_pred'] = top1_pred_out
df2['top3_pred'] = top3_pred_out
df2['top5_pred'] = top5_pred_out
df2['top7_pred'] = top7_pred_out
df2['top9_pred'] = top9_pred_out
df2['top11_pred'] = top11_pred_out
df2['top13_pred'] = top13_pred_out

df2['top15_pred'] = top15_pred_out
df2.to_csv(file_to_save, index=False)

Start Testing
total test examples are 702
Training_size 1--No. of skipped batchess 0
Average Top-1 accuracy 0.36894586894586895
Average Top-3 accuracy 0.7350427350427351
Average Top-5 accuracy 0.8603988603988604
Average Top-7 accuracy 0.915954415954416
Average Top-9 accuracy 0.9401709401709402
Average Top-11 accuracy 0.9529914529914529
Average Top-13 accuracy 0.9643874643874644
Average Top-15 accuracy 0.9672364672364673
Saving the predicted value in a csv file
