In [None]:
import torch #GPUs: NVIDIA Tesla T4 x2
# Device selection for the two networks.
# device1 and device2 are defined on every path (two GPUs, one GPU, CPU only)
# so that later cells, which reference both names, do not raise NameError.
if torch.cuda.is_available():
    device1 = torch.device("cuda:0")
    print(f"GPU found: {torch.cuda.get_device_name(0)}")
    properties1 = torch.cuda.get_device_properties(device1)
    # NOTE(review): 64 cores per multiprocessor is hard-coded; presumably chosen
    # for the Tesla T4 named above -- confirm before trusting it on other GPUs.
    no_cudacores1 = properties1.multi_processor_count *64
    print(f"{no_cudacores1} CUDA cores on GPU")
    if torch.cuda.device_count() > 1:
        device2 = torch.device("cuda:1")
        print(f"GPU found: {torch.cuda.get_device_name(1)}")
        properties2 = torch.cuda.get_device_properties(device2)
        no_cudacores2 = properties2.multi_processor_count *64
        print(f"{no_cudacores2} CUDA cores on GPU")
    else:
        # Only one GPU is visible: run both networks on it instead of
        # failing on a non-existent cuda:1.
        device2 = device1
        print("Only one GPU found! using it for both networks")
else:
    device = torch.device("cpu")
    # Keep the names used by the rest of the notebook valid on CPU-only hosts.
    device1 = device
    device2 = device
    print("No GPU found! using CPU")

In [None]:
%%capture
#libraries
# `optim` is torch's optimizer sub-package. The previous `import optim` /
# `!pip install optim` pair resolved to an unrelated PyPI distribution of the
# same name; nothing has to be installed for torch.optim.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam

In [None]:
%%capture
try:#nuscenes-devkit
    %matplotlib inline
    from nuscenes.nuscenes import NuScenes
except Exception as e:
    !pip install nuscenes-devkit
    %matplotlib inline
    from nuscenes.nuscenes import NuScenes

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from nuscenes.utils.data_classes import LidarPointCloud
# Load one example sweep from the dataset directory and inspect it.
directory = '/kaggle/input/multislow-poli/CAVALIER_MULTI-SLOW-POLI/sweeps/LIDAR_FUSED'
save = r'/kaggle/working/'
files = sorted(os.listdir(directory))
print(files[0])
pcdpath = os.path.join(directory, files[700])
pcd = LidarPointCloud.from_file(pcdpath)
# Copy the point array into a float32 tensor; later cells index it as
# [row, point] with rows 0, 1, 2 used as x, y, z.
pcd4d = torch.tensor(pcd.points[:,:], dtype=torch.float32)
in_neurons = pcd4d.shape[1]
print(in_neurons)
# Move to the first GPU when one exists; fall back to the CPU instead of
# raising on a hard-coded 'cuda:0'.
pcd4d_gpu = pcd4d.to('cuda:0' if torch.cuda.is_available() else 'cpu')
print(pcd4d_gpu)
# Load a single LiDAR sweep as a tensor
def extract_pcd(directory, ifile):
    """Read one sweep file and return its points as a float32 tensor.

    Args:
        directory: Folder that holds the sweep files.
        ifile: Name of the sweep file inside ``directory``.

    Returns:
        torch.Tensor: float32 copy of the ``points`` array of the loaded
        ``LidarPointCloud``.
    """
    cloud = LidarPointCloud.from_file(os.path.join(directory, ifile))
    return torch.tensor(cloud.points[:, :], dtype=torch.float32)

In [None]:
#BEV map creation on GPU for one scan
def convert_scan_bevmap(pcd4d, resolution, bev_map_size, gpu):#resolution is also the voxel size
    """Rasterise one point cloud into a binary bird's-eye-view (BEV) map.

    Note: a later cell of this notebook defines a function with the same name
    and an extra ``depth_size`` parameter; once that cell has run it replaces
    this definition.

    Args:
        pcd4d (torch.Tensor): Point cloud indexed as [row, point]; rows 0, 1, 2
            are read as x, y, z.
        resolution (float): Edge length of one BEV cell, in the units of x/y.
        bev_map_size (int): Width and height of the BEV map in cells.
        gpu (torch.device): Device on which the map is built.

    Returns:
        tuple: ``(bev_map, voxels)`` where ``bev_map`` is a
        ``(bev_map_size, bev_map_size)`` float tensor on ``gpu`` holding 1.0 in
        every cell whose highest point has z > 0 and 0.0 elsewhere, and
        ``voxels`` is a NumPy array with one ``(cell_x, cell_y, z)`` row per
        in-range point.
    """
    pcd4d_gpu = pcd4d.to(gpu)
    threshold = 0
    #extract x,y,z
    x = pcd4d_gpu[0,:]
    y = pcd4d_gpu[1,:]
    z = pcd4d_gpu[2,:]
    #Calculate the corresponding cell for each point; the origin is shifted to the map centre
    pixelx = ((x / resolution) + bev_map_size / 2).floor().long()
    pixely = ((y / resolution) + bev_map_size / 2).floor().long()
    #Ignore points that fall outside the BEV map
    mask = (pixelx >= 0) & (pixelx < bev_map_size) & (pixely >= 0) & (pixely < bev_map_size)
    cell_x, cell_y, height = pixelx[mask], pixely[mask], z[mask]
    #initialize an empty BEV map
    bev_map = torch.zeros((bev_map_size, bev_map_size), device=gpu)
    #Max pooling per cell. Several points usually land in the same cell, and
    #indexed assignment keeps an arbitrary one of them rather than the maximum,
    #so the reduction is done with scatter_reduce_ on the flattened map.
    bev_map.view(-1).scatter_reduce_(
        0, cell_x * bev_map_size + cell_y, height.to(bev_map.dtype),
        reduce="amax", include_self=True,
    )
    bev_map = (bev_map > threshold).to(bev_map.dtype)
    #Cast the integer cell indices explicitly so the stack has a single dtype
    voxels = torch.stack((cell_x.to(height.dtype), cell_y.to(height.dtype), height), dim=1).cpu().numpy()
    return bev_map, voxels#returning binary bev map

# Rasterise the example sweep (0.2 cell size, 1000 x 1000 cells) on device1
bevmap, voxels = convert_scan_bevmap(
    pcd4d,
    resolution=0.2,
    bev_map_size=1000,
    gpu=device1,
)
print(bevmap.shape)
# Show the BEV map; it has to be on the host for matplotlib
bev_on_host = bevmap.cpu()
plt.imshow(bev_on_host, cmap='hot')
plt.show()
print(bevmap.max())

In [None]:
import torch
import matplotlib.pyplot as plt

# BEV map creation on GPU for one scan with 3D voxelization
def convert_scan_bevmap(pcd4d, resolution, bev_map_size, depth_size, gpu):
    """
    Converts a 3D point cloud to a 2D BEV map with 3D voxelization.

    Args:
    - pcd4d (torch.Tensor): The input point cloud, indexed as [row, point];
      rows 0, 1, 2 are read as x, y, z.
    - resolution (float): The voxel size or resolution.
    - bev_map_size (int): Width and height of the BEV map in pixels.
    - depth_size (int): Number of layers in the z-axis for voxelization.
    - gpu (torch.device): Device to perform computation on (e.g., torch.device('cuda')).

    Returns:
    - bev_map (torch.Tensor): The binary BEV map, shape
      (bev_map_size, bev_map_size); 1.0 where any voxel of the column holds a
      point with z > 0, else 0.0.
    - voxels (numpy.ndarray): Integer voxel indices (x, y, z), one row per
      point that falls inside the grid.
    """

    pcd4d_gpu = pcd4d.to(gpu)
    threshold = 0

    # Extract x, y, z
    x = pcd4d_gpu[0, :]
    y = pcd4d_gpu[1, :]
    z = pcd4d_gpu[2, :]

    # Calculate voxel indices for each point; the origin is shifted to the grid centre
    voxel_x = ((x / resolution) + bev_map_size / 2).floor().long()
    voxel_y = ((y / resolution) + bev_map_size / 2).floor().long()
    voxel_z = ((z / resolution) + depth_size / 2).floor().long()

    # Ignore points that fall outside the voxel grid
    mask = (
        (voxel_x >= 0) & (voxel_x < bev_map_size) &
        (voxel_y >= 0) & (voxel_y < bev_map_size) &
        (voxel_z >= 0) & (voxel_z < depth_size)
    )
    kept_x, kept_y, kept_z = voxel_x[mask], voxel_y[mask], voxel_z[mask]

    # Initialize an empty 3D voxel grid
    voxel_grid = torch.zeros((bev_map_size, bev_map_size, depth_size), device=gpu)

    # Max pooling per voxel. Many points share a voxel, and indexed assignment
    # keeps an arbitrary one of the duplicate writes instead of the maximum, so
    # the reduction is done with scatter_reduce_ on the flattened grid.
    flat_index = (kept_x * bev_map_size + kept_y) * depth_size + kept_z
    voxel_grid.view(-1).scatter_reduce_(
        0, flat_index, z[mask].to(voxel_grid.dtype),
        reduce="amax", include_self=True,
    )

    # Project the 3D voxel grid to a 2D BEV map by taking the maximum along the z-axis
    bev_map = torch.max(voxel_grid, dim=2)[0]

    # Apply binary threshold to the BEV map
    bev_map = (bev_map > threshold).to(voxel_grid.dtype)

    # Voxel indices returned as a NumPy array for visualization
    voxel_coords = torch.stack((kept_x, kept_y, kept_z), dim=1).cpu().numpy()

    return bev_map, voxel_coords  # Returning binary BEV map and voxel coordinates

# Example usage: voxelise the example sweep on device1
resolution = 0.2     # Voxel resolution
depth_size = 50      # Number of layers along the z-axis for voxelization
bev_map_size = 1000  # BEV map size (width and height)

bevmap, voxels = convert_scan_bevmap(
    pcd4d,
    resolution=resolution,
    bev_map_size=bev_map_size,
    depth_size=depth_size,
    gpu=device1,
)
print(voxels.shape)

In [None]:
from mpl_toolkits.mplot3d import Axes3D
def show_voxels(voxels, elev, azim, aspect_ratio):
    """Draw voxel coordinates as a 3D scatter plot.

    Args:
        voxels: 2-D array whose columns 0, 1, 2 are read as x, y, z.
        elev: Elevation angle passed to ``ax.view_init``.
        azim: Azimuth angle passed to ``ax.view_init``.
        aspect_ratio: Box aspect passed to ``ax.set_box_aspect``.
    """
    fig = plt.figure(figsize=(10,7))
    ax = fig.add_subplot(111, projection='3d')

    # Extract x, y, z coordinates
    x = voxels[:, 0]
    y = voxels[:, 1]
    z = voxels[:, 2]

    # Plot the voxels in 3D, coloured by z squared
    ax.scatter(y, x, z, c=z**2, cmap='hot', marker='o', s=10)  # Adjust 's' for marker size
    # The scatter call puts y on the first axis and x on the second, so the
    # labels follow that order (they were previously swapped).
    ax.set_xlabel('Y')
    ax.set_ylabel('X')
    ax.set_zlabel('Z')
    ax.view_init(elev, azim)
    ax.set_box_aspect(aspect_ratio)
    plt.show()
show_voxels(voxels, -60, 90, (1,1.5,0.5))

In [None]:
class pyramidnetwork(nn.Module):
    """Encoder-decoder CNN with skip connections over a stack of BEV maps.

    The input is 16 BEV maps (15 past scans plus the current one) treated as
    16 channels. Six encoder stages widen the features 32 -> 1024 while
    halving the resolution; six decoder stages upsample again, each one
    concatenating the matching encoder skip tensor. The output has 1 channel.

    The transposed convolutions only produce sizes that match the skip tensors
    for certain input sizes (1000 x 1000, as used in this notebook, is one of
    them; 104 x 104 is another).
    """
    def __init__(self):
        super(pyramidnetwork, self).__init__()
        ##### encoder
        # Per stage: two convolutions, plus a "pool" convolution that produces
        # the skip tensor from the max-pooled features.
        self.conv1 = nn.Conv2d(16,32,1,1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv1_1 = nn.Conv2d(32,32,3,1,1)
        self.bn1_1 = nn.BatchNorm2d(32)
        self.poolconv1 = nn.Conv2d(32,32,3,1,1)
        self.poolbn1 = nn.BatchNorm2d(32)
        
        self.conv2 = nn.Conv2d(32,64,3,1,1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv2_1 = nn.Conv2d(64,64,3,1,1)
        self.bn2_1 = nn.BatchNorm2d(64)
        self.poolconv2 = nn.Conv2d(64,64,3,1,1)
        self.poolbn2 = nn.BatchNorm2d(64)
        
        self.conv3 = nn.Conv2d(64,128,3,1,1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv3_1 = nn.Conv2d(128,128,3,1,1)
        self.bn3_1 = nn.BatchNorm2d(128)
        self.poolconv3 = nn.Conv2d(128,128,3,1,1)
        self.poolbn3 = nn.BatchNorm2d(128)
        
        self.conv4 = nn.Conv2d(128,256,3,1,1)
        self.bn4 = nn.BatchNorm2d(256)
        self.conv4_1 = nn.Conv2d(256,256,3,1,1)
        self.bn4_1 = nn.BatchNorm2d(256)
        self.poolconv4 = nn.Conv2d(256,256,3,1,1)
        self.poolbn4 = nn.BatchNorm2d(256)
        
        self.conv5 = nn.Conv2d(256,512,3,1,1)
        self.bn5 = nn.BatchNorm2d(512)
        self.conv5_1 = nn.Conv2d(512,512,3,1,1)
        self.bn5_1 = nn.BatchNorm2d(512)
        self.poolconv5 = nn.Conv2d(512,512,3,1,1)
        self.poolbn5 = nn.BatchNorm2d(512)
        
        self.conv6 = nn.Conv2d(512,1024,3,1,1)
        self.bn6 = nn.BatchNorm2d(1024)
        self.conv6_1 = nn.Conv2d(1024,1024,3,1,1)
        self.bn6_1 = nn.BatchNorm2d(1024)
        self.poolconv6 = nn.Conv2d(1024,1024,3,1,1)
        self.poolbn6 = nn.BatchNorm2d(1024)
        ##### decoder
        # Per stage: a transposed convolution that upsamples, then two layers
        # that fuse the upsampled features with the concatenated skip tensor.
        self.tranconv6 = nn.ConvTranspose2d(1024,512,3,2)
        self.tranbn6 = nn.BatchNorm2d(512)
        self.upconv6 = nn.ConvTranspose2d(1024,512,3,1,1)
        self.upbn6 = nn.BatchNorm2d(512)
        self.upconv6_1 = nn.ConvTranspose2d(512,512,3,1,1)
        self.upbn6_1 = nn.BatchNorm2d(512)
        
        self.tranconv5 = nn.ConvTranspose2d(512,256,2,2)
        self.tranbn5 = nn.BatchNorm2d(256)
        self.upconv5 = nn.ConvTranspose2d(512,256,3,1,1)
        self.upbn5 = nn.BatchNorm2d(256)
        self.upconv5_1 = nn.ConvTranspose2d(256,256,3,1,1)
        self.upbn5_1 = nn.BatchNorm2d(256)
        
        self.tranconv4 = nn.ConvTranspose2d(256,128,3,2)
        self.tranbn4 = nn.BatchNorm2d(128)
        self.upconv4 = nn.ConvTranspose2d(256,128,3,1,1)
        self.upbn4 = nn.BatchNorm2d(128)
        self.upconv4_1 = nn.ConvTranspose2d(128,128,3,1,1)
        self.upbn4_1 = nn.BatchNorm2d(128)
        
        self.tranconv3 = nn.ConvTranspose2d(128,64,2,2)
        self.tranbn3 = nn.BatchNorm2d(64)
        self.upconv3 = nn.ConvTranspose2d(128,64,3,1,1)
        self.upbn3 = nn.BatchNorm2d(64)
        self.upconv3_1 = nn.ConvTranspose2d(64,64,3,1,1)
        self.upbn3_1 = nn.BatchNorm2d(64)
        
        self.tranconv2 = nn.ConvTranspose2d(64,32,2,2)
        self.tranbn2 = nn.BatchNorm2d(32)
        self.upconv2 = nn.ConvTranspose2d(64,32,3,1,1)
        self.upbn2 = nn.BatchNorm2d(32)
        self.upconv2_1 = nn.ConvTranspose2d(32,32,3,1,1)
        self.upbn2_1 = nn.BatchNorm2d(32)
        
        self.tranconv1 = nn.ConvTranspose2d(32,16,2,2)
        self.tranbn1 = nn.BatchNorm2d(16)
        self.upconv1 = nn.ConvTranspose2d(32,16,1,1)
        self.upbn1 = nn.BatchNorm2d(16)
        self.upconv1_1 = nn.ConvTranspose2d(16,1,3,1,1)
        self.upbn1_1 = nn.BatchNorm2d(1)
        
        self.maxpool = nn.MaxPool2d(kernel_size=2,stride=2)
    def forward(self, pastscans,currscan):
        """Predict a single-channel map from past scans and the current scan.

        Args:
            pastscans (list[torch.Tensor]): 15 BEV maps of identical 2-D shape.
            currscan (torch.Tensor): BEV map of the same shape.

        Returns:
            torch.Tensor: Prediction of shape (1, 1, H, W).
        """
        def act(t):
            return F.leaky_relu(t, negative_slope=0.01)
        #stack scans into a (1, 16, H, W) batch
        xbevs = pastscans + [currscan]
        xbevs_ten = torch.stack(xbevs).float().unsqueeze(0)
        #encoding: xN feeds the next stage, x_N is the skip tensor of stage N
        x1 = act(self.bn1(self.conv1(xbevs_ten)))
        x1 = act(self.bn1_1(self.conv1_1(x1)))
        x1 = self.maxpool(x1)
        x_1 = act(self.poolbn1(self.poolconv1(x1)))

        x2 = act(self.bn2(self.conv2(x1)))
        x2 = act(self.bn2_1(self.conv2_1(x2)))
        x2 = self.maxpool(x2)
        x_2 = act(self.poolbn2(self.poolconv2(x2)))

        x3 = act(self.bn3(self.conv3(x2)))
        x3 = act(self.bn3_1(self.conv3_1(x3)))
        x3 = self.maxpool(x3)
        x_3 = act(self.poolbn3(self.poolconv3(x3)))

        x4 = act(self.bn4(self.conv4(x3)))
        x4 = act(self.bn4_1(self.conv4_1(x4)))
        x4 = self.maxpool(x4)
        x_4 = act(self.poolbn4(self.poolconv4(x4)))

        x5 = act(self.bn5(self.conv5(x4)))
        x5 = act(self.bn5_1(self.conv5_1(x5)))
        x5 = self.maxpool(x5)
        x_5 = act(self.poolbn5(self.poolconv5(x5)))

        x6 = act(self.bn6(self.conv6(x5)))
        x6 = act(self.bn6_1(self.conv6_1(x6)))
        x_6 = act(self.poolbn6(self.poolconv6(self.maxpool(x6))))
        #decoding: every stage upsamples the PREVIOUS decoder output and fuses
        #it with the encoder skip tensor. Previously each stage restarted from
        #the encoder tensor, so the outputs of stages 6..2 were discarded and
        #their layers never influenced the prediction.
        x6up = act(self.tranbn6(self.tranconv6(x_6)))
        x6de = act(self.upbn6(self.upconv6(torch.cat((x6up, x_5), dim=1))))
        x6de = act(self.upbn6_1(self.upconv6_1(x6de)))

        x5up = act(self.tranbn5(self.tranconv5(x6de)))
        x5de = act(self.upbn5(self.upconv5(torch.cat((x5up, x_4), dim=1))))
        x5de = act(self.upbn5_1(self.upconv5_1(x5de)))

        x4up = act(self.tranbn4(self.tranconv4(x5de)))
        x4de = act(self.upbn4(self.upconv4(torch.cat((x4up, x_3), dim=1))))
        x4de = act(self.upbn4_1(self.upconv4_1(x4de)))

        x3up = act(self.tranbn3(self.tranconv3(x4de)))
        x3de = act(self.upbn3(self.upconv3(torch.cat((x3up, x_2), dim=1))))
        x3de = act(self.upbn3_1(self.upconv3_1(x3de)))

        x2up = act(self.tranbn2(self.tranconv2(x3de)))
        x2de = act(self.upbn2(self.upconv2(torch.cat((x2up, x_1), dim=1))))
        x2de = act(self.upbn2_1(self.upconv2_1(x2de)))

        # The last stage uses the raw input stack as its skip tensor
        x1up = act(self.tranbn1(self.tranconv1(x2de)))
        x1de = act(self.upbn1(self.upconv1(torch.cat((x1up, xbevs_ten), dim=1))))
        x1de = act(self.upbn1_1(self.upconv1_1(x1de)))
        print(x1de.size())
        
        return x1de

In [None]:
# Seed torch's RNG before building the network so the initial weights are
# reproducible, then place the pyramid network on device1.
torch.manual_seed(44)
PPN = pyramidnetwork()
PPN_gpu = PPN.to(device1)

In [None]:
#BEV map creation for a sequence of scans (15 previous scans + 1 current scan = 16 input channels)
for i in range(15, 16):
    #load the 15 scans preceding scan i
    previous_scans = []
    for j in range(i-15, i):
        print(j)
        pcd4d = extract_pcd(directory, files[j])
        # convert_scan_bevmap returns (bev_map, voxel_coords); the network
        # only takes the map, so the tuple must be unpacked here.
        bevmap, _ = convert_scan_bevmap(pcd4d, 0.2, 1000, 50, device1)
        previous_scans.append(bevmap)
    #load current scan
    print(i,r"(current)")
    pcd4d = extract_pcd(directory, files[i])
    current_scan, _ = convert_scan_bevmap(pcd4d, 0.2, 1000, 50, device1)

    #feed into PPN
    ppn_pred_gpu = PPN_gpu(previous_scans,current_scan)
    ppn_pred = ppn_pred_gpu.cpu().detach().numpy()
    ppn_pred_pl = ppn_pred[0, 0, :, :]#batch=1,channel=1
    #plot the image
    plt.imshow(ppn_pred_pl, cmap='hot')
    plt.show()

In [None]:
"""#benchmarking
# Hyperparameters
learning_rate = 0.0001
beta = 1  # Smooth L1 loss parameter
lossfun1 = nn.SmoothL1Loss()
lossfun2 = nn.MSELoss()
# Optimizer
optimizer = Adam(PPN_gpu.parameters(), lr=learning_rate)
training_losses = []
validation_losses = []
for e in range(0,1):
    for i in range(15,700):
        previous_scans=[]
        for j in range(i-15,i):
            pcd4d = extract_pcd(directory, files[j])
            bevmap = convert_scan_bevmap(pcd4d,0.2,1000, device1)
            previous_scans.append(bevmap)
        pcd4d = extract_pcd(directory, files[i])
        current_scan = convert_scan_bevmap(pcd4d, 0.2,1000, device1)
        #feed into PPN
        ppn_pred_gpu = PPN_gpu(previous_scans,current_scan)
        #corresponding future training scene evolution
        future_scans = []
        for k in range(i+2,i+2+16):
            futpcd = extract_pcd(directory, files[k])
            futbev = convert_scan_bevmap(futpcd, 0.2, 1000, device1)
            future_scans.append(futbev)
        bevs_stack = torch.stack(future_scans)
        bevs_stack = bevs_stack.float()
        bevs_ten = bevs_stack.unsqueeze(0)
        #feed into ground turth generator
        loss = lossfun1(ppn_pred_gpu, bevs_ten) + lossfun2(ppn_pred_gpu, bevs_ten)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        training_losses.append(loss.item())
        print(i,"loss:",loss)
        # Validation loop
        with torch.no_grad():  # Ensure no gradients are computed
            for vi in range(5000+i, 5001+i):  # files for validation
                vprevious_scans = []
                for vj in range(vi - 15, vi):
                    vpcd4d = extract_pcd(directory, files[vj])
                    vbevmap = convert_scan_bevmap(vpcd4d, 0.2, 1000, device1)
                    vprevious_scans.append(vbevmap)
                vpcd4d = extract_pcd(directory, files[vi])
                vcurrent_scan = convert_scan_bevmap(vpcd4d, 0.2, 1000, device1)
                # Feed into PPN
                ppn_v_gpu = PPN_gpu(vprevious_scans, vcurrent_scan)
                # Corresponding future validation scene evolution
                vfuture_scans = []
                for vk in range(vi + 2, vi + 2 + 16):
                    vfutpcd = extract_pcd(directory, files[vk])
                    vfutbev = convert_scan_bevmap(vfutpcd, 0.2, 1000, device1)
                    vfuture_scans.append(vfutbev)
                vbevs_stack = torch.stack(vfuture_scans)
                vbevs_stack = vbevs_stack.float()
                vbevs_ten = vbevs_stack.unsqueeze(0)
                # Calculate validation loss
                vloss = lossfun1(ppn_v_gpu, vbevs_ten) + lossfun2(ppn_v_gpu, vbevs_ten)
            validation_losses.append(vloss.item()) 
            print(vi,"Validation loss:", vloss)

# Learning curve after training
plt.plot(training_losses, label="Training Loss", color='blue')
plt.plot(validation_losses, label="Validation Loss", color='orange')
plt.xlabel("Iteration")
plt.ylabel("Loss")
plt.title("Learning Curve for benchmarking")
plt.legend()
plt.show()"""

In [None]:
#scene prediction with the pyramid network for scan 7000
for i in range(7000,7001):
    previous_scans=[]
    for j in range(i-15,i):
        pcd4d = extract_pcd(directory, files[j])
        # The convert_scan_bevmap in effect at this point takes a depth_size
        # argument and returns (bev_map, voxel_coords); keep only the map.
        bevmap, _ = convert_scan_bevmap(pcd4d,0.2,1000,50,device1)
        previous_scans.append(bevmap)
    pcd4d = extract_pcd(directory, files[i])
    current_scan, _ = convert_scan_bevmap(pcd4d, 0.2,1000,50,device1)
    plt.imshow(current_scan.cpu(), cmap='hot')
    plt.show()
    #feed into PPN
    ppn_pred_gpu = PPN_gpu(previous_scans,current_scan)
    future_np = ppn_pred_gpu.cpu().detach().numpy()
    future_pl = future_np[0, 0, :, :]#batch=1,channel=1
    #plot the image
    plt.imshow(future_pl, cmap='hot')
    plt.show()

In [None]:
#save the model's parameters
# Writes PPN_gpu's state_dict to "racetrained-PPN-segnet.pth" in the current
# working directory.
# NOTE(review): the PPN training cell above is disabled (wrapped in a string
# literal), so unless weights are trained or loaded elsewhere this stores the
# initial parameters -- confirm.
torch.save(PPN_gpu.state_dict(), "racetrained-PPN-segnet.pth")

In [None]:
class autonetwork(nn.Module):
    """Plain convolutional encoder-decoder (no skip connections) over 16 BEV maps.

    Six encoder stages (two conv + batch-norm layers and a 2x2 max-pool each)
    widen the features 16 -> 1024; six decoder stages (a strided transposed
    convolution followed by a 3x3 transposed convolution) narrow them back
    down to a single output channel.
    """

    # Channel widths at the stage boundaries: index 0 is the input stack.
    _WIDTHS = (16, 32, 64, 128, 256, 512, 1024)

    def __init__(self):
        super().__init__()
        widths = self._WIDTHS
        ##### encoder -- layers are registered in the order conv, bn, conv_1, bn_1
        for stage in range(1, 7):
            c_in, c_out = widths[stage - 1], widths[stage]
            if stage == 1:
                # the first layer is a 1x1 convolution without padding
                entry = nn.Conv2d(c_in, c_out, 1, 1)
            else:
                entry = nn.Conv2d(c_in, c_out, 3, 1, 1)
            setattr(self, f"conv{stage}", entry)
            setattr(self, f"bn{stage}", nn.BatchNorm2d(c_out))
            setattr(self, f"conv{stage}_1", nn.Conv2d(c_out, c_out, 3, 1, 1))
            setattr(self, f"bn{stage}_1", nn.BatchNorm2d(c_out))
        ##### decoder -- stages 6 and 4 upsample with kernel 3, the others with kernel 2
        for stage in range(6, 0, -1):
            c_in, c_out = widths[stage], widths[stage - 1]
            up_kernel = 3 if stage in (6, 4) else 2
            setattr(self, f"tranconv{stage}", nn.ConvTranspose2d(c_in, c_out, up_kernel, 2))
            setattr(self, f"tranbn{stage}", nn.BatchNorm2d(c_out))
            # the very last layer collapses to a single output channel
            refined = c_out if stage > 1 else 1
            setattr(self, f"upconv{stage}_1", nn.ConvTranspose2d(c_out, refined, 3, 1, 1))
            setattr(self, f"upbn{stage}_1", nn.BatchNorm2d(refined))

        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, pastscans, currscan):
        """Run the stacked scans through the encoder and decoder.

        Args:
            pastscans (list[torch.Tensor]): Past BEV maps of identical 2-D shape.
            currscan (torch.Tensor): Current BEV map of the same shape.

        Returns:
            torch.Tensor: Single-channel prediction with a leading batch
            dimension of 1.
        """
        def act(t):
            return F.leaky_relu(t, negative_slope=0.01)

        # stack scans into a (1, channels, H, W) float batch
        feat = torch.stack(pastscans + [currscan]).float().unsqueeze(0)
        # encoding: conv -> bn -> act, twice, then halve the resolution
        for stage in range(1, 7):
            for suffix in ("", "_1"):
                conv = getattr(self, f"conv{stage}{suffix}")
                norm = getattr(self, f"bn{stage}{suffix}")
                feat = act(norm(conv(feat)))
            feat = self.maxpool(feat)
        # decoding: upsample, then refine
        for stage in range(6, 0, -1):
            upsample = getattr(self, f"tranconv{stage}")
            upsample_norm = getattr(self, f"tranbn{stage}")
            feat = act(upsample_norm(upsample(feat)))
            refine = getattr(self, f"upconv{stage}_1")
            refine_norm = getattr(self, f"upbn{stage}_1")
            feat = act(refine_norm(refine(feat)))
        print(feat.shape)

        return feat

In [None]:
# Seed torch's RNG before building the network so the initial weights are
# reproducible, then place the plain encoder-decoder on device2.
torch.manual_seed(44)
aPPN = autonetwork()
aPPN_gpu = aPPN.to(device2)

In [None]:
#BEV map creation for a sequence of scans (15 previous scans + 1 current scan = 16 input channels)
for i in range(15, 16):
    #load the 15 scans preceding scan i
    previous_scans = []
    for j in range(i-15, i):
        print(j)
        pcd4d = extract_pcd(directory, files[j])
        # The convert_scan_bevmap in effect at this point takes a depth_size
        # argument and returns (bev_map, voxel_coords); keep only the map.
        bevmap, _ = convert_scan_bevmap(pcd4d, 0.2, 1000, 50, device2)
        previous_scans.append(bevmap)
    #load current scan
    print(i,r"(current)")
    pcd4d = extract_pcd(directory, files[i])
    current_scan, _ = convert_scan_bevmap(pcd4d, 0.2, 1000, 50, device2)

    #feed into aPPN
    appn_pred_gpu = aPPN_gpu(previous_scans,current_scan)
    appn_pred = appn_pred_gpu.cpu().detach().numpy()
    appn_pred_pl = appn_pred[0, 0, :, :]#batch=1,channel=1
    #plot the image
    plt.imshow(appn_pred_pl, cmap='hot')
    plt.show()

In [None]:
import cv2
#intersection over union as loss
def IoU_loss(pred, target):
    """Soft intersection-over-union loss, ``1 - IoU``.

    The union is computed as ``sum(pred) + sum(target) - intersection``. For
    binary inputs this is the exact set union, and unlike a logical OR it stays
    differentiable with respect to ``pred``. (The previous form subtracted the
    intersection from the logical OR, which made the union zero for identical
    masks.)

    Args:
        pred (torch.Tensor): Predicted map.
        target (torch.Tensor): Reference map of the same shape.

    Returns:
        torch.Tensor: Loss as a tensor of shape (1,).
    """
    # Squeeze the batch dimension (if present)
    pred = pred.squeeze(0)
    target = target.squeeze(0)
    # Intersection
    intersection = torch.sum(pred * target)
    # Union by inclusion-exclusion
    union = torch.sum(pred) + torch.sum(target) - intersection
    # IoU loss (add epsilon for stability)
    iou = torch.div(intersection + 1e-6, union + 1e-6)
    loss = 1 - iou
    return loss.unsqueeze(0)
#canny edge detector with mse + smoothl1 as loss function
def MSSCE(pred, truth, th1,th2, gpu, al=0.85):
    """Weighted sum of MSE and Smooth-L1 on the maps and on their Canny edge maps.

    The result is ``al*mse + (1-al)*edge_mse + al*sl1 + (1-al)*edge_sl1``.
    The edge maps are computed with OpenCV on detached NumPy copies, so the
    two edge terms contribute to the value but carry no gradient.

    Relies on the notebook-level ``lossfun1`` and ``lossfun2`` objects being
    defined before this function is called.

    Args:
        pred (torch.Tensor): Prediction with at least 2 dimensions; the last
            two are treated as image height and width.
        truth (torch.Tensor): Reference with the same shape as ``pred``.
        th1: First Canny hysteresis threshold.
        th2: Second Canny hysteresis threshold.
        gpu (torch.device): Device on which the edge maps are placed.
        al (float): Weight of the pixel terms; ``1 - al`` weights the edge terms.

    Returns:
        torch.Tensor: Scalar loss.
    """
    pred = pred.float()
    truth = truth.float()

    def _edge_maps(batch):
        # cv2.Canny works on single 2-D 8-bit images, so flatten every leading
        # dimension (batch, channel) into a list of H x W planes first.
        planes = batch.cpu().detach().numpy()
        planes = planes.reshape((-1,) + planes.shape[-2:])
        # NOTE(review): astype(np.uint8) truncates the float values; confirm
        # the inputs are scaled so that this keeps the structure of interest.
        edges = np.array([cv2.Canny(plane.astype(np.uint8), th1, th2) for plane in planes])
        return torch.from_numpy(edges).float().to(gpu)

    #canny edge detectors
    pred_edge = _edge_maps(pred)
    truth_edge = _edge_maps(truth)
    mse = lossfun2(pred,truth)
    edgeloss = lossfun2(pred_edge, truth_edge)
    sl1 = lossfun1(pred,truth)
    sedgeloss = lossfun1(pred_edge, truth_edge)
    #MSCE loss
    msceloss = al*mse+(1-al)*edgeloss + al*sl1+(1-al)*sedgeloss
    return msceloss

In [None]:
#training MSSCE: aPPN (on device2) is fitted to the output of PPN (on device1)
# Hyperparameters
learning_rate = 0.001
early_stopping_threshold = 0.1
beta = 1  # Smooth L1 loss parameter
lossfun1 = nn.SmoothL1Loss()
lossfun2 = nn.MSELoss()
# Optimizer
optimizer = Adam(aPPN_gpu.parameters(), lr=learning_rate)
training_losses = []
validation_losses = []

def _bev_sequence(last_index, device):
    """Return (15 previous BEV maps, current BEV map) for the window ending at files[last_index]."""
    maps = []
    for k in range(last_index - 15, last_index + 1):
        # The convert_scan_bevmap in effect here takes a depth_size argument
        # and returns (bev_map, voxel_coords); only the map is needed.
        bev, _ = convert_scan_bevmap(extract_pcd(directory, files[k]), 0.2, 1000, 50, device)
        maps.append(bev)
    return maps[:-1], maps[-1]

for e in range(0,1):
    for i in range(15,700):
        # Build the window once on device2 and copy it to device1 for PPN
        previous_scans2, current_scan2 = _bev_sequence(i, device2)
        previous_scans1 = [bev.to(device1) for bev in previous_scans2]
        current_scan1 = current_scan2.to(device1)
        #feed into aPPN
        appn_pred_gpu = aPPN_gpu(previous_scans2,current_scan2)
        #corresponding segmented training scene evolution; PPN only provides
        #the target, so no autograd graph is built for it
        with torch.no_grad():
            ppn_pred_gpu = PPN_gpu(previous_scans1,current_scan1)
        ppn_pred_gpu2 = ppn_pred_gpu.to(device2)
        #feed into loss function
        loss = MSSCE(appn_pred_gpu, ppn_pred_gpu2,0,0,device2)
        if loss.item() < early_stopping_threshold:
            print("Early stopping: Loss reached threshold (", loss.item(), ")")
            break
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        training_losses.append(loss.item())
        print(i,"loss:",loss)
        # Validation loop
        with torch.no_grad():  # Ensure no gradients are computed
            for vi in range(5000+i, 5001+i):  # files for validation
                vprevious_scans2, vcurrent_scan2 = _bev_sequence(vi, device2)
                vprevious_scans1 = [bev.to(device1) for bev in vprevious_scans2]
                vcurrent_scan1 = vcurrent_scan2.to(device1)
                # Feed into aPPN
                appn_v_gpu = aPPN_gpu(vprevious_scans2, vcurrent_scan2)
                #corresponding segmented training scene evolution
                ppn_v_gpu = PPN_gpu(vprevious_scans1,vcurrent_scan1)
                ppn_v_gpu2 = ppn_v_gpu.to(device2)
                # Calculate validation loss
                vloss = MSSCE(appn_v_gpu, ppn_v_gpu2,0,0,device2)
            validation_losses.append(vloss.item()) 
            print(vi,"Validation loss:", vloss)
# learning curve after training
import matplotlib.pyplot as plt
plt.plot(training_losses, label="Training Loss", color='blue')
plt.plot(validation_losses, label="Validation Loss", color='orange')
plt.xlabel("Training Iteration")
plt.ylabel(f"MSSCE Loss ")
plt.title("Learning Curve for Scene Evolution")
plt.legend()
plt.show()

In [None]:
#scene prediction with aPPN for scan 7100, plus the PPN reference on device1
for i in range(7100,7101):
    previous_scans=[]
    for j in range(i-15,i):
        pcd4d = extract_pcd(directory, files[j])
        # The convert_scan_bevmap in effect at this point takes a depth_size
        # argument and returns (bev_map, voxel_coords); keep only the map.
        bevmap, _ = convert_scan_bevmap(pcd4d,0.2,1000,50,device2)
        previous_scans.append(bevmap)
    pcd4d = extract_pcd(directory, files[i])
    current_scan, _ = convert_scan_bevmap(pcd4d, 0.2,1000,50,device2)
    print(current_scan.max())
    plt.imshow(current_scan.cpu(), cmap='hot')
    plt.show()
    #feed into aPPN
    appn_pred_gpu = aPPN_gpu(previous_scans,current_scan)
    #prediction loss: same window, copied to device1 for PPN
    previous_scans = [bev.to(device1) for bev in previous_scans]
    current_scan = current_scan.to(device1)
    print(current_scan.max())
    ppn_pred_gpu = PPN_gpu(previous_scans,current_scan)
    ppn_pred_gpu2 = ppn_pred_gpu.to(device2)
    afuture_np = appn_pred_gpu.cpu().detach().numpy()
    afuture_pl = afuture_np[0, 0, :, :]#batch=1,channel=1
    #plot the image
    plt.imshow(afuture_pl, cmap='hot')
    plt.show()
    #mssce = MSSCE(appn_pred_gpu,ppn_pred_gpu2,0,0,device2)
    #print(f"MSSCE loss: {mssce}")

In [None]:
"""#training IOU
# Hyperparameters
learning_rate = 0.0001
early_stopping_threshold = 0.01
# Optimizer
optimizer = Adam(aPPN_gpu.parameters(), lr=learning_rate)
training_losses = []
validation_losses = []
for e in range(0,1):
    for i in range(15,700):
        previous_scans1=[]
        previous_scans2=[]
        for j in range(i-15,i):
            pcd4d = extract_pcd(directory, files[j])
            bevmap2 = convert_scan_bevmap(pcd4d,0.2,1000, device2)
            bevmap1 = convert_scan_bevmap(pcd4d,0.2,1000, device1)
            previous_scans2.append(bevmap2)
            previous_scans1.append(bevmap1)
        pcd4d = extract_pcd(directory, files[i])
        current_scan2 = convert_scan_bevmap(pcd4d, 0.2,1000, device2)
        current_scan1 = convert_scan_bevmap(pcd4d, 0.2,1000, device1)
        #feed into aPPN
        appn_pred_gpu = aPPN_gpu(previous_scans2,current_scan2)
        #corresponding segmented training scene evolution
        ppn_pred_gpu = PPN_gpu(previous_scans1,current_scan1)
        ppn_pred_gpu2 = ppn_pred_gpu.to(device2)
        #feed into loss function
        loss = IoU_loss(appn_pred_gpu, ppn_pred_gpu2)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        training_losses.append(loss.item())
        print(i,"loss:",loss)
        if loss.item() < early_stopping_threshold:
            print("Early stopping: Loss reached threshold (", loss.item(), ")")
            break
        # Validation loop
        with torch.no_grad():  # Ensure no gradients are computed
            for vi in range(5000+i, 5001+i):  # files for validation
                vprevious_scans1 = []
                vprevious_scans2 = []
                for vj in range(vi - 15, vi):
                    vpcd4d = extract_pcd(directory, files[vj])
                    vbevmap2 = convert_scan_bevmap(vpcd4d, 0.2, 1000, device2)
                    vbevmap1 = convert_scan_bevmap(vpcd4d, 0.2, 1000, device1)
                    vprevious_scans2.append(vbevmap2)
                    vprevious_scans1.append(vbevmap1)
                vpcd4d = extract_pcd(directory, files[vi])
                vcurrent_scan2 = convert_scan_bevmap(vpcd4d, 0.2, 1000, device2)
                vcurrent_scan1 = convert_scan_bevmap(vpcd4d, 0.2, 1000, device1)
                # Feed into aPPN
                appn_v_gpu = aPPN_gpu(vprevious_scans2, vcurrent_scan2)
                #corresponding segmented training scene evolution
                ppn_v_gpu = PPN_gpu(vprevious_scans1,vcurrent_scan1)
                ppn_v_gpu2 = ppn_v_gpu.to(device2)
                # Calculate validation loss
                vloss = IoU_loss(appn_v_gpu, ppn_v_gpu2)
            validation_losses.append(vloss.item()) 
            print(vi,"Validation loss:", vloss)
# learning curve after training
import matplotlib.pyplot as plt
plt.plot(training_losses, label="Training Loss", color='blue')
plt.plot(validation_losses, label="Validation Loss", color='orange')
plt.xlabel("Training Iteration")
plt.ylabel(f"IoU Loss ")
plt.title("Learning Curve for Scene Evolution")
plt.legend()
plt.show()"""

In [None]:
# Wall-clock time for running PPN and the freshly built autonetwork one after
# the other, both fed from device1. The measurement deliberately includes the
# file loading and BEV conversion for each of the two passes.
import time
import torch
i=15
tppn_gpu = autonetwork().to(device1)
# CUDA calls return before the GPU has finished. Drain whatever is still queued
# (e.g. the weight transfer above) so it is not billed to the timed region.
torch.cuda.synchronize(device1)
start_time = time.perf_counter()
# --- pass 1: PPN on the 15 scans preceding index i plus the scan at i ---
previous_scans=[]
for j in range(i-15,i):
    pcd4d = extract_pcd(directory, files[j])
    bevmap = convert_scan_bevmap(pcd4d,0.2,1000,device1)
    previous_scans.append(bevmap)
pcd4d = extract_pcd(directory, files[i])
current_scan = convert_scan_bevmap(pcd4d, 0.2,1000,device1)
#feed into PPN and aPPN sequentially
ppn_pred_gpu = PPN_gpu(previous_scans,current_scan)
# --- pass 2: same window, reloaded from disk, through the autonetwork ---
previous_scans=[]
for j in range(i-15,i):
    pcd4d = extract_pcd(directory, files[j])
    bevmap = convert_scan_bevmap(pcd4d,0.2,1000,device1)
    previous_scans.append(bevmap)
pcd4d = extract_pcd(directory, files[i])
current_scan = convert_scan_bevmap(pcd4d, 0.2,1000,device1)
tppn_pred_gpu = tppn_gpu(previous_scans,current_scan)

# Block until both forward passes have actually completed on the GPU;
# otherwise the clock would stop while kernels are still running.
torch.cuda.synchronize(device1)
elapsed_time = (time.perf_counter() - start_time)
print(f"Model inference time (might include non-GPU operations): {elapsed_time} seconds")

In [None]:
#model with parallel dual neural networks
class Model(nn.Module):
    """Wrap two networks and evaluate both on the same window of lidar scans.

    The first network is stored as ``self.pyrnet`` and the second as
    ``self.autonet``. When they are ``nn.Module`` instances they are registered
    as sub-modules, so ``state_dict()`` of this wrapper contains both.
    """

    def __init__(self,PPN_gpu,aPPN_gpu):
        """Store the two networks; no copies are made."""
        super().__init__()
        self.pyrnet = PPN_gpu
        self.autonet = aPPN_gpu

    def forward(self,directory,files,i,device1,device2,verbose=True):
        """Load the scan window ending at ``files[i]`` and run both networks on it.

        Args:
            directory: Folder passed to ``extract_pcd`` for every scan.
            files: Ordered sequence of scan file names inside ``directory``.
            i: Index of the current scan. The 15 scans before it are used as
                history, so it must satisfy ``15 <= i < len(files)``.
            device1: Device handed to ``convert_scan_bevmap`` for the inputs of
                ``self.pyrnet``.
            device2: Device handed to ``convert_scan_bevmap`` for the inputs of
                ``self.autonet``.
            verbose: When true (the default), print each scan index as it is
                loaded, followed by the current index.

        Returns:
            Tuple ``(pyrnet_out, autonet_out)`` with the raw outputs of the two
            networks, in that order.

        Raises:
            IndexError: If ``i`` leaves fewer than 15 preceding scans or lies
                past the end of ``files``.
        """
        history = 15
        # Without this guard an index below `history` would produce negative
        # list indices, which Python resolves from the END of `files`: the
        # networks would silently be fed unrelated scans.
        if not history <= i < len(files):
            raise IndexError(
                f"scan index {i} is outside the usable range "
                f"[{history}, {len(files) - 1}]: {history} preceding scans are required"
            )

        previous_scans1 = []
        previous_scans2 = []
        for j in range(i-history, i):
            if verbose:
                print(j)
            # Each scan is read once and converted once per target device.
            # NOTE(review): 0.2 and 1000 are forwarded unchanged to
            # convert_scan_bevmap; presumably cell size and grid extent - confirm there.
            pcd4d = extract_pcd(directory, files[j])
            bevmap1 = convert_scan_bevmap(pcd4d, 0.2, 1000, device1)
            previous_scans1.append(bevmap1)
            bevmap2 = convert_scan_bevmap(pcd4d, 0.2, 1000, device2)
            previous_scans2.append(bevmap2)
        #load current scan
        if verbose:
            print(i,r"(current)")
        pcd4d = extract_pcd(directory, files[i])
        current_scan1 = convert_scan_bevmap(pcd4d, 0.2, 1000, device1)
        current_scan2 = convert_scan_bevmap(pcd4d, 0.2, 1000, device2)

        pyrnet_out = self.pyrnet(previous_scans1,current_scan1)
        autonet_out = self.autonet(previous_scans2,current_scan2)

        return pyrnet_out, autonet_out

In [None]:
# Build the two-branch model and show, for one scan index, the first channel of
# the first batch element predicted by each branch.
torch.manual_seed(44)
model = Model(PPN_gpu,aPPN_gpu)
for i in range(700,701):
    # Visualisation only: no backward pass follows, so skip autograd tracking
    # instead of keeping a graph alive on both GPUs.
    with torch.no_grad():
        pyrnet_pred_gpu, autonet_pred_gpu = model(directory, files, i, device1, device2)
    pyr_pred = pyrnet_pred_gpu.cpu().detach().numpy()
    pyr_pred_pl = pyr_pred[0, 0, :, :]
    plt.imshow(pyr_pred_pl, cmap='hot')
    # Title the figures so the two heat maps can be told apart when skimming.
    plt.title(f"PPN output (scan index {i})")
    plt.show()
    auto_pred = autonet_pred_gpu.cpu().detach().numpy()
    auto_pred_pl = auto_pred[0, 0, :, :]
    plt.imshow(auto_pred_pl, cmap='hot')
    plt.title(f"aPPN output (scan index {i})")
    plt.show()

In [None]:
# Wall-clock time of one forward pass through the two-branch model, including
# the scan loading and BEV conversion done inside Model.forward.
import time
import torch
i=15
# CUDA calls are asynchronous: make sure both GPUs are idle before the clock
# starts so earlier queued work is not counted.
torch.cuda.synchronize(device1)
torch.cuda.synchronize(device2)
start_time = time.perf_counter()
pyrnet_pred_gpu, autonet_pred_gpu = model(directory, files, i, device1, device2)

# The call above returns once the kernels are queued, not once they are done.
# Wait for both devices before reading the clock.
torch.cuda.synchronize(device1)
torch.cuda.synchronize(device2)
elapsed_time = (time.perf_counter() - start_time)
print(f"Model inference time (might include non-GPU operations): {elapsed_time} seconds")

In [None]:
# Write the parameters to disk: aPPN_gpu on its own, then the wrapper model
# under two different file names.
checkpoints = (
    (aPPN_gpu, "racetrained-PPN-recnet.pth"),
    (model, "racetrained-PPN-model.pth"),
    (model, "pytorch_model.bin"),
)
for network, target in checkpoints:
    torch.save(network.state_dict(), target)