In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torchvision.models import resnet18, ResNet18_Weights
from torchvision import transforms
from PIL import Image
import os
import matplotlib.pyplot as plt
from torch.nn.utils.rnn import pad_sequence

# Load images function
def load_images(folder):
    images = []
    transform = transforms.Compose([transforms.Resize((100, 100)), transforms.ToTensor()])
    for filename in os.listdir(folder):
        if filename.endswith('.png'):
            img = Image.open(os.path.join(folder, filename)).convert('RGB')
            img = transform(img)
            images.append(img)
    return torch.stack(images)

# Load the .npy file to inspect its contents
trajectories_data = np.load('trajectories_3.npy', allow_pickle=True)
print(type(trajectories_data))
print(trajectories_data.shape)
print(trajectories_data)

# Define the Resnet_Visual_Encoder function
class ResNet18VisualEncoder(nn.Module):
    def __init__(self):
        super(ResNet18VisualEncoder, self).__init__()
        self.resnet18 = resnet18(weights=ResNet18_Weights.DEFAULT)
        self.resnet18 = nn.Sequential(*list(self.resnet18.children())[:-2])

    def forward(self, images):
        with torch.no_grad():
            embeddings = self.resnet18(images)
        return embeddings

def Resnet_Visual_Encoder(images):
    encoder = ResNet18VisualEncoder()
    latent_embeddings = encoder(images)
    return latent_embeddings

# Define the Pathl function
def Pathl(data):
    trajectories = [d['trajectory'] for d in data]
    lengths = [len(traj) for traj in trajectories]
    return lengths

# Define the adding_Gaussian_noise function
# Cosine noise scheduler
def cosine_noise_scheduler(T, beta_start=0.0001, beta_end=0.02):
    return np.cos((np.linspace(0, T, T) + 0.008) / (1.008) * np.pi * 0.5) ** 2 * (beta_end - beta_start) + beta_start

# Adding Gaussian noise function
def adding_Gaussian_noise(lengths, trajectories):
    noisy_trajectories = []
    T = 1000  # Number of diffusion steps
    beta_schedule = cosine_noise_scheduler(T)
    for i, length in enumerate(lengths):
        trajectory = trajectories[i]
        noise = np.random.normal(0, 1, (length, 2))
        noisy_trajectory = np.array(trajectory) + noise * beta_schedule[:length, None]
        noisy_trajectories.append(noisy_trajectory)
    return noisy_trajectories

# Define the noise_prediction_network function
# Noise prediction network class
class NoisePredictionNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(NoisePredictionNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# FiLM layer class
class FiLM(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(FiLM, self).__init__()
        self.gamma = nn.Linear(in_channels, out_channels)
        self.beta = nn.Linear(in_channels, out_channels)

    def forward(self, x, cond):
        gamma = self.gamma(cond)
        beta = self.beta(cond)
        return gamma * x + beta

# Temporal CNN with FiLM class
class TemporalCNNFiLM(nn.Module):
    def __init__(self, input_dim, latent_dim):
        super(TemporalCNNFiLM, self).__init__()
        self.conv1 = nn.Conv1d(input_dim, 64, kernel_size=3, padding=1)
        self.film1 = FiLM(latent_dim, 64)
        self.conv2 = nn.Conv1d(64, 32, kernel_size=3, padding=1)
        self.film2 = FiLM(latent_dim, 32)
        self.conv3 = nn.Conv1d(32, 2, kernel_size=3, padding=1)

    def forward(self, x, cond):
        x = self.conv1(x)
        x = torch.relu(self.film1(x, cond))
        x = self.conv2(x)
        x = torch.relu(self.film2(x, cond))
        x = self.conv3(x)
        return x

# Train noise prediction network function
def train_noise_prediction_network(latent_embeddings, start_goals, noisy_trajectories, actual_noises, epochs=2, lr=0.01):
    latent_dim = latent_embeddings.shape[1] * latent_embeddings.shape[2] * latent_embeddings.shape[3]
    model = TemporalCNNFiLM(input_dim=2, latent_dim=latent_dim + 4)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    for epoch in range(epochs):
        for i in range(len(noisy_trajectories)):
            latent_embedding = latent_embeddings[i].view(-1)  # Flatten to 1D tensor
            start_goal_flat = start_goals[i].view(-1)  # Flatten start_goal to 1D tensor
            cond = torch.cat((latent_embedding, start_goal_flat), dim=0).float()  # Combine latents and start_goal
            noisy_traj = noisy_trajectories[i].permute(1, 0).unsqueeze(0)  # [1, 2, T]
            actual_noise = actual_noises[i].permute(1, 0).unsqueeze(0)  # [1, 2, T]

            predictions = model(noisy_traj, cond.unsqueeze(0))
            loss = criterion(predictions, actual_noise)  # Compute loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

    return model

# DiPPeR model class using Temporal CNN with FiLM
class DiPPeR(nn.Module):
    def __init__(self, noise_prediction_network):
        super(DiPPeR, self).__init__()
        self.noise_prediction_network = noise_prediction_network

    def forward(self, O, noisy_trajectory, path_length, start_goal):
        denoised_trajectory = noisy_trajectory
        latent_embedding = O.view(-1)  # Flatten latent embedding
        start_goal_flat = start_goal.view(-1)  # Flatten start_goal
        cond = torch.cat((latent_embedding, start_goal_flat), dim=0).float()  # Combine latents and start_goal

        for t in reversed(range(path_length)):
            traj_point = denoised_trajectory[t].view(-1)  # Flatten trajectory point
            traj_point = traj_point.unsqueeze(0).unsqueeze(0)  # [1, 1, 2]
            noise_pred = self.noise_prediction_network(traj_point.permute(0, 2, 1), cond.unsqueeze(0))
            denoised_trajectory[t] = denoised_trajectory[t] - noise_pred.squeeze(0).permute(1, 0)

        return denoised_trajectory

# Load data
images = load_images('./maze_maps3')
latent_embeddings = Resnet_Visual_Encoder(images)

# Load trajectories data
data = np.load('trajectories_3.npy', allow_pickle=True)

start_goals = []
trajectories_list = []
for d in data:
    start = torch.tensor(d['start'])
    goal = torch.tensor(d['goal'])
    start_goal = torch.cat((start, goal))  # Concatenate start and goal tensors
    start_goals.append(start_goal)
    trajectories_list.append(torch.tensor(d['trajectory']))

# Calculate lengths and add Gaussian noise
lengths = Pathl(data)
noisy_trajectories = adding_Gaussian_noise(lengths, trajectories_list)

# Ensure all tensors have the same number of elements
if len(latent_embeddings) != len(start_goals):
    print(f"Mismatch in the number of images ({len(latent_embeddings)}) and trajectories ({len(start_goals)}). Adjusting...")

    # Adjust latent embeddings to match the number of start_goals
    latent_embeddings = latent_embeddings[:len(start_goals)]

assert len(latent_embeddings) == len(start_goals) == len(noisy_trajectories), "Mismatch in number of elements"

# Convert data to tensors
noisy_trajectories_tensor = pad_sequence([torch.tensor(traj) for traj in noisy_trajectories], batch_first=True)
actual_noises = pad_sequence([torch.tensor(traj) - torch.tensor(trajectories_list[i]) for i, traj in enumerate(noisy_trajectories)], batch_first=True)

# Train the noise prediction network
noise_prediction_model = train_noise_prediction_network(latent_embeddings, start_goals, noisy_trajectories_tensor, actual_noises)

# Create and use the DiPPeR model
dipper_model = DiPPeR(noise_prediction_model)
final_trajectories = [dipper_model(latent_embeddings[i], noisy_trajectories_tensor[i], lengths[i], start_goals[i]) for i in range(len(noisy_trajectories_tensor))]

# Plot images with trajectories
def plot_images_with_trajectories(images, trajectories, output_folder='output_images'):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    transform = transforms.ToPILImage()

    for i, (image, trajectory) in enumerate(zip(images, trajectories)):
        img = transform(image)
        plt.figure()
        plt.imshow(img)

        trajectory = trajectory.detach().cpu().numpy()
        plt.plot(trajectory[:, 0], trajectory[:, 1], marker='o', color='r', linestyle='-')

        plt.title(f'Image with Denoised Trajectory {i}')
        plt.axis('off')
        plt.savefig(os.path.join(output_folder, f'image_with_trajectory_{i}.png'))
        plt.close()

plot_images_with_trajectories(images, final_trajectories)


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torchvision.models import resnet18, ResNet18_Weights
from torchvision import transforms
from PIL import Image
import os
import matplotlib.pyplot as plt
from torch.nn.utils.rnn import pad_sequence

# Load images function
def load_images(folder):
    images = []
    transform = transforms.Compose([transforms.Resize((100, 100)), transforms.ToTensor()])
    for filename in os.listdir(folder):
        if filename.endswith('.png'):
            img = Image.open(os.path.join(folder, filename)).convert('RGB')
            img = transform(img)
            images.append(img)
    return torch.stack(images)

# Load the .npy file to inspect its contents
trajectories_data = np.load('trajectories_3.npy', allow_pickle=True)
print(type(trajectories_data))
print(trajectories_data.shape)
print(trajectories_data)

# Define the Resnet_Visual_Encoder function
class ResNet18VisualEncoder(nn.Module):
    def __init__(self):
        super(ResNet18VisualEncoder, self).__init__()
        self.resnet18 = resnet18(weights=ResNet18_Weights.DEFAULT)
        self.resnet18 = nn.Sequential(*list(self.resnet18.children())[:-2])

    def forward(self, images):
        with torch.no_grad():
            embeddings = self.resnet18(images)
        return embeddings

def Resnet_Visual_Encoder(images):
    encoder = ResNet18VisualEncoder()
    latent_embeddings = encoder(images)
    return latent_embeddings

# Define the Pathl function
def Pathl(data):
    trajectories = [d['trajectory'] for d in data]
    lengths = [len(traj) for traj in trajectories]
    return lengths

# Define the adding_Gaussian_noise function
# Cosine noise scheduler
def cosine_noise_scheduler(T, beta_start=0.0001, beta_end=0.02):
    return np.cos((np.linspace(0, T, T) + 0.008) / (1.008) * np.pi * 0.5) ** 2 * (beta_end - beta_start) + beta_start

# Adding Gaussian noise function
def adding_Gaussian_noise(lengths, trajectories):
    noisy_trajectories = []
    T = 1000  # Number of diffusion steps
    beta_schedule = cosine_noise_scheduler(T)
    for i, length in enumerate(lengths):
        trajectory = trajectories[i]
        noise = np.random.normal(0, 1, (length, 2))
        noisy_trajectory = np.array(trajectory) + noise * beta_schedule[:length, None]
        noisy_trajectories.append(noisy_trajectory)
    return noisy_trajectories

# Define the noise_prediction_network function
# Noise prediction network class
class NoisePredictionNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(NoisePredictionNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# FiLM layer class
class FiLM(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(FiLM, self).__init__()
        self.gamma = nn.Linear(in_channels, out_channels)
        self.beta = nn.Linear(in_channels, out_channels)

    def forward(self, x, cond):
        gamma = self.gamma(cond)
        beta = self.beta(cond)
        return gamma * x + beta

# Temporal CNN with FiLM class
class TemporalCNNFiLM(nn.Module):
    def __init__(self, input_dim, latent_dim):
        super(TemporalCNNFiLM, self).__init__()
        self.conv1 = nn.Conv1d(input_dim, 64, kernel_size=3, padding=1)
        self.film1 = FiLM(latent_dim, 64)
        self.conv2 = nn.Conv1d(64, 32, kernel_size=3, padding=1)
        self.film2 = FiLM(latent_dim, 32)
        self.conv3 = nn.Conv1d(32, 2, kernel_size=3, padding=1)

    def forward(self, x, cond):
        x = self.conv1(x)
        x = torch.relu(self.film1(x, cond))
        x = self.conv2(x)
        x = torch.relu(self.film2(x, cond))
        x = self.conv3(x)
        return x

# Train noise prediction network function
def train_noise_prediction_network(latent_embeddings, start_goals, noisy_trajectories, actual_noises, epochs=2, lr=0.01):
    latent_dim = latent_embeddings.shape[1] * latent_embeddings.shape[2] * latent_embeddings.shape[3]
    model = TemporalCNNFiLM(input_dim=2, latent_dim=latent_dim + 4)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    for epoch in range(epochs):
        for i in range(len(noisy_trajectories)):
            latent_embedding = latent_embeddings[i].view(-1).float()  # Flatten to 1D tensor and convert to float32
            start_goal_flat = start_goals[i].view(-1).float()  # Flatten start_goal to 1D tensor and convert to float32
            cond = torch.cat((latent_embedding, start_goal_flat), dim=0).float()  # Combine latents and start_goal
            noisy_traj = noisy_trajectories[i].permute(1, 0).unsqueeze(0).float()  # [1, 2, T] and convert to float32
            actual_noise = actual_noises[i].permute(1, 0).unsqueeze(0).float()  # [1, 2, T] and convert to float32

            predictions = model(noisy_traj, cond.unsqueeze(0))
            loss = criterion(predictions, actual_noise)  # Compute loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

    return model

# DiPPeR model class using Temporal CNN with FiLM
class DiPPeR(nn.Module):
    def __init__(self, noise_prediction_network):
        super(DiPPeR, self).__init__()
        self.noise_prediction_network = noise_prediction_network

    def forward(self, O, noisy_trajectory, path_length, start_goal):
        denoised_trajectory = noisy_trajectory
        latent_embedding = O.view(-1).float()  # Flatten latent embedding and convert to float32
        start_goal_flat = start_goal.view(-1).float()  # Flatten start_goal and convert to float32
        cond = torch.cat((latent_embedding, start_goal_flat), dim=0).float()  # Combine latents and start_goal

        for t in reversed(range(path_length)):
            traj_point = denoised_trajectory[t].view(-1).float()  # Flatten trajectory point and convert to float32
            traj_point = traj_point.unsqueeze(0).unsqueeze(0)  # [1, 1, 2]
            noise_pred = self.noise_prediction_network(traj_point.permute(0, 2, 1), cond.unsqueeze(0))
            denoised_trajectory[t] = denoised_trajectory[t] - noise_pred.squeeze(0).permute(1, 0)

        return denoised_trajectory

# Load data
images = load_images('./maze_maps3')
latent_embeddings = Resnet_Visual_Encoder(images)

# Load trajectories data
data = np.load('trajectories_3.npy', allow_pickle=True)

start_goals = []
trajectories_list = []
for d in data:
    start = torch.tensor(d['start'])
    goal = torch.tensor(d['goal'])
    start_goal = torch.cat((start, goal))  # Concatenate start and goal tensors
    start_goals.append(start_goal)
    trajectories_list.append(torch.tensor(d['trajectory']))

# Calculate lengths and add Gaussian noise
lengths = Pathl(data)
noisy_trajectories = adding_Gaussian_noise(lengths, trajectories_list)

# Ensure all tensors have the same number of elements
if len(latent_embeddings) != len(start_goals):
    print(f"Mismatch in the number of images ({len(latent_embeddings)}) and trajectories ({len(start_goals)}). Adjusting...")

    # Adjust latent embeddings to match the number of start_goals
    latent_embeddings = latent_embeddings[:len(start_goals)]

assert len(latent_embeddings) == len(start_goals) == len(noisy_trajectories), "Mismatch in number of elements"

# Convert data to tensors
noisy_trajectories_tensor = pad_sequence([torch.tensor(traj) for traj in noisy_trajectories], batch_first=True).float()
actual_noises = pad_sequence([torch.tensor(traj) - torch.tensor(trajectories_list[i]) for i, traj in enumerate(noisy_trajectories)], batch_first=True).float()

# Train the noise prediction network
noise_prediction_model = train_noise_prediction_network(latent_embeddings, start_goals, noisy_trajectories_tensor, actual_noises)

# Create and use the DiPPeR model
dipper_model = DiPPeR(noise_prediction_model)
final_trajectories = [dipper_model(latent_embeddings[i], noisy_trajectories_tensor[i], lengths[i], start_goals[i]) for i in range(len(noisy_trajectories_tensor))]

# Plot images with trajectories
def plot_images_with_trajectories(images, trajectories, output_folder='output_images'):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    transform = transforms.ToPILImage()

    for i, (image, trajectory) in enumerate(zip(images, trajectories)):
        img = transform(image)
        plt.figure()
        plt.imshow(img)

        trajectory = trajectory.detach().cpu().numpy()
        plt.plot(trajectory[:, 0], trajectory[:, 1], marker='o', color='r', linestyle='-')

        plt.title(f'Image with Denoised Trajectory {i}')
        plt.axis('off')
        plt.savefig(os.path.join(output_folder, f'image_with_trajectory_{i}.png'))
        plt.close()

plot_images_with_trajectories(images, final_trajectories)


<class 'numpy.ndarray'>
(5,)
[{'map_name': 'maze_1.png', 'start': (31, 53), 'goal': (15, 56), 'trajectory': [(31, 54), (31, 55), (32, 55), (33, 55), (34, 55), (35, 55), (36, 55), (37, 55), (38, 55), (39, 55), (40, 55), (41, 55), (42, 55), (43, 55), (44, 55), (45, 55), (46, 55), (47, 55), (48, 55), (49, 55), (50, 55), (51, 55), (52, 55), (53, 55), (54, 55), (55, 55), (56, 55), (57, 55), (58, 55), (59, 55), (60, 55), (60, 56), (60, 57), (60, 58), (60, 59), (60, 60), (60, 61), (60, 62), (60, 63), (60, 64), (60, 65), (60, 66), (60, 67), (60, 68), (60, 69), (60, 70), (60, 71), (60, 72), (60, 73), (60, 74), (60, 75), (60, 76), (59, 76), (58, 76), (57, 76), (56, 76), (55, 76), (54, 76), (53, 76), (52, 76), (52, 75), (52, 74), (52, 73), (52, 72), (52, 71), (52, 70), (52, 69), (52, 68), (52, 67), (52, 66), (52, 65), (52, 64), (52, 63), (51, 63), (50, 63), (49, 63), (48, 63), (47, 63), (46, 63), (45, 63), (44, 63), (43, 63), (42, 63), (41, 63), (40, 63), (39, 63), (38, 63), (37, 63), (36, 63), (

  actual_noises = pad_sequence([torch.tensor(traj) - torch.tensor(trajectories_list[i]) for i, traj in enumerate(noisy_trajectories)], batch_first=True).float()


RuntimeError: The size of tensor a (64) must match the size of tensor b (140) at non-singleton dimension 2