In [2]:
%matplotlib widget
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import numpy as np
import matplotlib.pyplot as plt

import pandas as pd

import gymnasium as gym
import csv
import time
from gym.wrappers import RecordVideo

from tqdm import tqdm 

In [3]:
class ConditionalDiffusionNet(nn.Module):
    def __init__(self,data_dim,cond_dim):
        super(ConditionalDiffusionNet,self).__init__()
        n_unit = 256

        self.l1 = nn.Linear(data_dim, n_unit)
        self.l2 = nn.Linear(n_unit, n_unit)

        self.l1_beta = nn.Linear(1, n_unit)
        self.l2_beta = nn.Linear(n_unit, n_unit)

        self.l1_cond = nn.Linear(cond_dim, n_unit)
        self.l2_cond = nn.Linear(n_unit, n_unit)

        self.l3 = nn.Linear(n_unit,n_unit)
        # self.l4 = nn.Linear(n_unit,data_dim)
        self.l4 = nn.Linear(n_unit, 1)
    
    def forward(self,x,c,t):
        xx = self.l1(x)
        xx = F.relu(xx)
        xx = self.l2(xx)
        xx = F.relu(xx)

        cc = self.l1_cond(c)
        cc = F.relu(cc)
        cc = self.l2_cond(cc)
        cc = F.relu(cc)

        bb = self.l1_beta(t)
        bb = F.relu(bb)
        bb = self.l2_beta(bb)
        bb = F.relu(bb)

        xx = self.l3(xx+bb+cc)
        xx = F.relu(xx)
        xx = self.l4(xx)

        return xx

In [4]:
class ConditionalDenoisingDiffusionProbabilisticModel():
    def __init__(self, X, cond, beta, device, batch_size=32):
        self.device = device

        self.X = X
        self.x_dim = self.X.shape[1]
        self.C = cond
        self.c_dim = self.C.shape[1]
        self.beta = beta
        self.n_beta = self.beta.shape[0]

        alpha = 1 - self.beta
        self.alpha = torch.tensor([[torch.prod(alpha[:i+1])] for i in range(self.n_beta)]).float()

        self.batch_size = batch_size

        self.model = ConditionalDiffusionNet(self.X.shape[1], self.C.shape[1]).to(self.device)

        train_dataset = torch.utils.data.TensorDataset(self.X, self.C)
        self.train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)

        self.optimizer = optim.Adam(self.model.parameters(), lr=1e-5)



    def learning(self, n_epoch=10):
        self.model.train()

        for e in range(n_epoch):
            for (x_batch, c_batch) in self.train_loader:
                loss_hist = []

                x_batch = x_batch
                c_batch = c_batch
                
                self.optimizer.zero_grad()

                t = torch.randint(low=0, high=self.n_beta, size=(x_batch.shape[0],))
                noise = torch.randn(x_batch.shape[0], self.x_dim)


                x_t = torch.sqrt(self.alpha[t]) * x_batch + torch.sqrt(1-self.alpha[t]) * noise

                noise_pred = self.model(x_t.to(self.device),
                                        c_batch.to(self.device),
                                        t[:,None].float().to(self.device))


                # import ipdb; ipdb.set_trace()
                # loss = ((noise_pred - noise.to(device))**2).sum()
                loss = ((noise_pred - noise.to(self.device))**2).mean()  # use mse loss

                # loss_hist.append(loss.detach().cpu().numpy()/x_batch.shape[0])
                loss_hist.append(loss.detach().cpu().numpy())  #record current loss value

                loss.backward()

                self.optimizer.step()

            print('epoch: {}, loss: {}'.format(e, np.array(loss_hist).mean()))

        self.model.eval()



    def sampling(self, c, n=100):
        # x_sample = torch.randn(n, self.x_dim)
        x_sample = torch.randn(n, 1)  # n is sampling time，1 is output action dimension

        c_sample = c.repeat(n, 1)

        for t in range(self.n_beta)[::-1]:
            noise = torch.randn(n, self.x_dim)
            if t==0: noise= torch.zeros(n, self.x_dim)

            sigma = torch.sqrt(self.beta[t]*(1-self.alpha[t-1])/(1-self.alpha[t]))

            noise_pred = self.model(x_sample.to(self.device),
                                    c_sample.to(self.device),
                                    torch.tensor([[t]]).float().to(self.device)).detach().cpu()

            # import ipdb;ipdb.set_trace()
            x_sample = (x_sample - self.beta[t]*noise_pred/torch.sqrt(1-self.alpha[t])) / torch.sqrt(1-self.beta[t]) + sigma * noise
        
        x_sample = torch.clamp(x_sample, -1, 1)  # clamp action value to [-1, 1]

        return x_sample

In [5]:
file_path = 'combined_episodes_big_small_random.csv'
data = pd.read_csv(file_path)

# extraction: timestamp, action, position, velocity
x = data['action'].values[:,None]
c = data[['position', 'velocity']].values 

## Learning

In [6]:
beta = np.exp(np.linspace(np.log(0.001), np.log(0.9), 300))
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

ddpm = ConditionalDenoisingDiffusionProbabilisticModel(
                torch.tensor(x).float(),
                torch.tensor(c).float(),
                torch.tensor(beta).float(), device, batch_size=32)

ddpm.learning(100)

epoch: 0, loss: 0.916404128074646
epoch: 1, loss: 0.5071741938591003
epoch: 2, loss: 0.12865185737609863
epoch: 3, loss: 0.2108181118965149
epoch: 4, loss: 0.0329340398311615
epoch: 5, loss: 0.19527988135814667
epoch: 6, loss: 0.15800178050994873
epoch: 7, loss: 0.2860020399093628
epoch: 8, loss: 0.06446629017591476
epoch: 9, loss: 0.04216674715280533
epoch: 10, loss: 0.590259313583374
epoch: 11, loss: 0.46445807814598083
epoch: 12, loss: 0.05013973265886307
epoch: 13, loss: 0.4139255881309509
epoch: 14, loss: 0.11840444803237915
epoch: 15, loss: 0.4390581250190735
epoch: 16, loss: 0.11076533049345016
epoch: 17, loss: 0.7463089823722839
epoch: 18, loss: 0.2868988811969757
epoch: 19, loss: 0.4685055613517761
epoch: 20, loss: 1.4790648221969604
epoch: 21, loss: 0.11345934867858887
epoch: 22, loss: 0.22855117917060852
epoch: 23, loss: 0.03775947168469429
epoch: 24, loss: 0.11225132644176483
epoch: 25, loss: 0.9348472356796265
epoch: 26, loss: 0.1656842678785324
epoch: 27, loss: 0.33640211

## Sampling

In [7]:
def predict_and_execute(env, ddpm, device, n_steps, output_file = 'exe_log_network_modified.csv'):
    # observation from gym evironment
    observation, _ = env.reset()

    # initialize a dataframe to log the data
    log_data =[]

    for step in range(n_steps):
        position = observation[0]
        velocity = observation[1]

        c_tensor = torch.tensor([[position, velocity]], dtype = torch.float32).to(device)

        with torch.no_grad():
            action_tensor = ddpm.sampling(c_tensor)
        
        action = action_tensor.cpu().numpy().flatten()
        action = np.clip(action, -1, 1) # added
        # execute action in the environment
        observation, reward, done,_,_ = env.step(action)

        env.render()

        timestamp = time.time()

        # log data into csv file
        log_data.append({'timestamp':timestamp, 'action':action[0], 'position':position, 'velocity': velocity})

        if done:
            break

        time.sleep(0.1)
    
    log_df = pd.DataFrame(log_data)
    log_df.to_csv(output_file , index = False) 

In [8]:
if __name__ == '__main__':
    env = gym.make('MountainCarContinuous-v0', render_mode="human")
    ddpm = ConditionalDenoisingDiffusionProbabilisticModel(torch.tensor(x).float(),
                torch.tensor(c).float(),
                torch.tensor(beta).float(), device, batch_size=32)
    predict_and_execute(env,ddpm, device, n_steps=3000)

## Get a list of velocity and position

In [9]:
def predict_and_execute(env, ddpm, device, n_steps, output_file = 'exe_log_network_modified.csv'):
    # observation from gym evironment
    observation, _ = env.reset()

    # initialize a dataframe to log the data
    log_data =[]

    position_list = []
    velocity_list = []

    for step in range(n_steps):
        position = observation[0]
        velocity = observation[1]
        
        position_list.append(position)
        velocity_list.append(velocity)

        c_tensor = torch.tensor([[position_list, velocity_list]], dtype = torch.float32).to(device)

        with torch.no_grad():
            action_tensor = ddpm.sampling(c_tensor)
        
        action = action_tensor.cpu().numpy().flatten()
        action = np.clip(action, -1, 1) # added
        # execute action in the environment
        observation, reward, done,_,_ = env.step(action)

        env.render()

        timestamp = time.time()

        # log data into csv file
        log_data.append({'timestamp':timestamp, 'action':action[0], 'position':position, 'velocity': velocity})

        if done:
            break

        time.sleep(0.1)
    
    log_df = pd.DataFrame(log_data)
    log_df.to_csv(output_file , index = False) 

In [11]:
if __name__ == '__main__':
    env = gym.make('MountainCarContinuous-v0', render_mode="human")
    ddpm = ConditionalDenoisingDiffusionProbabilisticModel(torch.tensor(x).float(),
                torch.tensor(c).float(),
                torch.tensor(beta).float(), device, batch_size=32)
    predict_and_execute(env,ddpm, device, n_steps=3000)

RuntimeError: Number of dimensions of repeat dims can not be smaller than number of dimensions of tensor

Maybe we need to modify the sampling method ?

## Calculate the Individual Action L2 Error

1) Without modifying the sampling method, only the network (not take action[0])

In [15]:
import pandas as pd
import torch
import numpy as np

def calculate_l2_loss_from_csv(csv_file, ddpm, device):
    df = pd.read_csv(csv_file)
    true_actions = df['action'].values[:, None]  # (n_steps, 1)

    total_loss = 0.0
    n_steps = len(df)

    for i in range(n_steps):
        c = df[['position', 'velocity']].iloc[i].values[None, :]  # (1, 2)

        c_tensor = torch.tensor(c, dtype=torch.float32).to(device)

        with torch.no_grad():
            predicted_action_tensor = ddpm.sampling(c_tensor)

        # -> numpy
        predicted_action = predicted_action_tensor.cpu().numpy().flatten()  # (action_dim,)

        loss = (predicted_action - true_actions[i]) ** 2  # (1,)

        print(f"{i} loss is {loss[0]}")  # 打印当前损失

        total_loss += loss[0]

    l2_loss = total_loss / n_steps
    print(f'L2 Loss: {l2_loss}')
    return l2_loss

csv_file = 'episode_3.csv'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

l2_loss = calculate_l2_loss_from_csv(csv_file, ddpm, device)


0 loss is 1.0
1 loss is 1.0
2 loss is 1.0
3 loss is 1.0
4 loss is 1.0
5 loss is 1.0
6 loss is 1.0
7 loss is 1.0
8 loss is 1.0
9 loss is 1.0
10 loss is 1.0
11 loss is 1.0
12 loss is 1.0
13 loss is 1.0
14 loss is 1.0
15 loss is 1.0
16 loss is 1.0
17 loss is 1.07965087890625
18 loss is 1.5625
19 loss is 2.392822265625
20 loss is 3.3994140625
21 loss is 4.0
22 loss is 4.0
23 loss is 4.0
24 loss is 4.0
25 loss is 4.0
26 loss is 4.0
27 loss is 4.0
28 loss is 4.0
29 loss is 4.0
30 loss is 4.0
31 loss is 4.0
32 loss is 4.0
33 loss is 4.0
34 loss is 4.0
35 loss is 4.0
36 loss is 4.0
37 loss is 4.0
38 loss is 4.0
39 loss is 4.0
40 loss is 4.0
41 loss is 4.0
42 loss is 4.0
43 loss is 4.0
44 loss is 4.0
45 loss is 4.0
46 loss is 4.0
47 loss is 4.0
48 loss is 4.0
49 loss is 4.0
50 loss is 2.92730712890625
51 loss is 1.523681640625
52 loss is 1.0
53 loss is 1.0
54 loss is 1.0
55 loss is 1.0
56 loss is 1.0
57 loss is 1.0
58 loss is 1.0
59 loss is 1.0
60 loss is 1.0
61 loss is 0.8622206086292863
62 lo