In [1]:
%matplotlib widget
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import numpy as np
import matplotlib.pyplot as plt

import pandas as pd

import gymnasium as gym
import csv
import time
from gym.wrappers import RecordVideo

from tqdm import tqdm 

In [2]:
class ConditionalDiffusionNet(nn.Module):
    def __init__(self,data_dim,cond_dim):
        super(ConditionalDiffusionNet,self).__init__()
        n_unit = 256

        self.l1 = nn.Linear(data_dim, n_unit)
        self.l2 = nn.Linear(n_unit, n_unit)

        self.l1_beta = nn.Linear(1, n_unit)
        self.l2_beta = nn.Linear(n_unit, n_unit)

        self.l1_cond = nn.Linear(cond_dim, n_unit)
        self.l2_cond = nn.Linear(n_unit, n_unit)

        self.l3 = nn.Linear(n_unit,n_unit)
        self.l4 = nn.Linear(n_unit,data_dim)
    
    def forward(self,x,c,t):
        xx = self.l1(x)
        xx = F.relu(xx)
        xx = self.l2(xx)
        xx = F.relu(xx)

        cc = self.l1_cond(c)
        cc = F.relu(cc)
        cc = self.l2_cond(cc)
        cc = F.relu(cc)

        bb = self.l1_beta(t)
        bb = F.relu(bb)
        bb = self.l2_beta(bb)
        bb = F.relu(bb)

        xx = self.l3(xx+bb+cc)
        xx = F.relu(xx)
        xx = self.l4(xx)

        return xx

In [3]:
class ConditionalDenoisingDiffusionProbabilisticModel():
    def __init__(self, X, cond, beta, device, batch_size=32):
        self.device = device

        self.X = X
        self.x_dim = self.X.shape[1]
        self.C = cond
        self.c_dim = self.C.shape[1]
        self.beta = beta
        self.n_beta = self.beta.shape[0]

        alpha = 1 - self.beta
        self.alpha = torch.tensor([[torch.prod(alpha[:i+1])] for i in range(self.n_beta)]).float()

        self.batch_size = batch_size

        self.model = ConditionalDiffusionNet(self.X.shape[1], self.C.shape[1]).to(self.device)

        train_dataset = torch.utils.data.TensorDataset(self.X, self.C)
        self.train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)

        self.optimizer = optim.Adam(self.model.parameters(), lr=1e-5)



    def learning(self, n_epoch=10):
        self.model.train()

        for e in range(n_epoch):
            for (x_batch, c_batch) in self.train_loader:
                loss_hist = []

                x_batch = x_batch
                c_batch = c_batch
                
                self.optimizer.zero_grad()

                t = torch.randint(low=0, high=self.n_beta, size=(x_batch.shape[0],))
                noise = torch.randn(x_batch.shape[0], self.x_dim)


                x_t = torch.sqrt(self.alpha[t]) * x_batch + torch.sqrt(1-self.alpha[t]) * noise

                noise_pred = self.model(x_t.to(self.device),
                                        c_batch.to(self.device),
                                        t[:,None].float().to(self.device))


                # import ipdb; ipdb.set_trace()
                loss = ((noise_pred - noise.to(device))**2).sum()
                loss_hist.append(loss.detach().cpu().numpy()/x_batch.shape[0])

                loss.backward()

                self.optimizer.step()

            print('epoch: {}, loss: {}'.format(e, np.array(loss_hist).mean()))

        self.model.eval()



    def sampling(self, c, n=100):
        x_sample = torch.randn(n, self.x_dim)
        c_sample = c.repeat(n, 1)

        for t in range(self.n_beta)[::-1]:
            noise = torch.randn(n, self.x_dim)
            if t==0: noise= torch.zeros(n, self.x_dim)

            sigma = torch.sqrt(self.beta[t]*(1-self.alpha[t-1])/(1-self.alpha[t]))

            noise_pred = self.model(x_sample.to(self.device),
                                    c_sample.to(self.device),
                                    torch.tensor([[t]]).float().to(self.device)).detach().cpu()

            # import ipdb;ipdb.set_trace()
            x_sample = (x_sample - self.beta[t]*noise_pred/torch.sqrt(1-self.alpha[t])) / torch.sqrt(1-self.beta[t]) + sigma * noise


        return x_sample

In [4]:
file_path = 'combined_little_random_expert_trajectory.csv'
data = pd.read_csv(file_path)

# extraction: timestamp, action, position, velocity
x = data['action'].values[:,None]
c = data[['position', 'velocity']].values 

### Learning 

In [5]:
beta = np.exp(np.linspace(np.log(0.001), np.log(0.9), 300))
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

ddpm = ConditionalDenoisingDiffusionProbabilisticModel(
                torch.tensor(x).float(),
                torch.tensor(c).float(),
                torch.tensor(beta).float(), device, batch_size=32)

ddpm.learning(100)

epoch: 0, loss: 0.3116174538930257
epoch: 1, loss: 0.7314106623331705
epoch: 2, loss: 0.6297504107157389
epoch: 3, loss: 0.4688815275828044
epoch: 4, loss: 0.9462671279907227
epoch: 5, loss: 0.4115457932154338
epoch: 6, loss: 0.09302586317062378
epoch: 7, loss: 0.02653154730796814
epoch: 8, loss: 0.3435930411020915
epoch: 9, loss: 0.03698127965132395
epoch: 10, loss: 0.10986687739690144
epoch: 11, loss: 0.8250683148701986
epoch: 12, loss: 0.2856494386990865
epoch: 13, loss: 0.41777586936950684
epoch: 14, loss: 0.022509281833966572
epoch: 15, loss: 0.12846378485361734
epoch: 16, loss: 0.6147297223409017
epoch: 17, loss: 0.042718966801961265
epoch: 18, loss: 0.8231251239776611
epoch: 19, loss: 0.22755000988642374
epoch: 20, loss: 0.033407509326934814
epoch: 21, loss: 0.18682469924290976
epoch: 22, loss: 0.34191596508026123
epoch: 23, loss: 2.132023016611735
epoch: 24, loss: 1.3166991869608562
epoch: 25, loss: 0.265300194422404
epoch: 26, loss: 0.1303223967552185
epoch: 27, loss: 0.028244

### Sampling 

In [6]:
# def predict_and_execute(env, ddpm, device, n_steps, output_file = 'execution_log.csv'):
#     # observation from gym evironment
#     observation, _ = env.reset()

#     # initialize a dataframe to log the data
#     log_data =[]

#     for step in range(n_steps):
#         position = observation[0]
#         velocity = observation[1]

#         c_tensor = torch.tensor([[position, velocity]], dtype = torch.float32).to(device)

#         with torch.no_grad():
#             action_tensor = ddpm.sampling(c_tensor)
        
#         action = action_tensor.cpu().numpy().flatten()

#         # execute action in the environment
#         observation, reward, done,_,_ = env.step(action)

#         env.render()

#         timestamp = time.time()

#         # log data into csv file
#         log_data.append({'timestamp':timestamp, 'action':action[0], 'position':position, 'velocity': velocity})

#         if done:
#             break

#         time.sleep(0.1)
    
#     log_df = pd.DataFrame(log_data)
#     log_df.to_csv(output_file , index = False)      

In [8]:
def predict_and_execute(env, ddpm, device, n_steps, output_file = 'execution_log.csv'):
    # observation from gym evironment
    observation, _ = env.reset()

    # initialize a dataframe to log the data
    log_data =[]

    position_list = []
    velocity_list = []

    for step in range(n_steps):
        position = observation[0]
        velocity = observation[1]

        position_list.append(position)
        velocity_list.append(velocity)

        c_tensor = torch.tensor([[position, velocity]], dtype = torch.float32).to(device)

        with torch.no_grad():
            action_tensor = ddpm.sampling(c_tensor)
        
        action = action_tensor.cpu().numpy().flatten()

        #print("action is ",action)

        # execute action in the environment
        observation, reward, done,_,_ = env.step(action)

        env.render()

        timestamp = time.time()

        # log data into csv file
        log_data.append({'timestamp':timestamp, 'action':action[0], 'position':position, 'velocity': velocity})

        if done:
            break

        # time.sleep(0.1)
    
    log_df = pd.DataFrame(log_data)
    log_df.to_csv(output_file , index = False)      

In [None]:
if __name__ == '__main__':
    env = gym.make('MountainCarContinuous-v0', render_mode="human")
    ddpm = ConditionalDenoisingDiffusionProbabilisticModel(torch.tensor(x).float(),
                torch.tensor(c).float(),
                torch.tensor(beta).float(), device, batch_size=32)
    predict_and_execute(env,ddpm, device, n_steps=4000)

### Inidividual Loss calculation

data - > model -> action ---- compare with original 

Same learning method

1) Give the state pair (velocity, position) one by one to the model, do the sampling inference and compare to the original action

2) Give the state pair list (which append value by time ) to the model , do the sampling inference and compare to the original action

In [6]:
def calculate_l2_loss_from_csv(csv_file, ddpm, device):
    df = pd.read_csv(csv_file)

    # store the l2 loss of each step
    total_loss = 0.0
    n_steps = len(df)

    for index, row in df.iterrows():
        position = row['position']
        velocity = row['velocity']
        true_action = row['action']

        c_tensor = torch.tensor([[position, velocity]], dtype=torch.float32).to(device)

        with torch.no_grad():
            predicted_action_tensor = ddpm.sampling(c_tensor)

        predicted_action = predicted_action_tensor.cpu().numpy().flatten()

        loss = (predicted_action - true_action) ** 2
        
        print("{} loss is {}".format(index, loss ))
        
        total_loss += loss

    l2_loss = total_loss / n_steps
    print(f'L2 Loss: {l2_loss}')
    return l2_loss

# 使用示例
csv_file = 'episode_3.csv'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

l2_loss = calculate_l2_loss_from_csv(csv_file, ddpm, device)


0 loss is [2.9580790e-06 1.8178624e-01 7.7865422e-02 2.8664413e-01 2.0868106e-01
 2.6690930e-02 3.1641617e-01 3.6854538e-01 7.2663084e-02 8.0191356e-01
 8.0668032e-01 4.7531116e-01 4.8857173e-03 1.3935099e-02 3.0076573e-02
 3.6959659e-02 8.1056933e-04 1.4209565e-02 1.3957897e-01 1.6546469e-02
 4.3670326e-03 1.7598817e-01 1.0504459e-03 1.4690727e-02 3.3729430e-02
 3.9637383e-02 8.2999825e-01 7.2953962e-02 1.5898612e-01 4.8006233e-02
 1.2874254e-03 1.0105711e-01 3.3185355e-02 3.0562738e-01 2.6524568e-03
 1.4204755e-02 1.4669533e-03 1.2842401e+00 6.3007236e-02 8.9165913e-03
 1.0501587e-02 2.2972904e-03 1.0417122e+00 5.9990719e-02 4.5770500e-02
 4.8133340e-02 7.9571891e-01 5.4142249e-01 7.5019699e-01 3.8196191e-01
 3.2377240e-01 2.4233516e-02 5.5877209e-01 3.7193781e-01 4.5011863e-01
 8.2845372e-01 3.0664923e-03 4.2224410e-03 2.0058560e-03 2.4399430e-02
 1.3630514e-02 5.4748565e-02 1.1213752e+00 7.0236737e-01 7.3346806e-01
 7.9820651e-01 5.7249266e-01 9.8099065e-01 3.3295536e-01 1.1320021e

## problem found:  ddpm output a serious of value which is appropriate for the control in mountian car continous

debug 1: just take the first value of predicted action and apply

debug 2: modify the last layer of network ???

In [10]:
def predict_and_execute(env, ddpm, device, n_steps, output_file='execution_log_2.csv'):
    # observation from gym environment
    observation, _ = env.reset()

    # initialize a dataframe to log the data
    log_data = []

    position_list = []
    velocity_list = []

    for step in range(n_steps):
        position = observation[0]
        velocity = observation[1]

        position_list.append(position)
        velocity_list.append(velocity)

        # Convert position and velocity into tensor for model input
        c_tensor = torch.tensor([[position, velocity]], dtype=torch.float32).to(device)

        # Use ddpm to predict action
        with torch.no_grad():
            action_tensor = ddpm.sampling(c_tensor)
        
        # Extract the first value of action (or process accordingly)
        action = action_tensor.cpu().numpy().flatten()[0]  # 只取第一个值作为action
        
        # Print the action for debugging
        print("action is ", action)

        # Execute the action in the environment
        observation, reward, done, _, _ = env.step([action])  # 环境期望的action是一个列表
        
        # Render the environment (optional)
        env.render()

        # Get current timestamp
        timestamp = time.time()

        # Log the data
        log_data.append({'timestamp': timestamp, 'action': action, 'position': position, 'velocity': velocity})

        # Check if the episode is done
        if done:
            break

    # Save the log data into a CSV file
    log_df = pd.DataFrame(log_data)
    log_df.to_csv(output_file, index=False)

if __name__ == '__main__':
    # Initialize the environment and the ddpm model
    env = gym.make('MountainCarContinuous-v0', render_mode="human")
    
    # Initialize the ddpm model (assuming you already have the trained model)
    ddpm = ConditionalDenoisingDiffusionProbabilisticModel(torch.tensor(x).float(),
                                                           torch.tensor(c).float(),
                                                           torch.tensor(beta).float(),
                                                           device,
                                                           batch_size=32)

    # Execute the task
    predict_and_execute(env, ddpm, device, n_steps=4000)

action is  -180889260000000.0
action is  -156411270000000.0
action is  -190859450000000.0
action is  -158621600000000.0
action is  -187306070000000.0
action is  -193344210000000.0
action is  -162826300000000.0
action is  -165218380000000.0
action is  -133253460000000.0
action is  -175828360000000.0
action is  -145640800000000.0
action is  -167618700000000.0
action is  -139156990000000.0
action is  -166654110000000.0
action is  -166020720000000.0
action is  -187164240000000.0
action is  -133655700000000.0
action is  -157222500000000.0
action is  -135647415000000.0
action is  -166965410000000.0
action is  -157664760000000.0
action is  -154286680000000.0
action is  -150875300000000.0
action is  -157410010000000.0
action is  -164091290000000.0
action is  -169201650000000.0
action is  -172368560000000.0
action is  -185142010000000.0
action is  -150343070000000.0
action is  -158706700000000.0
action is  -189205460000000.0
action is  -137263550000000.0
action is  -172644620000000.0
action is 

calculate individual error 