In [1]:
%matplotlib widget
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import numpy as np
import matplotlib.pyplot as plt

import pandas as pd

import gymnasium as gym
import csv
import time
from gym.wrappers import RecordVideo

from tqdm import tqdm 

In [2]:
class ConditionalDiffusionNet(nn.Module):
    def __init__(self,data_dim,cond_dim):
        super(ConditionalDiffusionNet,self).__init__()
        n_unit = 256

        self.l1 = nn.Linear(data_dim, n_unit)
        self.l2 = nn.Linear(n_unit, n_unit)

        self.l1_beta = nn.Linear(1, n_unit)
        self.l2_beta = nn.Linear(n_unit, n_unit)

        self.l1_cond = nn.Linear(cond_dim, n_unit)
        self.l2_cond = nn.Linear(n_unit, n_unit)

        self.l3 = nn.Linear(n_unit,n_unit)
        self.l4 = nn.Linear(n_unit,data_dim)
    
    def forward(self,x,c,t):
        xx = self.l1(x)
        xx = F.relu(xx)
        xx = self.l2(xx)
        xx = F.relu(xx)

        cc = self.l1_cond(c)
        cc = F.relu(cc)
        cc = self.l2_cond(cc)
        cc = F.relu(cc)

        bb = self.l1_beta(t)
        bb = F.relu(bb)
        bb = self.l2_beta(bb)
        bb = F.relu(bb)

        xx = self.l3(xx+bb+cc)
        xx = F.relu(xx)
        xx = self.l4(xx)

        return xx

In [3]:
class ConditionalDenoisingDiffusionProbabilisticModel():
    def __init__(self, X, cond, beta, device, batch_size=32):
        self.device = device

        self.X = X
        self.x_dim = self.X.shape[1]
        self.C = cond
        self.c_dim = self.C.shape[1]
        self.beta = beta
        self.n_beta = self.beta.shape[0]

        alpha = 1 - self.beta
        self.alpha = torch.tensor([[torch.prod(alpha[:i+1])] for i in range(self.n_beta)]).float()

        self.batch_size = batch_size

        self.model = ConditionalDiffusionNet(self.X.shape[1], self.C.shape[1]).to(self.device)

        train_dataset = torch.utils.data.TensorDataset(self.X, self.C)
        self.train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)

        self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3)



    def learning(self, n_epoch=10):
        self.model.train()

        for e in range(n_epoch):
            for (x_batch, c_batch) in self.train_loader:
                loss_hist = []

                x_batch = x_batch
                c_batch = c_batch
                
                self.optimizer.zero_grad()

                t = torch.randint(low=0, high=self.n_beta, size=(x_batch.shape[0],))
                noise = torch.randn(x_batch.shape[0], self.x_dim)


                x_t = torch.sqrt(self.alpha[t]) * x_batch + torch.sqrt(1-self.alpha[t]) * noise

                noise_pred = self.model(x_t.to(self.device),
                                        c_batch.to(self.device),
                                        t[:,None].float().to(self.device))


                # import ipdb; ipdb.set_trace()
                loss = ((noise_pred - noise.to(device))**2).sum()
                loss_hist.append(loss.detach().cpu().numpy()/x_batch.shape[0])

                loss.backward()

                self.optimizer.step()

            print('epoch: {}, loss: {}'.format(e, np.array(loss_hist).mean()))

        self.model.eval()



    def sampling(self, c, n=100):
        x_sample = torch.randn(n, self.x_dim)
        c_sample = c.repeat(n, 1)

        for t in range(self.n_beta)[::-1]:
            noise = torch.randn(n, self.x_dim)
            if t==0: noise= torch.zeros(n, self.x_dim)

            sigma = torch.sqrt(self.beta[t]*(1-self.alpha[t-1])/(1-self.alpha[t]))

            noise_pred = self.model(x_sample.to(self.device),
                                    c_sample.to(self.device),
                                    torch.tensor([[t]]).float().to(self.device)).detach().cpu()

            # import ipdb;ipdb.set_trace()
            x_sample = (x_sample - self.beta[t]*noise_pred/torch.sqrt(1-self.alpha[t])) / torch.sqrt(1-self.beta[t]) + sigma * noise


        return x_sample

In [4]:
if __name__ == "__main__":
    import numpy as np
    import matplotlib.pyplot as plt

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    N = 3000
    x = np.linspace(0,2*np.pi, N)[:,None]
    y1 = np.sin(x)*2
    y2 = np.cos(x)*2


    xx = np.concatenate([x, x], axis=0)
    yy = np.concatenate(
        [np.concatenate([y1+np.random.normal(0, 0.5, (N,1)), y2+np.random.normal(0, 0.1, (N,1))], axis=1),
         np.concatenate([y2+np.random.normal(0, 0.1, (N,1)), y1+np.random.normal(0, 0.1, (N,1))], axis=1)],
        axis=0)



    beta = np.exp(np.linspace(np.log(0.001), np.log(0.9), 300))
    
    ddpm = ConditionalDenoisingDiffusionProbabilisticModel(
                torch.tensor(yy).float(),
                torch.tensor(xx).float(),
                torch.tensor(beta).float(), device, batch_size=32)

    ddpm.learning(100)


    print('sampling')
    y_sample = np.concatenate([ddpm.sampling(torch.tensor(x[i:i+1]).float(), 20).numpy()
                               for i in range(x.shape[0])[::100]])
    x_sample = np.concatenate([np.ones(20)*xx for xx in x.ravel()[::100]])


    fig = plt.figure()
    ax = fig.add_subplot(111,projection='3d')
    # ax.set_aspect('equal')
    ax.plot(x, y2, y1, 'k')
    ax.plot(x, y1, y2, 'k')
    ax.scatter(xx, yy[:,0], yy[:,1])

    ax.scatter(x_sample, y_sample[:,0], y_sample[:,1])

    plt.show()


epoch: 0, loss: 2.6292905807495117
epoch: 1, loss: 1.8284944295883179
epoch: 2, loss: 1.8389002084732056
epoch: 3, loss: 1.173917531967163
epoch: 4, loss: 1.8581647872924805
epoch: 5, loss: 1.750816822052002
epoch: 6, loss: 1.3658151626586914


KeyboardInterrupt: 

In [7]:
print('xx shape',xx.shape)
print('xx type', type(xx))
print('first few values of xx', xx[:5])

print('yy shape',yy.shape)
print('yy type', type(yy))
print('first few values of c', yy[:5])

print('beta shape',beta.shape)
print('beta type', type(beta))
print('first few values of beta', beta[:5])

print('beta shape',beta.shape)
print('beta type', type(beta))
print('first few values of beta', beta[:5])

print('beta shape',beta.shape)
print('beta type', type(beta))
print('first few values of beta', beta[:5])

xx shape (6000, 1)
xx type <class 'numpy.ndarray'>
first few values of xx [[0.        ]
 [0.00209509]
 [0.00419019]
 [0.00628528]
 [0.00838037]]
yy shape (6000, 2)
yy type <class 'numpy.ndarray'>
first few values of c [[-0.40540375  2.13729504]
 [ 0.50514306  1.98670626]
 [ 0.31272015  2.02338927]
 [ 0.43812962  2.08056098]
 [ 0.10769933  1.93192631]]
beta shape (300,)
beta type <class 'numpy.ndarray'>
first few values of beta [0.001      0.00102301 0.00104655 0.00107063 0.00109527]
beta shape (300,)
beta type <class 'numpy.ndarray'>
first few values of beta [0.001      0.00102301 0.00104655 0.00107063 0.00109527]
beta shape (300,)
beta type <class 'numpy.ndarray'>
first few values of beta [0.001      0.00102301 0.00104655 0.00107063 0.00109527]


### Reading Datas from csv file

In [8]:
file_path = 'episode_2.csv'
data = pd.read_csv(file_path)

# extraction: timestamp, action, position, velocity
x = data['action'].values[:,None]
c = data[['position', 'velocity']].values 

In [9]:
print('x shape',x.shape)
print('x type', type(x))
print('first few value of x', x[:5])

print('c shape',c.shape)
print('c type', type(c))
print('first few value of c', c[:5])

x shape (91, 1)
x type <class 'numpy.ndarray'>
first few value of x [[0.99996948]
 [0.92062378]
 [0.28573608]
 [0.        ]
 [0.        ]]
c shape (91, 2)
c type <class 'numpy.ndarray'>
first few value of c [[-0.46770674  0.00109079]
 [-0.46565223  0.0020545 ]
 [-0.46360153  0.00205069]
 [-0.4619984   0.00160314]
 [-0.46085465  0.00114376]]


### Learning 

In [10]:
beta = np.exp(np.linspace(np.log(0.001), np.log(0.9), 300))
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

ddpm = ConditionalDenoisingDiffusionProbabilisticModel(
                torch.tensor(x).float(),
                torch.tensor(c).float(),
                torch.tensor(beta).float(), device, batch_size=32)

ddpm.learning(100)

epoch: 0, loss: 156.35132740162038
epoch: 1, loss: 360.1457609953704
epoch: 2, loss: 2.7705617834020546
epoch: 3, loss: 61.80036530671296
epoch: 4, loss: 2.1579165988498263
epoch: 5, loss: 21.58051441333912
epoch: 6, loss: 1.59392660635489
epoch: 7, loss: 7.072349265769676
epoch: 8, loss: 2.205293443467882
epoch: 9, loss: 3.011029278790509
epoch: 10, loss: 1.7766186749493633
epoch: 11, loss: 1.1492881774902344
epoch: 12, loss: 1.4640203405309606
epoch: 13, loss: 0.9820390630651403
epoch: 14, loss: 1.2776202449092158
epoch: 15, loss: 0.8480778446903935
epoch: 16, loss: 1.012567025643808
epoch: 17, loss: 0.6592386033799913
epoch: 18, loss: 0.6930957370334201
epoch: 19, loss: 0.7800033004195602
epoch: 20, loss: 0.7889183892144097
epoch: 21, loss: 0.817178867481373
epoch: 22, loss: 0.9278875280309606
epoch: 23, loss: 0.5183733480947988
epoch: 24, loss: 0.8098427101417824
epoch: 25, loss: 0.5260456932915581
epoch: 26, loss: 0.4021931401005498
epoch: 27, loss: 0.47934546294035735
epoch: 28, 

### Sampling 

In [11]:
def predict_and_execute(env, ddpm, device, n_steps, output_file = 'execution_log.csv'):
    # observation from gym evironment
    observation, _ = env.reset()

    # initialize a dataframe to log the data
    log_data =[]

    for step in range(n_steps):
        position = observation[0]
        velocity = observation[1]

        c_tensor = torch.tensor([[position, velocity]], dtype = torch.float32).to(device)

        with torch.no_grad():
            action_tensor = ddpm.sampling(c_tensor)
        
        action = action_tensor.cpu().numpy().flatten()

        # execute action in the environment
        observation, reward, done,_,_ = env.step(action)

        env.render()

        timestamp = time.time()

        # log data into csv file
        log_data.append({'timestamp':timestamp, 'action':action[0], 'position':position, 'velocity': velocity})

        if done:
            break

        time.sleep(0.1)
    
    log_df = pd.DataFrame(log_data)
    log_df.to_csv(output_file , index = False)      



In [14]:
if __name__ == '__main__':
    env = gym.make('MountainCarContinuous-v0', render_mode="human")
    ddpm = ConditionalDenoisingDiffusionProbabilisticModel(torch.tensor(x).float(),
                torch.tensor(c).float(),
                torch.tensor(beta).float(), device, batch_size=32)
    predict_and_execute(env,ddpm, device, n_steps=1000)

: 