# Diffusion Model Transfer Learning

## 1. Technical Requirements

In [None]:
!pip install torch torchvision torchaudio
!pip install bitsandbytes
!pip install transformers
!pip install accelerate
!pip install diffusers

## 2. Training a Neural Network model with PyTorch

### 2.1. Prepare the training data

In [1]:
import numpy as np
w_list = np.array([2,3,4,7])

import random
x_list = []
for _ in range(10):
    x_sample = np.array([random.randint(1,100) for _ in range(len(w_list))])
    x_list.append(x_sample)

y_list = []
for x_sample in x_list:
    y_temp = x_sample@w_list
    y_list.append(y_temp)

print("x_list:",x_list)
print("y_list:",y_list)

x_list: [array([58, 49, 87, 76]), array([89, 10,  9, 43]), array([62, 75, 75, 53]), array([20, 16, 77, 22]), array([70, 93, 71, 56]), array([71, 79, 29, 60]), array([ 97, 100,  76,  51]), array([ 20, 100,  57,  35]), array([49, 33, 24,  1]), array([ 9, 36, 10, 36])]
y_list: [1143, 545, 1020, 550, 1095, 915, 1155, 813, 300, 418]


### 2.2. Preparing for training

In [2]:
import torch 
import torch.nn as nn

class MyLinear(nn.Module):
    def __init__(self):
        super().__init__()
        self.w = nn.Parameter(torch.randn(len(w_list)))
    
    def forward(self, x:torch.Tensor):
        return self.w @ x
    
model = MyLinear()

loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.00001)

x_input = torch.tensor(x_list, dtype=torch.float32)
y_output = torch.tensor(y_list, dtype=torch.float32)

  x_input = torch.tensor(x_list, dtype=torch.float32)


### 2.3. Train the model

In [3]:
# start train model
num_epochs = 100
for epoch in range(num_epochs):
    for i, x in enumerate(x_input):
        # forward
        y_pred = model(x)

        # calculate loss
        loss = loss_fn(y_pred,y_output[i])

        # zero out the cached parameter.
        optimizer.zero_grad()

        # backward
        loss.backward()

        # update paramters
        optimizer.step()

    if (epoch+1) % 10 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))


print("train done") 


Epoch [10/100], Loss: 1296.4526
Epoch [20/100], Loss: 543.5552
Epoch [30/100], Loss: 215.0186
Epoch [40/100], Loss: 82.6519
Epoch [50/100], Loss: 31.3098
Epoch [60/100], Loss: 11.7713
Epoch [70/100], Loss: 4.4084
Epoch [80/100], Loss: 1.6476
Epoch [90/100], Loss: 0.6149
Epoch [100/100], Loss: 0.2295
train done


In [4]:
model.w

Parameter containing:
tensor([2.0052, 3.0020, 4.0047, 6.9828], requires_grad=True)

## 3. Training a model with Hugging Face Accelerate

### 3.1 Training a model with Accelerate

In [5]:
# start train model using Accelerate
from accelerate import utils
utils.write_basic_config()

from accelerate import Accelerator
accelerator = Accelerator()
device = accelerator.device

x_input.to(device)
y_output.to(device)
model.to(device)

model, optimizer = accelerator.prepare(
    model, optimizer
)

num_epochs = 100
for epoch in range(num_epochs):
    for i, x in enumerate(x_input):
        # forward
        y_pred = model(x)

        # calculate loss
        loss = loss_fn(y_pred,y_output[i])

        # zero out the cached parameter.
        optimizer.zero_grad()

        # backward
        #loss.backward()
        accelerator.backward(loss)

        # update paramters
        optimizer.step()

    if (epoch+1) % 10 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

print("train done") 

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument tensor in method wrapper_CUDA__dot)

In [13]:
model = accelerator.unwrap_model(model)
model.w

Parameter containing:
tensor([2.0359, 2.9466, 4.0035, 6.9901], device='cuda:0', requires_grad=True)

### 3.2 Prepare the training data for multiple GPUs training

In [1]:
import numpy as np
w_list = np.array([2,3,4,7])

import random
x_list = []
for _ in range(10):
    x_sample = np.array([random.randint(1,100) for _ in range(len(w_list))])
    x_list.append(x_sample)

y_list = []
for x_sample in x_list:
    y_temp = x_sample@w_list
    y_list.append(y_temp)
train_obj = {
    'w_list':w_list.tolist()
    , 'input':x_list
    , 'output':y_list
}

import pickle
with open('train_data.pkl','wb') as f:
    pickle.dump(train_obj,f)

### 3.3 Train the model with multiple GPUs using Accelerate

The code is in the file `train_model_in_2gpus.py`