<a href="https://colab.research.google.com/github/pollyjuice74/Error-Bit-Decoding/blob/main/DDECCT_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from __future__ import print_function
import argparse
import random
import sys
import os
import torch
from torch.utils.data import DataLoader
from torch.utils import data
from datetime import datetime
import time
from torch.optim.lr_scheduler import CosineAnnealingLR

if not os.path.exists('DDECC'):
  !git clone https://github.com/pollyjuice74/DDECC.git
os.chdir('DDECC')

from Codes import *
from DDECC import DDECCT
from utils import *


Cloning into 'DDECC'...
remote: Enumerating objects: 100, done.[K
remote: Counting objects: 100% (38/38), done.[K
remote: Compressing objects: 100% (19/19), done.[K
remote: Total 100 (delta 14), reused 35 (delta 13), pack-reused 62[K
Receiving objects: 100% (100/100), 141.49 MiB | 23.89 MiB/s, done.
Resolving deltas: 100% (36/36), done.


# The modifications are made so that the model trains on BCH codes of length n=63, k=45, where it says:

"### IMPORTANT ###"

In [None]:
# Setup the argument parser
parser = argparse.ArgumentParser(description='PyTorch DDPM_ECCT')

### IMPORTANT ###
###############################################################################
###############################################################################

# select code type, k and n                           ###
parser.add_argument('--code_type', type=str, default='BCH', choices=['BCH', 'POLAR', 'LDPC', 'CCSDS', 'MACKAY'])
parser.add_argument('--code_k', type=int, default=45) # k
parser.add_argument('--code_n', type=int, default=63) # n

###############################################################################
###############################################################################

parser.add_argument('--epochs', type=int, default=2000) ## EPOCHS
parser.add_argument('--workers', type=int, default=4)
parser.add_argument('--lr', type=float, default=5e-4)
parser.add_argument('--gpus', type=str, default='0', help='gpus ids')
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('--test_batch_size', type=int, default=2048)
parser.add_argument('--seed', type=int, default=42)
parser.add_argument('--N_dec', type=int, default=2)
parser.add_argument('--d_model', type=int, default=32)
parser.add_argument('--h', type=int, default=8)
parser.add_argument('--sigma', type=float, default=0.01)

# Function to set seed for reproducibility
def set_seed(seed=42):
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

# Adjust argument parsing for notebook environments
if 'ipykernel' in sys.argv[0] or 'colab' in sys.argv[0]:
    args = parser.parse_args(args=[])
else:
    args = parser.parse_args()

# Environment settings for CUDA
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

# Apply the seed
set_seed(args.seed)

# Code setup
class Code():
    pass

code = Code()
code.k = args.code_k
code.n = args.code_n
code.code_type = args.code_type
G, H = Get_Generator_and_Parity(code)
code.generator_matrix = torch.from_numpy(G).transpose(0, 1).long()
code.pc_matrix = torch.from_numpy(H).long()
args.code = code
args.N_steps = code.pc_matrix.shape[0] + 5  # Calculate steps

# Setup model directory and
model_dir = os.path.join('DDECCT_Results', f'{args.code_type}__Code_n_{args.code_n}_k_{args.code_k}') #__{datetime.now().strftime("%d_%m_%Y_%H_%M_%S")}')
os.makedirs(model_dir, exist_ok=True)
args.path = model_dir

print(f"Path to model/logs: {model_dir}")
print(f"Args: {args}")



Path to model/logs: DDECCT_Results/BCH__Code_n_63_k_45
Args: Namespace(code_type='BCH', code_k=45, code_n=63, epochs=2000, workers=4, lr=0.0005, gpus='0', batch_size=128, test_batch_size=2048, seed=42, N_dec=2, d_model=32, h=8, sigma=0.01, code=<__main__.Code object at 0x7ec0fd2383a0>, N_steps=23, path='DDECCT_Results/BCH__Code_n_63_k_45')


# The modifications made are seen in the **FEC_Dataset** where it says:

"### IMPORTANT ###"

In [None]:
############ ZERO CODEWORD SET TO TRUE ###############
class FEC_Dataset(data.Dataset):                 ####
    def __init__(self, code, sigma, len, zero_cw=True):
        self.code = code
        self.sigma = sigma
        self.len = len
        self.generator_matrix = code.generator_matrix.transpose(0, 1)
        self.pc_matrix = code.pc_matrix.transpose(0, 1)

        self.zero_word = torch.zeros((self.code.k)).long() if zero_cw else None
        self.zero_cw = torch.zeros((self.code.n)).long() if zero_cw else None

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        if self.zero_cw is None:
            m = torch.randint(0, 2, (1, self.code.k)).squeeze()
            x = torch.matmul(m, self.generator_matrix) % 2
        else: # SET TO TRUE
            m = self.zero_word
            x = self.zero_cw

        std_noise = random.choice(self.sigma)
        z = torch.randn(self.code.n) * std_noise
        #h = torch.from_numpy(np.random.rayleigh(1,self.code.n)).float()
        # h=1
        # y = h*bin_to_sign(x) + z

        ### IMPORTANT ###
        #######################################################################
        #######################################################################
        y = x.clone()

        # index to be flipped
        ix = torch.tensor(random.sample(range(self.code.n), 3))
        y[ix] = 1 - y[ix] # flip bits
        y = bin_to_sign(y)

        #######################################################################
        #######################################################################

        magnitude = torch.abs(y)
        syndrome = torch.matmul(sign_to_bin(torch.sign(y)).long(),
                                self.pc_matrix) % 2
        syndrome = bin_to_sign(syndrome)
        return m.float(), x.float(), z.float(), y.float(), magnitude.float(), syndrome.float()


def train(model, device, train_loader, optimizer, epoch, LR):
    model.train()
    cum_loss = cum_samples = 0
    t = time.time()
    for batch_idx, (m, x, z, y, magnitude, syndrome) in enumerate(
            train_loader):
        loss = model.loss(bin_to_sign(x))
        model.zero_grad()
        loss.backward()
        optimizer.step()
        model.ema.update(model)
        ###
        cum_loss += loss.item() * x.shape[0]
        cum_samples += x.shape[0]
        if (batch_idx+1) % 500 == 0 or batch_idx == len(train_loader) - 1:
            print(
                f'Training epoch {epoch}, Batch {batch_idx + 1}/{len(train_loader)}: LR={LR:.2e}, Loss={cum_loss / cum_samples:.5e}')
    print(f'Epoch {epoch} Train Time {time.time() - t}s\n')
    return cum_loss / cum_samples

##################################################################

def test(model, device, test_loader_list, EbNo_range_test, min_FER=100, max_cum_count=1e7, min_cum_count=1e5):
    model.eval()
    test_loss_ber_list, test_loss_fer_list, cum_samples_all = [], [], []
    t = time.time()
    with torch.no_grad():
        for ii, test_loader in enumerate(test_loader_list):
            test_ber = test_fer = cum_count = 0.
            _, x_pred_list, _, _ = model.p_sample_loop(next(iter(test_loader))[3])
            test_ber_ddpm , test_fer_ddpm = [0]*len(x_pred_list), [0]*len(x_pred_list)
            idx_conv_all = []
            while True:
                (m, x, z, y, magnitude, syndrome) = next(iter(test_loader))
                x_pred, x_pred_list, idx_conv,synd_all = model.p_sample_loop(y)
                x_pred = sign_to_bin(torch.sign(x_pred))

                idx_conv_all.append(idx_conv)
                for kk, x_pred_tmp in enumerate(x_pred_list):
                    x_pred_tmp = sign_to_bin(torch.sign(x_pred_tmp))

                    test_ber_ddpm[kk] += BER(x_pred_tmp, x) * x.shape[0]
                    test_fer_ddpm[kk] += FER(x_pred_tmp, x) * x.shape[0]

                test_ber += BER(x_pred, x) * x.shape[0]
                test_fer += FER(x_pred, x) * x.shape[0]
                cum_count += x.shape[0]
                if (min_FER > 0 and test_fer > min_FER and cum_count > min_cum_count) or cum_count >= max_cum_count:
                    if cum_count >= 1e9:
                        print(f'Cum count reached EbN0:{EbNo_range_test[ii]}')
                    else:
                        print(f'FER count treshold reached EbN0:{EbNo_range_test[ii]}')
                    break
            idx_conv_all = torch.stack(idx_conv_all).float()
            cum_samples_all.append(cum_count)
            test_loss_ber_list.append(test_ber / cum_count)
            test_loss_fer_list.append(test_fer / cum_count)
            for kk in range(len(test_ber_ddpm)):
                test_ber_ddpm[kk] /= cum_count
                test_fer_ddpm[kk] /= cum_count
            print(f'Test EbN0={EbNo_range_test[ii]}, BER={test_loss_ber_list}')
            print(f'Test EbN0={EbNo_range_test[ii]}, BER_DDPM={test_ber_ddpm}')
            print(f'Test EbN0={EbNo_range_test[ii]}, -ln(BER)_DDPM={[-np.log(elem) for elem in test_ber_ddpm]}')
            print(f'Test EbN0={EbNo_range_test[ii]}, FER_DDPM={test_fer_ddpm}')
            print(f'#It. to zero syndrome: Mean={idx_conv_all.mean()}, Std={idx_conv_all.std()}, Min={idx_conv_all.min()}, Max={idx_conv_all.max()}')
        ###
        print('Test FER ' + ' '.join(
            ['{}: {:.2e}'.format(ebno, elem) for (elem, ebno)
             in
             (zip(test_loss_fer_list, EbNo_range_test))]))
        print('Test BER ' + ' '.join(
            ['{}: {:.2e}'.format(ebno, elem) for (elem, ebno)
             in
             (zip(test_loss_ber_list, EbNo_range_test))]))
        print('Test -ln(BER) ' + ' '.join(
            ['{}: {:.2e}'.format(ebno, -np.log(elem)) for (elem, ebno)
             in
             (zip(test_loss_ber_list, EbNo_range_test))]))
    print(f'# of testing samples: {cum_samples_all}\n Test Time {time.time() - t} s\n')
    return test_loss_ber_list, test_loss_fer_list

In [None]:
code = args.code
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# MODEL #
#################################
model = DDECCT(args, device=device,dropout=0).to(device)
model.ema.register(model)

print('Loading Best Model')
print(args.path)
print(os.getcwd())
model = torch.load(os.path.join(args.path, 'best_model')).to(device)
#################################

optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
scheduler = CosineAnnealingLR(optimizer, T_max=args.epochs, eta_min=5e-6)

print(model)
print(f'# of Parameters: {np.sum([np.prod(p.shape) for p in model.parameters()])}')

#################################
EbNo_range_test = range(4, 7)
EbNo_range_train = range(2, 8)
std_train = [EbN0_to_std(ii, code.k / code.n) for ii in EbNo_range_train]
std_test = [EbN0_to_std(ii, code.k / code.n) for ii in EbNo_range_test]
train_dataloader = DataLoader(FEC_Dataset(code, std_train, len=args.batch_size * 1000, zero_cw=True), batch_size=int(args.batch_size),
                              shuffle=True, num_workers=args.workers)
test_dataloader_list = [DataLoader(FEC_Dataset(code, [std_test[ii]], len=int(args.test_batch_size), zero_cw=False),
                                    batch_size=int(args.test_batch_size), shuffle=False, num_workers=args.workers) for ii in range(len(std_test))]
#################################

print(f"Training model with code type: {args.code_type}")
print(args.code_type)


best_loss = float('inf')
for epoch in range(1, args.epochs + 1):
    loss= train(model, device, train_dataloader, optimizer,
                            epoch, LR=scheduler.get_last_lr()[0])
    scheduler.step()
    if loss < best_loss:
        best_loss = loss
        torch.save(model, os.path.join(args.path, 'best_model'))
        print(f'Model Saved')
    if epoch % (args.epochs//2) == 0 or epoch in [1,25]:

        ### PUSH TO GITHUB ###
        !git config --global user.name "pollyjuice74"
        !git config --global user.email "hernandez.aht82836@gmail.com"

        !git remote set-url origin https://pollyjuice74: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@github.com/pollyjuice74/DDECC.git

        !git add .
        !git commit -m "Add trained model weights"
        !git push origin main
        ######################

        test(model, device, test_dataloader_list, EbNo_range_test,min_FER=50,max_cum_count=1e6,min_cum_count=1e4)
#################################

print('Regular Reverse Diffusion')
test(model, device, test_dataloader_list, EbNo_range_test,min_FER=100)
print('Line Search Reverse Diffusion')
model.line_search = True
test(model, device, test_dataloader_list, EbNo_range_test,min_FER=100)

Loading Best Model
DDECCT_Results/BCH__Code_n_63_k_45
/content/DDECC


RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

In [6]:
!git config --global user.name "pollyjuice74"
!git config --global user.email "hernandez.aht82836@gmail.com"

!git remote set-url origin https://pollyjuice74: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@github.com/pollyjuice74/DDECC.git

!git add .
!git commit -m "Add trained model weights"
!git push origin main

[main 4cc0c5a] Add trained model weights
 3 files changed, 0 insertions(+), 0 deletions(-)
Enumerating objects: 9, done.
Counting objects: 100% (9/9), done.
Delta compression using up to 2 threads
Compressing objects: 100% (6/6), done.
Writing objects: 100% (6/6), 6.10 KiB | 6.10 MiB/s, done.
Total 6 (delta 2), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas: 100% (2/2), completed with 2 local objects.[K
To https://github.com/pollyjuice74/DDECC.git
   d008088..4cc0c5a  main -> main
