
# Data Generation



In [1]:
# INSTALL DEEPINVERSE
%%capture
!pip install git+https://github.com/deepinv/deepinv.git

In [None]:
# IMPORT LIBRARY
import deepinv as dinv
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import torch
from torchvision import transforms, datasets

In [None]:
# REPRODUCIBILITY
#       : fix PyTorch's global random seed so the example can be reproduced.
torch.manual_seed(42)

# HARDWARE SETTINGS
#       : to use a GPU on Colab, first change the runtime to T4 GPU.
#       : with a GPU, pick the device through deepinv and use parallel dataloader
#         workers; otherwise stay on the CPU with no extra workers.
if torch.cuda.is_available():
    device = dinv.utils.get_freer_gpu()
    num_workers = 5
else:
    device = "cpu"
    num_workers = 0

# STEP 1: IMPORT ORIGINAL DATASET

In [None]:
# LOAD THE "ORIGINAL" (CLEAN) MNIST DATASET
#       : images are converted to tensors by the `ToTensor` transform.
transform = transforms.Compose([transforms.ToTensor()])

# Arguments shared by the train and test splits; only `train` differs between them.
mnist_kwargs = dict(root="datasets/", transform=transform, download=True)

# TRAIN SPLIT
Train_dataset = datasets.MNIST(train=True, **mnist_kwargs)
# TEST SPLIT
Test_dataset = datasets.MNIST(train=False, **mnist_kwargs)

100%|██████████| 9.91M/9.91M [00:00<00:00, 16.0MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 505kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.46MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 1.06MB/s]


# STEP 2: MODIFY THE DATASET

Starting from the original dataset, I create new datasets by applying a given physics (forward operator) to the original images. Different physics yield different datasets.

# STEP 2.1: BLURRED DATASET

In [None]:
# 1. PHYSICS OF THE DEBLURRING PROBLEM

#        Blur kernel: anisotropic Gaussian, rotated by 45 degrees
#        (simulates a motion blur along a diagonal direction).
#        NOTE(review): the name `filter` shadows the Python builtin of the same name.
filter = dinv.physics.blur.gaussian_blur(sigma=(2, 0.1), angle=45.0).to(device=device)

#        Additive Gaussian noise with level `noise_level` (delta)
noise_level = 0.1
noise_model = dinv.physics.GaussianNoise(sigma=noise_level)

#        BlurFFT operator built from the kernel and the noise model
physics = dinv.physics.BlurFFT(
    filter=filter,
    img_size=(1, 28, 28),
    device=device,
    noise_model=noise_model,
)


# 2. MAXIMUM NUMBER OF TRAIN / TEST IMAGES (smaller when no GPU is available)
if torch.cuda.is_available():
    n_train_max, n_test_max = 250, 50
else:
    n_train_max, n_test_max = 50, 10


# 3. DIRECTORY WHERE THE DATASET IS SAVED
measurement_dir = "blur"

# 4. GENERATE THE DATASET
#        `generate_dataset` returns the path of the saved dataset.
blur_generation_args = dict(
    train_dataset=Train_dataset,
    test_dataset=Test_dataset,
    physics=physics,
    device=device,
    save_dir=measurement_dir,
    train_datapoints=n_train_max,
    test_datapoints=n_test_max,
    num_workers=num_workers,
    dataset_filename="deblur",
)
deepinv_datasets_path = dinv.datasets.generate_dataset(**blur_generation_args)

Dataset has been saved at blur/deblur0.h5


# STEP 2.2: NOISY DATASET

In [None]:
# 1. PHYSICS OF THE DENOISING PROBLEM

#       Gaussian noise with level `sigma_PnP`
sigma_PnP = 0.05
noise_model_PnP = dinv.physics.GaussianNoise(sigma=sigma_PnP)

#       Denoising operator using that noise model
physics_PnP = dinv.physics.Denoising(device=device, noise_model=noise_model_PnP)


# 2. MAXIMUM NUMBER OF TRAIN / TEST IMAGES (smaller when no GPU is available)
if torch.cuda.is_available():
    n_train_max_PnP, n_test_max_PnP = 250, 50
else:
    n_train_max_PnP, n_test_max_PnP = 50, 10

# 3. DIRECTORY WHERE THE DATASET IS SAVED
#       NOTE(review): this reassigns `measurement_dir`, which the blurred-dataset
#       cell sets to a different value.
measurement_dir = "noisy"

# 4. GENERATE THE DATASET
#       `generate_dataset` returns the path of the saved dataset.
noisy_generation_args = dict(
    train_dataset=Train_dataset,
    test_dataset=Test_dataset,
    physics=physics_PnP,
    device=device,
    save_dir=measurement_dir,
    train_datapoints=n_train_max_PnP,
    test_datapoints=n_test_max_PnP,
    num_workers=num_workers,
    dataset_filename="denoise",
)
deepinv_datasets_path_PnP = dinv.datasets.generate_dataset(**noisy_generation_args)

Dataset has been saved at noisy/denoise0.h5
