In [1]:
# Colab variant (disabled): clone the project repository and put the checkout
# on the import path instead of the parent directory.
# !git clone https://github.com/laralex/Sk-DL2021-FinalProject
# import sys
# sys.path.append('Sk-DL2021-FinalProject')

# Local variant: add the parent directory to the import path; the next cell
# imports from the `data` package, which is presumably located there.
import sys
sys.path.append('..')

In [2]:
import os
import torch
from data.split_step_generator import SplitStepGenerator, find_dataset_subdir

In [3]:
# Toggle: store datasets on a mounted Google Drive (Colab) or on the local disk.
GOOGLE_DRIVE = False

if not GOOGLE_DRIVE:
    # Local run: datasets live two levels above the notebook directory.
    root_dir = '../../generated_datasets'
else:
    # Colab run: mount the drive first, then point at the datasets folder on it.
    from google.colab import drive
    drive.mount('./drive')
    root_dir = './drive/MyDrive/Sk-DL2021-Datasets'

# Make sure the datasets root exists before anything is looked up or written.
if not os.path.exists(root_dir):
    os.makedirs(root_dir, exist_ok=True)

In [4]:
# Generator instance used to produce a new dataset; it requests one train,
# one validation and one test batch. The meaning of the individual settings
# is defined by SplitStepGenerator in data.split_step_generator.
data_gen = SplitStepGenerator(
    batch_size=10,
    seq_len=33,
    dispersion=0.6,
    nonlinearity=0.02,
    pulse_width=10,
    z_end=120,
    dz=0.1,
    z_stride=1000,
    dim_t=2**12,
    dispersion_compensate=True,
    num_blocks=16,
    n_train_batches=1,
    n_val_batches=1,
    n_test_batches=1,
    two_dim_data=True,
    device='available',
)

In [5]:
import yaml
import datetime 


def create_destination(hparams, datasets_root):
    """Create a new timestamped dataset directory and store its hyperparameters.

    The directory is created inside ``datasets_root`` and named after the
    current local time (``MM-DD-YYYY=HH-MM-SS``). ``hparams`` is dumped into
    ``signal_hparams.yaml`` inside the new directory.

    Args:
        hparams: YAML-serializable object describing the dataset settings.
        datasets_root: Directory under which the new dataset directory is created.

    Returns:
        Path of the newly created directory, as a string.

    Raises:
        FileExistsError: If a directory with the same timestamp already exists
            (e.g. two calls within the same second).
    """
    timestamp = datetime.datetime.now().strftime("%m-%d-%Y=%H-%M-%S")
    # Use the root passed by the caller; previously the global `root_dir`
    # was read here and the `datasets_root` argument was silently ignored.
    new_dir = f'{datasets_root}/{timestamp}'
    os.makedirs(new_dir)
    with open(f'{new_dir}/signal_hparams.yaml', 'w') as outfile:
        yaml.dump(hparams, outfile, default_flow_style=False)
    return new_dir
    
# Look for an existing dataset directory matching these hyperparameters;
# fall back to creating a fresh one when the lookup returns None.
destination_root = find_dataset_subdir(data_gen.signal_hparams, root_dir)
destination_root = (
    destination_root
    if destination_root is not None
    else create_destination(data_gen.signal_hparams, root_dir)
)
print('Destination: ', destination_root)

Destination:  ../../generated_datasets/05-21-2021=01-18-46


In [6]:
# Run the generator's two preparation steps. The recorded output of this cell
# reports that the dataset is generated with Split-Step. A later cell reads
# data_gen.train / .val / .test, so these calls presumably populate them —
# TODO confirm which of the two does.
data_gen.prepare_data()
data_gen.setup()

Generating the dataset using Split-Step
Dataset was generated in 13 sec


In [7]:
def save_tensor(tensor, subdir):
    """Store ``tensor`` in ``subdir`` under the first unused ``<index>.pt`` name.

    A ``None`` tensor is reported with a message and skipped; a tensor with
    zero elements is skipped without a message. Otherwise indices 0, 1, 2, ...
    are probed until a file name that does not exist yet is found, and the
    tensor is written there with ``torch.save``.
    """
    if tensor is None:
        print('Nothing to save', subdir)
    elif tensor.numel() != 0:
        # Probe candidate file names until a free slot is found.
        index = 0
        target = f"{subdir}/{index}.pt"
        while os.path.exists(target):
            index += 1
            target = f"{subdir}/{index}.pt"
        torch.save(tensor, target)
    
# One sub-directory per split, in the fixed order: train, val, test.
type_subdirs = ['/'.join((destination_root, sub)) for sub in ('train', 'val', 'test')]

# Create the split directories up front; already existing ones are left alone.
for d in type_subdirs:
    os.makedirs(d, exist_ok=True)

# Write each split into its own directory (indices follow the order above).
save_tensor(data_gen.train, type_subdirs[0])
save_tensor(data_gen.val, type_subdirs[1])
save_tensor(data_gen.test, type_subdirs[2])

In [8]:
# Second generator instance with the same settings as the one created earlier,
# except that the n_*_batches arguments are omitted and dataset_root_path
# points at root_dir. Presumably this makes the generator pick up the dataset
# stored on disk instead of generating a new one — TODO confirm.
data_gen = SplitStepGenerator(
    batch_size=10,
    seq_len=33,
    dispersion=0.6,
    nonlinearity=0.02,
    pulse_width=10,
    z_end=120,
    dz=0.1,
    z_stride=1000,
    dim_t=2**12,
    dispersion_compensate=True,
    num_blocks=16,
    two_dim_data=True,
    device='available',
    dataset_root_path=root_dir,
)

In [9]:
# Prepare the data for the instance configured with dataset_root_path.
# setup() is not called here, yet the following cell reads train/val/test,
# so prepare_data() presumably fills them in this mode — TODO confirm.
data_gen.prepare_data()

In [10]:
# Sanity check: display the shapes of the train, validation and test tensors.
data_gen.train.shape, data_gen.val.shape, data_gen.test.shape

(torch.Size([2, 30, 512, 16]),
 torch.Size([2, 50, 512, 16]),
 torch.Size([2, 70, 512, 16]))