In [None]:
import os
from pathlib import Path

In [None]:
# for colab
!git clone https://github.com/laralex/Sk-DL2021-FinalProject
repo_dir = Path().absolute()/'Sk-DL2021-FinalProject'
%pushd Sk-DL2021-FinalProject
!git pull
!git checkout dataset_offline_generation
!pip install pytorch_lightning
import sys
sys.path.append('Sk-DL2021-FinalProject')

In [None]:
# For local runs: uncomment the lines below instead of running the Colab setup cell.
# import sys
# sys.path.append('..')
# repo_dir = Path().absolute().parent

In [None]:
!pwd

import torch
from data.split_step_generator import SplitStepGenerator

# Toggle: store generated datasets on Google Drive (Colab) or next to the repo.
GOOGLE_DRIVE = True

if not GOOGLE_DRIVE:
    # Local storage: a sibling folder of the repository checkout.
    root_dir = repo_dir.parent / 'generated_datasets'
else:
    # Colab storage: mount the user's Drive and keep datasets there.
    from google.colab import drive
    drive.mount('/content/drive')
    root_dir = Path('/content/drive/MyDrive/Sk-DL2021-Datasets')

# Create the datasets root (including missing parents) on first use.
if not root_dir.exists():
    root_dir.mkdir(parents=True, exist_ok=True)

root_dir

In [None]:
import yaml
# Which experiment config to read, and how much data one generator run produces.
CONFIG_NAME = '2d_nonlin0.50_no_disp'
CONFIG = repo_dir/'configs'/f"{CONFIG_NAME}.yaml"
BATCH_SIZE = 1
GENERATE_TRAIN_BATCHES = 50
GENERATE_VAL_BATCHES = 15
GENERATE_TEST_BATCHES = 0

if not os.path.exists(CONFIG):
    # No exception is raised here; `config_hparams` simply stays undefined.
    print('Config file cant be found')
else:
    # Only the data-module constructor arguments of the config are used.
    with open(CONFIG, 'r') as stream:
        config_hparams = yaml.safe_load(stream)['data']['init_args']
    # Override the settings that control on-the-fly generation.
    config_hparams.update(
        data_source_type='generation',
        load_dataset_root_path=None,
        batch_size=BATCH_SIZE,
        generate_n_train_batches=GENERATE_TRAIN_BATCHES,
        generate_n_val_batches=GENERATE_VAL_BATCHES,
        generate_n_test_batches=GENERATE_TEST_BATCHES,
    )

In [None]:
# Display the hyper-parameters that will be passed to the generator.
config_hparams

In [None]:
import yaml
import datetime 

# Folder name for a newly created dataset; passed as `new_dir_name` to
# create_destination() further down in this cell.
NEW_DIR_NAME = CONFIG_NAME

def create_destination(hparams, datasets_root, new_dir_name=None):
    """Create a new dataset folder and store its hyper-parameters inside it.

    Args:
        hparams: YAML-serialisable object written to ``signal_hparams.yaml``.
        datasets_root: Directory under which the new folder is created.
        new_dir_name: Name of the new folder. When ``None``, the current local
            time formatted as ``%m-%d-%Y=%H-%M-%S`` is used instead.

    Returns:
        ``pathlib.Path`` of the newly created folder.

    Raises:
        FileExistsError: If the folder (or its ``signal_hparams.yaml``)
            already exists.
    """
    if new_dir_name is None:
        new_dir_name = datetime.datetime.now().strftime("%m-%d-%Y=%H-%M-%S")
    # Build the path from the `datasets_root` argument. The previous version
    # ignored the argument and read the notebook-global `root_dir` instead.
    new_dir = Path(datasets_root) / new_dir_name
    # Deliberately strict (no exist_ok): an existing folder is never reused.
    os.makedirs(new_dir)
    # Mode 'x' refuses to overwrite an existing file; it replaces a bare
    # `assert`, which is skipped when Python runs with -O.
    with open(new_dir / 'signal_hparams.yaml', 'x') as outfile:
        yaml.dump(hparams, outfile, default_flow_style=False)
    return new_dir
    
# Pick the folder the generated tensors are written to: use whatever
# find_dataset_subdir() returns, or create a new folder named NEW_DIR_NAME
# when it returns None.
# NOTE(review): `find_dataset_subdir` is neither defined nor imported anywhere
# in the visible notebook, and `data_gen` is first assigned in the later
# "generate and save" cell. On a fresh kernel (Restart & Run All) this cell
# therefore raises NameError. TODO: import/define the helper and construct
# `data_gen` before this point.
destination_root = find_dataset_subdir(data_gen.signal_hparams, root_dir)
if destination_root is None:
    destination_root = create_destination(data_gen.signal_hparams, root_dir, NEW_DIR_NAME)
print('Destination: ', destination_root)       

In [None]:
# make folders structure
def save_tensor(tensor, subdir):
    """Write `tensor` to the first unused ``<index>.pt`` file inside `subdir`.

    Args:
        tensor: Tensor to store. ``None`` is reported with a message and
            skipped; a tensor with zero elements is skipped silently.
        subdir: ``pathlib.Path`` of an existing folder that receives the file.

    Returns:
        None.
    """
    if tensor is None:
        print('Nothing to save', subdir)
    elif tensor.numel() != 0:
        # Probe 0.pt, 1.pt, ... until a name that is not taken yet is found.
        index = 0
        destination_path = subdir/f"{index}.pt"
        while os.path.exists(destination_path):
            index += 1
            destination_path = subdir/f"{index}.pt"
        # NOTE(review): an empty placeholder is written first and immediately
        # overwritten by the real data; this looks redundant - confirm intent.
        torch.save(torch.tensor([]), destination_path)
        # A copy of the tensor is what gets serialised.
        torch.save(tensor.clone(), destination_path)
    
# One sub-folder per dataset split, in the fixed order train / val / test.
type_subdirs = []
for split_name in ('train', 'val', 'test'):
    split_dir = destination_root/split_name
    os.makedirs(split_dir, exist_ok=True)
    type_subdirs.append(split_dir)

In [None]:
# Run the generator N_REPEATS times; every run adds one file per split.
N_REPEATS = 20

import time

train_dir, val_dir, test_dir = type_subdirs
started_at = time.time()
for repeat_idx in range(N_REPEATS):
    # A fresh generator is built for every repeat.
    data_gen = SplitStepGenerator(**config_hparams)
    data_gen.prepare_data()
    data_gen.setup()
    # Print the sum of the validation split for this repeat.
    print(data_gen.val.sum())
    save_tensor(data_gen.train, train_dir)
    save_tensor(data_gen.val, val_dir)
    save_tensor(data_gen.test, test_dir)
elapsed = time.time() - started_at
print(f'Time elapsed: {elapsed}')

In [None]:
# Re-point the same hyper-parameters at the files written above and build a
# generator that loads from disk instead of generating.
# NOTE(review): `config_hparams` is modified in place, so re-running the
# generation cell after this one would construct the generator with
# data_source_type='filesystem'.
config_hparams.update(
    data_source_type='filesystem',
    load_dataset_root_path=root_dir,
    batch_size=20,
)
data_gen_load = SplitStepGenerator(**config_hparams)
config_hparams

In [None]:
# Run the data-preparation step of the filesystem-backed generator.
# NOTE(review): unlike the generation loop, setup() is not called here;
# confirm that prepare_data() alone is enough to populate `.train`.
data_gen_load.prepare_data()

In [None]:
# Report the shape of the loaded training split, if there is one.
loaded_train = data_gen_load.train
if loaded_train is not None:
    print(loaded_train.shape)