In [1]:
# Baseline experiment configuration: one nested dict with a section per
# component. Sections are added in the order: wandb, data, molecule_encoder,
# molecule_decoder, spectra_encoder, train.
config = dict(
    # Run bookkeeping; 'run_name' is reused below to derive the checkpoint dir.
    wandb=dict(
        dir="/scratch/kanakala.ganesh/",
        job_type="sample",
        project_name="CLIP_Full",
        run_name="RUN_best_config",
    ),
)

config.update(
    # Dataset locations and loader settings.
    data=dict(
        qm9_broad_ir_path='/home2/kanakala.ganesh/ir_data/qm9_broad_ir.pkl',
        vocab_path='/home2/kanakala.ganesh/CLIP_PART_1/data/qm9_vocab.pkl',
        datafiles=dict(
            train='/home2/kanakala.ganesh/ir_data/raw_train.pickle',
            test='/home2/kanakala.ganesh/ir_data/raw_test.pickle',
            val='/home2/kanakala.ganesh/ir_data/raw_val.pickle',
        ),
        normalization="minmax",
        shuffle=True,
        batch_size=400,
        seq_len=70,
        splits=[0.8, 0.1, 0.1],
        num_workers=20,
        # Left unset here (None) — presumably filled in once the dataset is
        # loaded; TODO confirm against the training code.
        max_charge=None,
        num_species=None,
    ),
    molecule_encoder=dict(
        attention=1,
        coords_weight=1.0,
        device="cuda",
        hidden_nf=256,
        in_edge_nf=0,
        in_node_nf=15,
        n_layers=5,
        node_attr=1,
        output_size=512,
    ),
    molecule_decoder=dict(
        in_size=512,
        latent_size=512,
        hidden_size=512,
        n_layers=3,
        n_heads=4,
    ),
    spectra_encoder=dict(
        d_ff=1024,
        dropout=0.1,
        dropout_emb=0.1,
        h_dim=512,
        max_time_steps=1000,
        num_heads=7,
        num_layers=5,
        output_size=512,
        patch_size=7,
        use_clf_token=True,
    ),
)

# The train section is assigned last because its checkpoint directory is
# derived from the run name stored in the wandb section above.
config['train'] = dict(
    lr=0.0001,
    temperature=1,
    checkpoint_dir="checkpoints/" + str(config['wandb']['run_name']),
    device="cuda",
    num_epochs=500,
    threshold=0.9999,
    weight_decay=1.0e-06,
)

In [1]:
import yaml
# with open('./sample_config.yaml', 'w') as f:
#     yaml.dump(config, f)

In [2]:
# Load the standard unit-normalisation config from disk, replacing any
# `config` object already in the kernel. The file is opened in a `with`
# block so the handle is closed as soon as parsing finishes instead of being
# left for the garbage collector.
with open('./configs/standard/unit_norm.yaml', 'r') as f:
    config = yaml.safe_load(f)

# The commented-out statements below look like the one-off steps that wrote
# the two files under configs/standard/ (unit and minmax normalisation).
# Kept for reference only; they are not executed.
# config['data']['normalization'] = "unit"
# config['wandb']['run_name'] = "RUN_unit_norm"
# config['train']['checkpoint_dir'] = "checkpoints/" + str(config['wandb']['run_name'])
# with open('configs/standard/unit_norm.yaml', 'w') as f:
#     yaml.dump(config, f)

# config = yaml.safe_load(open('./sample_config.yaml', 'r'))
# config['data']['normalization'] = "minmax"
# config['wandb']['run_name'] = "RUN_minmax_norm"
# config['train']['checkpoint_dir'] = "checkpoints/" + str(config['wandb']['run_name'])
# with open('configs/standard/minmax_norm.yaml', 'w') as f:
#     yaml.dump(config, f)
    

In [7]:
# Batch-size sweep: write one YAML config per batch size into
# configs/batch_size/, each derived from the standard unit-norm config.
for batch_size in [16, 32, 64, 128, 256, 512]:
    # Re-read the base file on every iteration so each variant starts from an
    # unmodified copy. The `with` block closes the read handle explicitly
    # rather than leaving one open handle per iteration to the garbage
    # collector.
    with open('./configs/standard/unit_norm.yaml', 'r') as f:
        config = yaml.safe_load(f)

    config['wandb']['project_name'] = 'CLIP_batch_size'
    config['data']['batch_size'] = batch_size
    # The run name encodes the swept value; the checkpoint dir follows it.
    config['wandb']['run_name'] = "run_batch_size_" + str(batch_size)
    config['train']['checkpoint_dir'] = "checkpoints/" + config['wandb']['run_name']

    # NOTE: the configs/batch_size/ directory must already exist.
    with open('configs/batch_size/batch_'+str(batch_size)+'.yaml', 'w') as f:
        yaml.dump(config, f)

In [3]:
# Window-size sweep: write one YAML config per spectra-encoder patch size into
# configs/window_size/, each derived from the standard unit-norm config with
# the batch size fixed at 256.
for window_size in [1,3,5,7,9,11,13,15]:
    # Re-read the base file on every iteration so each variant starts from an
    # unmodified copy. The `with` block closes the read handle explicitly
    # rather than leaving one open handle per iteration to the garbage
    # collector.
    with open('./configs/standard/unit_norm.yaml', 'r') as f:
        config = yaml.safe_load(f)

    config['data']['batch_size'] = 256
    config['wandb']['project_name'] = 'CLIP_window_size'
    # The swept "window size" is stored under the encoder's 'patch_size' key.
    config['spectra_encoder']['patch_size'] = window_size
    # The run name encodes the swept value; the checkpoint dir follows it.
    config['wandb']['run_name'] = "run_window_size_" + str(window_size)
    config['train']['checkpoint_dir'] = "checkpoints/" + config['wandb']['run_name']

    # NOTE: the configs/window_size/ directory must already exist.
    with open('configs/window_size/window_'+str(window_size)+'.yaml', 'w') as f:
        yaml.dump(config, f)