In [1]:
from ase.db import connect
# Create a training folder and fill it with POSCAR files
from ase.io import read, write
from os.path import join
import concurrent.futures
import sqlite3
import pandas as pd
from os import path, makedirs
db = connect('C2DB.db')  # Replace with your database name

# Keep only the rows that carry an 'ehull' entry, pairing each row id with its value
# in a single pass over the database.
selected = [(row.id, row.ehull) for row in db.select() if 'ehull' in row]

# Split the pairs back into the two parallel lists used by the later cells.
id_list = [row_id for row_id, _ in selected]
e_hull_list = [e_hull for _, e_hull in selected]

def write_poscar_from_id(id:str, db_path:str, output_path:str):
    """Export one database row as a VASP structure file named ``<id>.vasp``.

    Parameters
    ----------
    id
        Row id to export; it is formatted into both the read selection and the
        output file name, so any value whose ``str()`` form is the row id works.
    db_path
        Path of the database file to read from.
    output_path
        Folder that receives the ``.vasp`` file. It is created if it does not
        exist yet (an empty string means the current directory).
    """
    # "<file>@id=<n>" asks ase.io.read to load a single row from the database.
    atoms = read('{}@id={}'.format(db_path,id))
    file_name = '{}.vasp'.format(id)
    file_path = join(output_path,file_name)
    # Without this the write below fails whenever the folder is missing;
    # exist_ok=True keeps concurrent calls from tripping over each other.
    if output_path:
        makedirs(output_path, exist_ok=True)
    write(file_path,atoms,format='vasp',direct = True)

def wrapped_write_poscar_from_id(id):
    """Single-argument adapter around ``write_poscar_from_id``.

    Pins the database path to 'C2DB.db' and the output folder to 'train-folder/'
    so the function can be passed to a mapper that supplies only the row id.
    """
    fixed_paths = {'db_path': 'C2DB.db', 'output_path': 'train-folder/'}
    write_poscar_from_id(id, **fixed_paths)

# Use ThreadPoolExecutor to apply the wrapped function to each element in the id list.
# executor.map only re-raises a worker's exception when the corresponding result is
# retrieved, so the iterator is drained here; otherwise failed exports (missing folder,
# unreadable row, ...) would pass silently and the cell would look successful.
with concurrent.futures.ThreadPoolExecutor() as executor:
    for _ in executor.map(wrapped_write_poscar_from_id, id_list):
        pass

In [None]:
# File name of every exported structure: the row id followed by '.vasp'.
id_strings = ['{!s}.vasp'.format(row_id) for row_id in id_list]

In [None]:
# Write train-folder/id_prop.csv: one "<structure file name>,<e_hull>" line per structure.

# The two lists are consumed pairwise, so a length mismatch (e.g. a stale list left over
# from an earlier run of another cell) must not be silently truncated by zip().
if len(id_strings) != len(e_hull_list):
    raise ValueError(
        "id_strings and e_hull_list differ in length: %d vs %d"
        % (len(id_strings), len(e_hull_list))
    )

# Make sure the target folder exists so this cell also works on a fresh checkout.
makedirs('train-folder', exist_ok=True)

# The context manager closes the file even if one of the writes raises.
with open("train-folder/id_prop.csv", "w") as f:
    for file_name, e_hull in zip(id_strings, e_hull_list):
        f.write("%s,%6f\n" % (file_name, e_hull))

In [None]:
import json

# The config file is written into the training folder; create it if it is missing so
# this cell does not depend on an earlier cell having produced the folder.
makedirs('train-folder', exist_ok=True)

# Define a configuration dictionary for the ALIGNN model.
# Every key appears exactly once: a repeated key in a dict literal is silently
# overridden by its last occurrence, which hides typos and conflicting values.
data = {
    "version": "112bbedebdaecf59fb18e11c929080fb2f358246",
    "dataset": "user_data",
    "target": "target",
    "atom_features": "cgcnn",
    "neighbor_strategy": "k-nearest",
    "id_tag": "jid",
    "random_seed": 123,
    # Absolute split sizes are left unset; the ratios below are given instead.
    "n_val": None,
    "n_test": None,
    "n_train": None,
    "train_ratio": 0.8,
    "val_ratio": 0.1,
    "test_ratio": 0.1,
    "target_multiplication_factor": None,
    # Optimisation settings.
    "epochs": 200,
    "batch_size": 32,
    "weight_decay": 1e-05,
    "learning_rate": 0.001,
    "filename": "sample",
    "warmup_steps": 2000,
    "criterion": "mse",
    "optimizer": "adamw",
    "scheduler": "onecycle",
    # Runtime / output switches.
    "pin_memory": False,
    "save_dataloader": False,
    "write_checkpoint": True,
    "write_predictions": True,
    "store_outputs": True,
    "progress": True,
    "log_tensorboard": False,
    "standard_scalar_and_pca": False,
    "use_canonize": True,
    "num_workers": 0,
    "cutoff": 8.0,
    "max_neighbors": 12,
    "n_early_stopping": 15,
    "keep_data_order": False,
    # Network architecture.
    "model": {
        "name": "alignn",
        "alignn_layers": 4,
        "gcn_layers": 4,
        "atom_input_features": 92,
        "edge_input_features": 80,
        "triplet_input_features": 40,
        "embedding_features": 64,
        "hidden_features": 256,
        "output_features": 1,
        "link": "identity",
        "zero_inflated": False,
    }
}

# Python None/True/False are serialised as JSON null/true/false.
with open('train-folder/config.json', 'w') as f:
    json.dump(data, f)

In [None]:
# Start training through the shell: the command is wrapped in `nohup` and backgrounded
# with a trailing `&`. It reads structures and config from "train-folder" and is asked
# to write its results to "with_transfer".
# NOTE(review): --restart_model_path is given the data folder "train-folder" rather than
# a model/checkpoint file — confirm this is what the training script expects.
# NOTE(review): the script's own flag is presumably --config_name, with "--config"
# accepted as an abbreviation — verify against the installed version.
!nohup train_folder.py --root_dir "train-folder" --config "train-folder/config.json" --output_dir "with_transfer" --restart_model_path "train-folder" &