<a href="https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/atomgpt_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## AtomGPT example: https://pubs.acs.org/doi/10.1021/acs.jpclett.4c01126


# Table of contents

1. Installing [AtomGPT](https://github.com/usnistgov/atomgpt)
2. Example inverse model training for 5 materials
3. Using the trained model for inference
4. Relaxing structures with ALIGNN-FF
5. Generating a database of atomic structures


Author: Kamal Choudhary (kamal.choudhary@nist.gov)

In [None]:
# !pip install -q condacolab
# import condacolab
# condacolab.install()

⏬ Downloading https://github.com/jaimergp/miniforge/releases/download/24.11.2-1_colab/Miniforge3-colab-24.11.2-1_colab-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:13
🔁 Restarting kernel...


Installation

In [1]:
%%time
import os
os.chdir('/content')
!rm -rf Software
os.makedirs('/content/Software')
os.chdir('/content/Software')
if not os.path.exists('atomgpt'):
  !rm -rf atomgpt
  !git clone https://github.com/atomgptlab/atomgpt.git
  !git checkout develop
  os.chdir('atomgpt')
  !pip install -q -e .



Cloning into 'atomgpt'...
remote: Enumerating objects: 1348, done.[K
remote: Counting objects: 100% (66/66), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 1348 (delta 54), reused 52 (delta 49), pack-reused 1282 (from 1)[K
Receiving objects: 100% (1348/1348), 67.15 MiB | 23.26 MiB/s, done.
Resolving deltas: 100% (766/766), done.
fatal: not a git repository (or any of the parent directories): .git
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.5/104.5 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m344.6 kB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Install the JARVIS-Leaderboard package (quiet mode); the benchmark data file
# used by the data-population step below lives inside this package.
!pip install -q jarvis_leaderboard

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.1/72.1 MB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m76.0 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
textblob 0.19.0 requires nltk>=3.9, but you have nltk 3.8.1 which is incompatible.[0m[31m
[0m

In [3]:
# Check import: smoke-test that the atomgpt install above is importable.
import atomgpt
import os
# Drop any MPLBACKEND setting from the environment *before* matplotlib is
# imported, so that the backend is chosen by the explicit `use` call below.
os.environ.pop('MPLBACKEND', None)  # Remove the invalid backend
import matplotlib
matplotlib.use('Agg')  # Use a compatible backend
import matplotlib.pyplot as plt



In [4]:
# Export the exfoliation-energy benchmark (dft_3d dataset) into the `Out`
# directory; the command reports the train/validation/test sample counts.
!jarvis_populate_data.py --benchmark_file AI-SinglePropertyPrediction-exfoliation_energy-dft_3d-test-mae --output_path=Out

benchmark_file AI-SinglePropertyPrediction-exfoliation_energy-dft_3d-test-mae
dataset dft_3d
output_path Out
property exfoliation_energy
method AI
task SinglePropertyPrediction
id_tag jid
out_format poscar
dataset file to be used /usr/local/lib/python3.11/dist-packages/jarvis_leaderboard/benchmarks/AI/SinglePropertyPrediction/dft_3d_exfoliation_energy.json.zip
Currently for atomistic datasets only.
https://jarvis-tools.readthedocs.io/en/master/databases.html
Obtaining 3D dataset 76k ...
Reference:https://www.nature.com/articles/s41524-020-00440-1
Other versions:https://doi.org/10.6084/m9.figshare.6815699
100% 40.8M/40.8M [00:03<00:00, 12.4MiB/s]
Loading the zipfile...
Loading completed.
number of training samples 650
number of validation samples 81
number of test samples 81


In [5]:
# Read the split sizes from the dataset summary stored in the `Out` directory.
from jarvis.db.jsonutils import loadjson,dumpjson
dataset_info = loadjson('Out/dataset_info.json')
n_train, n_val, n_test = (
    dataset_info[split_key] for split_key in ('n_train', 'n_val', 'n_test')
)

In [6]:
# Small inverse-training configuration (2 training and 2 test samples,
# 5 epochs), written to `atomgpt_inverse_config.json` in the working directory.
temp_config = dict(
    id_prop_path="Out/id_prop.csv",
    prefix='atomgpt_run',
    model_name="knc6/atomgpt_mistral_tc_supercon",
    batch_size=2,
    num_epochs=5,
    seed_val=42,
    num_train=2,
    num_test=2,
    model_save_path='lora_model_m',
)
dumpjson(data=temp_config, filename='atomgpt_inverse_config.json')

In [None]:
# Print the current working directory; the training commands below use paths
# relative to it.
!pwd

/content/Software/atomgpt


In [9]:
# Pin triton to 3.2.0, replacing the preinstalled version.
# NOTE(review): pip reports that the installed torch requires a different
# triton version, so this pin leaves a declared dependency conflict — confirm
# it is still needed.
!pip install triton==3.2.0

Collecting triton==3.2.0
  Downloading triton-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading triton-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (253.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.2/253.2 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: triton
  Attempting uninstall: triton
    Found existing installation: triton 3.3.0
    Uninstalling triton-3.3.0:
      Successfully uninstalled triton-3.3.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torch 2.7.0 requires triton==3.3.0; platform_system == "Linux" and platform_machine == "x86_64", but you have triton 3.2.0 which is incompatible.
torchaudio 2.6.0+cu124 requires torch==2.6.0, but you have torch 2.7.0 which is incompatible.
fastai 2.7.19 requires torch<2.7,>=1.10,

In [10]:
# Confirm that the pinned triton version is the one the kernel actually imports.
import triton
triton.__version__

'3.2.0'

In [None]:
import os

# Put Weights & Biases into offline mode for the training runs below.
# The variable is set on the kernel process itself: a `%%bash` cell runs in a
# separate shell whose `export` is discarded when the cell ends, whereas
# commands started later with `!` inherit the kernel's environment.
os.environ["WANDB_MODE"] = "offline"

In [11]:
# Run the inverse-model script directly, using the example config file that
# ships inside the repository (paths are relative to the repository root).
!python atomgpt/inverse_models/inverse_models.py --config_name atomgpt/examples/inverse_model/config.json

2025-07-09 04:50:53.802864: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752036653.841990    4389 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752036653.853035    4389 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-09 04:50:53.891604: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

         _                   _____ _____ _______ 
    /\  | |                 / ____|  __ \__   __|
   /  \ | |_ ___

In [None]:
# Train through the `atomgpt_inverse_train` command using the custom config
# file `atomgpt_inverse_config.json` written earlier in this notebook.
!atomgpt_inverse_train --config_name atomgpt_inverse_config.json




         _                   _____ _____ _______ 
    /\  | |                 / ____|  __ \__   __|
   /  \ | |_ ___  _ __ ___ | |  __| |__) | | |   
  / /\ \| __/ _ \| '_ ` _ \| | |_ |  ___/  | |   
 / ____ \ || (_) | | | | | | |__| | |      | |   
/_/    \_\__\___/|_| |_| |_|\_____|_|      |_|   
   
config_file atomgpt_inverse_config.json
{'alpaca_prompt': '### Instruction:\n{}\n### Input:\n{}\n### Output:\n{}',
 'batch_size': 2,
 'chem_info': 'formula',
 'csv_out': 'AI-AtomGen-prop-dft_3d-test-rmse.csv',
 'dataset_num_proc': 2,
 'dtype': None,
 'file_format': 'poscar',
 'gradient_accumulation_steps': 4,
 'id_prop_path': 'Out/id_prop.csv',
 'id_tag': 'id',
 'instruction': 'Below is a description of a superconductor material.',
 'learning_rate': 0.0002,
 'load_in_4bit': True,
 'logging_steps': 1,
 'loss_type': 'default',
 'lr_scheduler_type': 'linear',
 'max_seq_length': 2048,
 'model_name': 'knc6/atomgpt_mistral_tc_supercon',
 'model_save_path': 'lora_model_m',
 'num_epochs': 5,


# Training forward/inverse models with AtomGPT requires:

# 1) `config.json` file, 2) `id_prop.csv` file.

# 2. Example inverse model training for 5 materials

# Inverse Model Example

We are going to use the default config:

TrainingPropConfig(id_prop_path='id_prop.csv', prefix='atomgpt_run', model_name='unsloth/mistral-7b-bnb-4bit', batch_size=2, num_epochs=2, seed_val=42, num_train=2, num_val=2, num_test=2, model_save_path='lora_model_m')


We are going to use a small `id_prop.csv` dataset with only 5 materials for training, as given [here](https://github.com/usnistgov/atomgpt/blob/main/atomgpt/examples/inverse_model/id_prop.csv). For production results, use a larger dataset.



An example of creating a sample `id_prop.csv` for the `"optb88vdw_bandgap"` bandgap property is kept [here](https://github.com/usnistgov/alignn/blob/main/alignn/examples/sample_data/scripts/generate_sample_data_reg.py). For the superconductor database, use the `"Tc_supercon"` key instead.

In [None]:
# Let's look at an example config file before running the training.
import os
import pprint

os.chdir('/content')
from jarvis.db.jsonutils import loadjson,dumpjson

example_config_path = 'Software/atomgpt/atomgpt/examples/inverse_model/config.json'
config = loadjson(example_config_path)
# Optional: override the base model before writing the config back.
# config['model_name'] = "knc6/atomgpt_mistral_tc_supercon"
dumpjson(data=config, filename=example_config_path)
pprint.pprint(config)

{'alpaca_prompt': '### Instruction:\n{}\n### Input:\n{}\n### Output:\n{}',
 'batch_size': 2,
 'chem_info': 'formula',
 'csv_out': 'AI-AtomGen-prop-dft_3d-test-rmse.csv',
 'dataset_num_proc': 2,
 'dtype': None,
 'gradient_accumulation_steps': 4,
 'id_prop_path': 'atomgpt/examples/inverse_model/id_prop.csv',
 'instruction': 'Below is a description of a superconductor material.',
 'learning_rate': 0.0002,
 'load_in_4bit': True,
 'logging_steps': 1,
 'loss_type': 'default',
 'lr_scheduler_type': 'linear',
 'max_seq_length': 2048,
 'model_name': 'knc6/atomgpt_mistral_tc_supercon',
 'model_save_path': 'lora_model_m',
 'num_epochs': 2,
 'num_test': 2,
 'num_train': 2,
 'num_val': 0,
 'optim': 'adamw_8bit',
 'output_dir': 'outputs',
 'output_prompt': ' Generate atomic structure description with lattice '
                  'lengths, angles, coordinates and atom types.',
 'per_device_train_batch_size': 2,
 'prefix': 'atomgpt_run',
 'seed_val': 3407}


In [None]:
# Switch into the repository root first: both the config path passed below and
# the `id_prop_path` stored inside that config are relative to it.
os.chdir('/content/Software/atomgpt')
!atomgpt_inverse_train --config_name atomgpt/examples/inverse_model/config.json
# Alternative (disabled): call the training script by path instead of the CLI.
#!python Software/atomgpt/atomgpt/inverse_models/inverse_models.py --config_name Software/atomgpt/atomgpt/examples/inverse_model/config.json




         _                   _____ _____ _______ 
    /\  | |                 / ____|  __ \__   __|
   /  \ | |_ ___  _ __ ___ | |  __| |__) | | |   
  / /\ \| __/ _ \| '_ ` _ \| | |_ |  ___/  | |   
 / ____ \ || (_) | | | | | | |__| | |      | |   
/_/    \_\__\___/|_| |_| |_|\_____|_|      |_|   
   
config_file atomgpt/examples/inverse_model/config.json
{'alpaca_prompt': '### Instruction:\n{}\n### Input:\n{}\n### Output:\n{}',
 'batch_size': 2,
 'chem_info': 'formula',
 'csv_out': 'AI-AtomGen-prop-dft_3d-test-rmse.csv',
 'dataset_num_proc': 2,
 'dtype': None,
 'file_format': 'poscar',
 'gradient_accumulation_steps': 4,
 'id_prop_path': 'atomgpt/examples/inverse_model/id_prop.csv',
 'id_tag': 'id',
 'instruction': 'Below is a description of a superconductor material.',
 'learning_rate': 0.0002,
 'load_in_4bit': True,
 'logging_steps': 1,
 'loss_type': 'default',
 'lr_scheduler_type': 'linear',
 'max_seq_length': 2048,
 'model_name': 'knc6/atomgpt_mistral_tc_supercon',
 'model_save

In [None]:

# from atomgpt.inverse_models.inverse_models import gen_atoms
# from atomgpt.inverse_models import  FastLanguageModel

# alpaca_prompt = """Below is a description of a superconductor material..

# ### Instruction:
# {}

# ### Input:
# {}

# ### Output:
# {}"""

# max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
# dtype = None  #
# load_in_4bit = True
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name = "lora_model_m", # YOUR MODEL YOU USED FOR TRAINING
#     max_seq_length = max_seq_length,
#     dtype = dtype,
#     load_in_4bit = load_in_4bit,
#     device_map="auto"

# )
# FastLanguageModel.for_inference(model) # Enable native 2x faster inference


# # Example prompt and generated structure
# if __name__=="__main__":
#  prompt_example = "The chemical formula is FeBN The  prop is 36.483. Generate atomic structure description with lattice lengths, angles, coordinates and atom types."

#  gen_mat = gen_atoms(prompt=prompt_example,model=model,tokenizer=tokenizer)
#  print(gen_mat)

In [None]:
# Run the inverse prediction script on the entries of `pred_list_inverse.csv`.
# NOTE(review): presumably picks up the trained run's settings from
# `outputs/` (the run config is echoed in the output) — confirm against the script.
!python atomgpt/inverse_models/inverse_predict.py --output_dir outputs/ --pred_csv "atomgpt/examples/inverse_model/pred_list_inverse.csv"



{'alpaca_prompt': '### Instruction:\n{}\n### Input:\n{}\n### Output:\n{}',
 'batch_size': 2,
 'chem_info': 'formula',
 'csv_out': 'AI-AtomGen-prop-dft_3d-test-rmse.csv',
 'dataset_num_proc': 2,
 'dtype': None,
 'file_format': 'poscar',
 'gradient_accumulation_steps': 4,
 'id_prop_path': 'atomgpt/examples/inverse_model/id_prop.csv',
 'id_tag': 'id',
 'instruction': 'Below is a description of a superconductor material.',
 'learning_rate': 0.0002,
 'load_in_4bit': True,
 'logging_steps': 1,
 'loss_type': 'default',
 'lr_scheduler_type': 'linear',
 'max_seq_length': 2048,
 'model_name': 'knc6/atomgpt_mistral_tc_supercon',
 'model_save_path': 'lora_model_m',
 'num_epochs': 2,
 'num_test': 2,
 'num_train': 2,
 'num_val': 0,
 'optim': 'adamw_8bit',
 'output_dir': 'outputs',
 'output_prompt': ' Generate atomic structure description with lattice '
                  'lengths, angles, coordinates and atom types.',
 'per_device_train_batch_size': 2,
 'prefix': 'atomgpt_run',
 'prop': 'Tc_superco

# Extras

In [None]:
# List the working directory to see the files produced by the runs above.
!ls


AI-AtomGen-prop-dft_3d-test-rmse.csv  huggingface_tokenizers_cache  outputs
atomgpt				      LICENSE.rst		    pyproject.toml1
atomgpt.egg-info		      lora_model_m		    README.md
atomgpt_inverse_config.json	      Out			    requirements.txt
environment.yml			      out_inv.json		    setup.py


Files such as `AI-AtomGen-prop-dft_3d-test-rmse.csv` can be uploaded to the [JARVIS-Leaderboard](https://pages.nist.gov/jarvis_leaderboard/) benchmarking platform.



The models are saved in the folder `lora_model_m`

In [None]:
# List the contents of the saved model folder.
!ls lora_model_m

adapter_config.json  adapter_model.safetensors	config.json  README.md


Let's look at `alpaca_prop_test.json` and `alpaca_prop_train.json`

In [None]:
# List the files in the `outputs/` directory.
!ls outputs/

alpaca_prop_test.json  alpaca_prop_train.json  config.json


In [None]:
# Load the prompt/response records written to `outputs/`, report how many
# entries each split holds, and pretty-print the first training record.
alpaca_prop_test = loadjson('outputs/alpaca_prop_test.json')
alpaca_prop_train = loadjson('outputs/alpaca_prop_train.json')

n_test_records = len(alpaca_prop_test)
n_train_records = len(alpaca_prop_train)
print(n_test_records, n_train_records)
print('\n')

first_train_record = alpaca_prop_train[0]
pprint.pprint(first_train_record)
print('\n')


2 2


{'input': 'The chemical formula is NaZnP . The  Tc_supercon is 124.8. Generate '
          'atomic structure description with lattice lengths, angles, '
          'coordinates and atom types.',
 'instruction': 'Below is a description of a superconductor material.',
 'output': '4.07 4.07 6.89\n'
           '90 90 90\n'
           'Na 0.000 0.500 0.644\n'
           'Na 0.500 0.000 0.356\n'
           'Zn 0.000 0.000 0.000\n'
           'Zn 0.500 0.500 0.000\n'
           'P 0.500 0.000 0.785\n'
           'P 0.000 0.500 0.215'}




# 3. Using the trained model for inference

Let's load the trained model for inference/testing. Note again that this model was trained on just a few samples, so the accuracy won't be very high.

In [None]:

# from jarvis.db.jsonutils import loadjson
# from atomgpt.inverse_models import  FastLanguageModel
# import torch
# from datasets import load_dataset
# from trl import SFTTrainer
# from transformers import TrainingArguments
# from jarvis.core.atoms import Atoms
# from jarvis.db.figshare import data
# from jarvis.db.jsonutils import loadjson, dumpjson
# import numpy as np
# from jarvis.core.atoms import Atoms
# from jarvis.core.lattice import Lattice
# from tqdm import tqdm
# from jarvis.io.vasp.inputs import Poscar

# import os
# #os.environ['CUDA_VISIBLE_DEVICES']='0'
# #torch.cuda.is_available = lambda : False
# alpaca_prompt = """Below is a description of a superconductor material..

# ### Instruction:
# {}

# ### Input:
# {}

# ### Output:
# {}"""

# max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
# dtype = None  #
# load_in_4bit = True
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name = "lora_model_m", # YOUR MODEL YOU USED FOR TRAINING
#     max_seq_length = max_seq_length,
#     dtype = dtype,
#     load_in_4bit = load_in_4bit,
#     device_map="auto"

# )
# FastLanguageModel.for_inference(model) # Enable native 2x faster inference


# def text2atoms(response):
#     tmp_atoms_array = response.split("\n")
#     lat_lengths = np.array(tmp_atoms_array[1].split(), dtype="float")
#     lat_angles = np.array(tmp_atoms_array[2].split(), dtype="float")
#     lat = Lattice.from_parameters(
#         lat_lengths[0],
#         lat_lengths[1],
#         lat_lengths[2],
#         lat_angles[0],
#         lat_angles[1],
#         lat_angles[2],
#     )
#     elements = []
#     coords = []
#     for ii, i in enumerate(tmp_atoms_array):
#         if ii > 2 and ii < len(tmp_atoms_array):
#             tmp = i.split()
#             elements.append(tmp[0])
#             coords.append([float(tmp[1]), float(tmp[2]), float(tmp[3])])
#     atoms = Atoms(
#         coords=coords,
#         elements=elements,
#         lattice_mat=lat.lattice(),
#         cartesian=False,
#     )
#     return atoms

# def gen_atoms(prompt="", max_new_tokens=512, model="", tokenizer=""):
#     inputs = tokenizer(
#         [
#             alpaca_prompt.format(
#                 "Below is a description of a superconductor material.",  # instruction
#                 prompt,  # input
#                 "",  # output - leave this blank for generation!
#             )
#         ],
#         return_tensors="pt",
#     ).to("cuda")
#     outputs = model.generate(
#         **inputs, max_new_tokens=max_new_tokens, use_cache=True
#     )
#     response = tokenizer.batch_decode(outputs)[0].split("# Output:")[1].strip('</s>')
#     # print('response',response)
#     atoms = text2atoms(response)
#     return atoms

# if __name__=="__main__":
#  prompt_example = "The chemical formula is MgB2 The  Tc_supercon is 6.483. The spacegroup is 12. Generate atomic structure description with lattice lengths, angles, coordinates and atom types."
#  prompt_example = "The chemical formula is FeBN The  Tc_supercon is 36.483. Generate atomic structure description with lattice lengths, angles, coordinates and atom types."

#  gen_mat = gen_atoms(prompt=prompt_example,model=model,tokenizer=tokenizer)
#  print(gen_mat)

# 4. Relaxing structures with ALIGNN-FF

The generated atomic structures can be relaxed with ALIGNN-FF, see example [here](https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/ALIGNN_Structure_Relaxation_Phonons_Interface.ipynb).

The above example used only 5 materials for training. We also fine-tuned on a database of about 1000 materials from JARVIS-DFT, and that fine-tuned model is kept on [huggingface](https://huggingface.co/knc6/atomgpt_mistral_tc_supercon).

# 5. Generating a database

In [None]:
# from jarvis.core.specie import atomic_numbers_to_symbols
# import numpy as np
# from jarvis.db.jsonutils import loadjson, dumpjson
# from jarvis.core.composition import Composition
# from tqdm import tqdm
# from inf import gen_atoms

# Z = np.arange(100) + 1
# els = atomic_numbers_to_symbols(Z)

# m = 1
# n = 2


# def gen_binary_samples(element="B"):
#     mem = []
#     for m in np.arange(1, 4):
#         for n in np.arange(1, 4):
#             for i in tqdm(els):
#                 try:
#                     comp = Composition.from_dict({i: m, element: n})
#                     prompt_example = (
#                         "The chemical formula is "
#                         + comp.reduced_formula
#                         + " The  Tc_supercon is 100. Generate atomic structure description with lattice lengths, angles, coordinates and atom types."
#                     )
#                     gen_mat = gen_atoms(prompt_example)
#                     print(i)
#                     print(gen_mat, len(mem))
#                     mem.append([int(m), int(n), i, gen_mat.to_dict()])
#                     # dumpjson(data=mem,filename='superB.json')
#                 except:
#                     pass
#     fname="binary_super"+element+".json"
#     dumpjson(data=mem, filename=fname)
# gen_binary_samples("S")
# gen_binary_samples("Se")
# gen_binary_samples("Te")
# def gen_ternary_samples(element="B"):
#     mem = []
#     for m in np.arange(1, 4):
#         for n in np.arange(1, 4):
#           for j in tqdm(els):
#             for i in tqdm(els):
#                 try:
#                     comp = Composition.from_dict({i: m, j:n, element: n})
#                     prompt_example = (
#                         "The chemical formula is "
#                         + comp.reduced_formula
#                         + " The  Tc_supercon is 100. Generate atomic structure description with lattice lengths, angles, coordinates and atom types."
#                     )
#                     gen_mat = gen_atoms(prompt_example)
#                     print(i)
#                     print(gen_mat, len(mem))
#                     mem.append([int(m), int(n), i, gen_mat.to_dict()])
#                     # dumpjson(data=mem,filename='superB.json')
#                 except:
#                     pass
#     fname="binary_super"+element+".json"
#     dumpjson(data=mem, filename=fname)
# gen_ternary_samples("B")

# """

# m=1
# n=2
# mem=[]
# for m in np.arange(1,4):
#   for n in np.arange(1,4):
#     for i in tqdm(els):
#       try:
#         comp=Composition.from_dict({i:m,"C":n})
#         prompt_example = "The chemical formula is "+comp.reduced_formula+" The  Tc_supercon is 100. Generate atomic structure description with lattice lengths, angles, coordinates and atom types."
#         gen_mat = gen_atoms(prompt_example)
#         print(i)
#         print(gen_mat,len(mem))
#         mem.append([int(m),int(n),i,gen_mat.to_dict()])
#         #mem.append([m,n,i,gen_mat.to_dict()])
#       except:
#         pass
# dumpjson(data=mem,filename='superC.json')



# m=1
# n=2
# mem=[]
# for m in np.arange(1,4):
#   for n in np.arange(1,4):
#     for i in tqdm(els):
#       try:
#         comp=Composition.from_dict({i:m,"N":n})
#         prompt_example = "The chemical formula is "+comp.reduced_formula+" The  Tc_supercon is 100. Generate atomic structure description with lattice lengths, angles, coordinates and atom types."
#         gen_mat = gen_atoms(prompt_example)
#         print(i)
#         print(gen_mat,len(mem))
#         mem.append([int(m),int(n),i,gen_mat.to_dict()])
#         #mem.append([m,n,i,gen_mat.to_dict()])
#       except:
#         pass
# dumpjson(data=mem,filename='superN.json')
# """


# For forward model training with AtomGPT, see https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/atomgpt_forward_example.ipynb

In [None]:
# Dump the conda environment specification for reproducibility.
# NOTE(review): the condacolab install cell at the top of the notebook is
# commented out, so `conda` may not be available on a fresh runtime — confirm.
!conda env export

name: base
channels:
  - conda-forge
dependencies:
  - _libgcc_mutex=0.1=conda_forge
  - _openmp_mutex=4.5=2_gnu
  - archspec=0.2.2=pyhd8ed1ab_0
  - boltons=23.1.1=pyhd8ed1ab_0
  - brotli-python=1.1.0=py310hc6cd4ac_1
  - bzip2=1.0.8=hd590300_5
  - c-ares=1.24.0=hd590300_0
  - ca-certificates=2023.11.17=hbcca054_0
  - cffi=1.16.0=py310h2fee648_0
  - charset-normalizer=3.3.2=pyhd8ed1ab_0
  - colorama=0.4.6=pyhd8ed1ab_0
  - conda=23.11.0=py310hff52083_1
  - conda-libmamba-solver=23.12.0=pyhd8ed1ab_0
  - conda-package-handling=2.2.0=pyh38be061_0
  - conda-package-streaming=0.9.0=pyhd8ed1ab_0
  - distro=1.8.0=pyhd8ed1ab_0
  - fmt=10.1.1=h00ab1b0_1
  - icu=73.2=h59595ed_0
  - jsonpatch=1.33=pyhd8ed1ab_0
  - jsonpointer=2.4=py310hff52083_3
  - keyutils=1.6.1=h166bdaf_0
  - krb5=1.21.2=h659d440_0
  - ld_impl_linux-64=2.40=h41732ed_0
  - libarchive=3.7.2=h2aa1ff5_1
  - libcurl=8.5.0=hca28451_0
  - libedit=3.1.20191231=he28a2e2_2
  - libev=4.33=hd590300_2
  - libffi=3.4.2=h7f98852_5
  - libgcc-n

In [None]:
# Record the installed pip package versions of this runtime for reproducibility.
!pip freeze

accelerate==0.31.0
aiohttp==3.9.5
aiosignal==1.3.1
alignn==2024.4.20
annotated-types==0.7.0
archspec @ file:///home/conda/feedstock_root/build_artifacts/archspec_1699370045702/work
ase==3.23.0
async-timeout==4.0.3
-e git+https://github.com/usnistgov/atomgpt.git@a516955aa3348e628175d024c6b16896ba34e31a#egg=atomgpt
attrs==23.2.0
autopep8==2.3.1
bitsandbytes==0.43.1
black==24.4.2
boltons @ file:///home/conda/feedstock_root/build_artifacts/boltons_1703154663129/work
Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1695989787169/work
certifi==2024.6.2
cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1696001684923/work
chardet==3.0.4
charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1698833585322/work
click==8.1.7
colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work
conda @ file:///home/conda/feedstock_root/build_artifacts/conda_1701731572133/work
conda-libmamba-solver @ file:///ho

In [None]:
# NOTE(review): this repeats the earlier `!conda env export` cell and shows the
# same output; it looks like a leftover that could be removed.
!conda env export

name: base
channels:
  - conda-forge
dependencies:
  - _libgcc_mutex=0.1=conda_forge
  - _openmp_mutex=4.5=2_gnu
  - archspec=0.2.2=pyhd8ed1ab_0
  - boltons=23.1.1=pyhd8ed1ab_0
  - brotli-python=1.1.0=py310hc6cd4ac_1
  - bzip2=1.0.8=hd590300_5
  - c-ares=1.24.0=hd590300_0
  - ca-certificates=2023.11.17=hbcca054_0
  - cffi=1.16.0=py310h2fee648_0
  - charset-normalizer=3.3.2=pyhd8ed1ab_0
  - colorama=0.4.6=pyhd8ed1ab_0
  - conda=23.11.0=py310hff52083_1
  - conda-libmamba-solver=23.12.0=pyhd8ed1ab_0
  - conda-package-handling=2.2.0=pyh38be061_0
  - conda-package-streaming=0.9.0=pyhd8ed1ab_0
  - distro=1.8.0=pyhd8ed1ab_0
  - fmt=10.1.1=h00ab1b0_1
  - icu=73.2=h59595ed_0
  - jsonpatch=1.33=pyhd8ed1ab_0
  - jsonpointer=2.4=py310hff52083_3
  - keyutils=1.6.1=h166bdaf_0
  - krb5=1.21.2=h659d440_0
  - ld_impl_linux-64=2.40=h41732ed_0
  - libarchive=3.7.2=h2aa1ff5_1
  - libcurl=8.5.0=hca28451_0
  - libedit=3.1.20191231=he28a2e2_2
  - libev=4.33=hd590300_2
  - libffi=3.4.2=h7f98852_5
  - libgcc-n

In [None]:
# env="""name:base
# channels:
#   - xformers
#   - pytorch
#   - nvidia
#   - conda-forge
#   - defaults
# dependencies:
#   - _libgcc_mutex=0.1=conda_forge
#   - _openmp_mutex=4.5=2_gnu
#   - blas=1.0=mkl
#   - bzip2=1.0.8=h7f98852_4
#   - ca-certificates=2024.2.2=hbcca054_0
#   - cairo=1.18.0=h3faef2a_0
#   - cffi=1.16.0=py39h7a31438_0
#   - cuda-cudart=12.1.105=0
#   - cuda-cupti=12.1.105=0
#   - cuda-libraries=12.1.0=0
#   - cuda-nvrtc=12.1.105=0
#   - cuda-nvtx=12.1.105=0
#   - cuda-opencl=12.4.99=0
#   - cuda-runtime=12.1.0=0
#   - cudatoolkit=11.7.0=hd8887f6_10
#   - expat=2.5.0=hcb278e6_1
#   - filelock=3.15.4=pyhd8ed1ab_0
#   - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
#   - font-ttf-inconsolata=3.000=h77eed37_0
#   - font-ttf-source-code-pro=2.038=h77eed37_0
#   - font-ttf-ubuntu=0.83=hab24e00_0
#   - fontconfig=2.14.2=h14ed4e7_0
#   - fonts-conda-ecosystem=1=0
#   - fonts-conda-forge=1=0
#   - freetype=2.12.1=h267a509_2
#   - gettext=0.21.1=h27087fc_0
#   - gmp=6.3.0=h59595ed_1
#   - gmpy2=2.1.2=py39h376b7d2_1
#   - icu=73.2=h59595ed_0
#   - intel-openmp=2022.1.0=h9e868ea_3769
#   - jinja2=3.1.4=pyhd8ed1ab_0
#   - ld_impl_linux-64=2.40=h41732ed_0
#   - libblas=3.9.0=16_linux64_mkl
#   - libcblas=3.9.0=16_linux64_mkl
#   - libcublas=12.1.0.26=0
#   - libcufft=11.0.2.4=0
#   - libcufile=1.9.0.20=0
#   - libcurand=10.3.5.119=0
#   - libcusolver=11.4.4.55=0
#   - libcusparse=12.0.2.55=0
#   - libexpat=2.5.0=hcb278e6_1
#   - libffi=3.4.2=h7f98852_5
#   - libgcc-ng=13.2.0=h807b86a_2
#   - libgfortran-ng=13.2.0=h69a702a_5
#   - libgfortran5=13.2.0=ha4646dd_5
#   - libglib=2.78.0=hebfc3b9_0
#   - libgomp=13.2.0=h807b86a_2
#   - libiconv=1.17=h166bdaf_0
#   - liblapack=3.9.0=16_linux64_mkl
#   - libnpp=12.0.2.50=0
#   - libnsl=2.0.0=h7f98852_0
#   - libnvjitlink=12.1.105=0
#   - libnvjpeg=12.1.1.14=0
#   - libopenblas=0.3.26=pthreads_h413a1c8_0
#   - libpng=1.6.39=h753d276_0
#   - libprotobuf=3.21.12=hfc55251_2
#   - libsqlite=3.43.0=h2797004_0
#   - libstdcxx-ng=13.2.0=h7e041cc_2
#   - libuuid=2.38.1=h0b41bf4_0
#   - libxcb=1.15=h0b41bf4_0
#   - libxml2=2.11.5=h232c23b_1
#   - libzlib=1.2.13=hd590300_5
#   - llvm-openmp=15.0.7=h0cdce71_0
#   - markupsafe=2.1.5=py39hd1e30aa_0
#   - mkl=2022.1.0=hc2b9512_224
#   - mpc=1.3.1=hfe3b2da_0
#   - mpfr=4.2.1=h9458935_0
#   - mpmath=1.3.0=pyhd8ed1ab_0
#   - ncurses=6.4=hcb278e6_0
#   - networkx=3.2.1=pyhd8ed1ab_0
#   - ninja=1.11.1=h924138e_0
#   - openbabel=3.1.1=py39h421517d_8
#   - openssl=3.2.1=hd590300_1
#   - pcre2=10.40=hc3806b6_0
#   - pip=23.2.1=pyhd8ed1ab_0
#   - pixman=0.42.2=h59595ed_0
#   - pthread-stubs=0.4=h36c2ea0_1001
#   - pycparser=2.22=pyhd8ed1ab_0
#   - python=3.9.18=h0755675_0_cpython
#   - python_abi=3.9=4_cp39
#   - pytorch=2.2.2=py3.9_cuda12.1_cudnn8.9.2_0
#   - pytorch-cuda=12.1=ha16c6d3_5
#   - pytorch-mutex=1.0=cuda
#   - pyyaml=6.0.1=py39hd1e30aa_1
#   - readline=8.2=h8228510_1
#   - setuptools=68.2.2=pyhd8ed1ab_0
#   - sleef=3.5.1=h9b69904_2
#   - sympy=1.12=pypyh9d50eac_103
#   - tk=8.6.13=h2797004_0
#   - torchtriton=2.2.0=py39
#   - typing_extensions=4.10.0=pyha770c72_0
#   - wheel=0.43.0=pyhd8ed1ab_1
#   - xformers=0.0.25.post1=py39_cu12.1.0_pyt2.2.2
#   - xorg-kbproto=1.0.7=h7f98852_1002
#   - xorg-libice=1.1.1=hd590300_0
#   - xorg-libsm=1.2.4=h7391055_0
#   - xorg-libx11=1.8.7=h8ee46fc_0
#   - xorg-libxau=1.0.11=hd590300_0
#   - xorg-libxdmcp=1.1.3=h7f98852_0
#   - xorg-libxext=1.3.4=h0b41bf4_2
#   - xorg-libxrender=0.9.11=hd590300_0
#   - xorg-renderproto=0.11.1=h7f98852_1002
#   - xorg-xextproto=7.3.0=h0b41bf4_1003
#   - xorg-xproto=7.0.31=h7f98852_1007
#   - xz=5.2.6=h166bdaf_0
#   - yaml=0.2.5=h7f98852_2
#   - zlib=1.2.13=hd590300_5
#   - pip:
#       - accelerate==0.31.0
#       - aiohttp==3.9.5
#       - aiosignal==1.3.1
#       - alignn==2024.4.20
#       - annotated-types==0.7.0
#       - ase==3.23.0
#       - async-timeout==4.0.3
#       - attrs==23.2.0
#       - autopep8==2.3.1
#       - bitsandbytes==0.43.1
#       - black==24.4.2
#       - certifi==2024.6.2
#       - chardet==3.0.4
#       - charset-normalizer==3.3.2
#       - click==8.1.7
#       - contourpy==1.2.1
#       - cycler==0.12.1
#       - datasets==2.20.0
#       - dgl==1.1.1
#       - dill==0.3.8
#       - docstring-parser==0.16
#       - eval-type-backport==0.2.0
#       - flake8==7.1.0
#       - fonttools==4.53.0
#       - frozenlist==1.4.1
#       - fsspec==2024.5.0
#       - huggingface-hub==0.23.4
#       - idna==3.7
#       - importlib-resources==6.4.0
#       - jarvis-tools==2024.4.30
#       - joblib==1.4.2
#       - kiwisolver==1.4.5
#       - lmdb==1.4.1
#       - markdown-it-py==3.0.0
#       - matplotlib==3.9.0
#       - mccabe==0.7.0
#       - mdurl==0.1.2
#       - multidict==4.7.6
#       - multiprocess==0.70.16
#       - mypy-extensions==1.0.0
#       - numpy==1.26.4
#       - packaging==24.1
#       - pandas==2.2.2
#       - pathspec==0.12.1
#       - peft==0.11.1
#       - pillow==10.3.0
#       - platformdirs==4.2.2
#       - psutil==6.0.0
#       - pyarrow==16.1.0
#       - pyarrow-hotfix==0.6
#       - pycodestyle==2.12.0
#       - pydantic==2.7.4
#       - pydantic-core==2.18.4
#       - pydantic-settings==2.3.3
#       - pydocstyle==6.3.0
#       - pyflakes==3.2.0
#       - pygments==2.18.0
#       - pyparsing==2.4.7
#       - python-dateutil==2.9.0.post0
#       - python-dotenv==1.0.1
#       - pytz==2024.1
#       - regex==2024.5.15
#       - requests==2.32.3
#       - rich==13.7.1
#       - safetensors==0.4.3
#       - scikit-learn==1.5.0
#       - scipy==1.13.1
#       - sentencepiece==0.2.0
#       - shtab==1.7.1
#       - six==1.16.0
#       - snowballstemmer==2.2.0
#       - spglib==2.4.0
#       - threadpoolctl==3.5.0
#       - tokenizers==0.19.1
#       - tomli==2.0.1
#       - toolz==0.12.1
#       - torchdata==0.7.1
#       - tqdm==4.66.4
#       - transformers==4.41.2
#       - trl==0.8.6
#       - tyro==0.8.4
#       - tzdata==2024.1
#       - urllib3==2.2.2
#       - xmltodict==0.13.0
#       - xxhash==3.4.1
#       - yarl==1.9.4
#       - zipp==3.19.2
# """
# with open(f'/content/conda.yaml', 'w') as f:
#     f.write(env)
# # !conda env update --name base -f conda.yaml