In [1]:
import skipatom as sa
import pandas as pd
from skipatom import SkipAtomInducedModel
from skipatom import AtomVectors
import torch
import numpy as np
import random

In [2]:
# Source table for the embeddings; later cells read its 'Atom' and 'Orbital' columns.
input_data = pd.read_csv(filepath_or_buffer='./atomic_binding_energies_fixed.csv')

In [3]:
# Load the atom vectors from disk; later cells use `.dictionary` (symbol -> row
# index) and `.vectors` (the rows themselves).
model_atoms = AtomVectors.load("embedding_data/atom2vec.dim30.model")

# Sorted, de-duplicated orbital labels found in the input table.
unique_orbitals = [*np.unique(input_data['Orbital'])]

In [4]:
# How many symbols the loaded model has an entry for.
len(model_atoms.dictionary.keys())

87

In [5]:
def generate_random_vector(dim=1, rng=None):
    """Return a list of ``dim`` floats drawn uniformly between 0 and 1.

    Args:
        dim: Number of components to draw (default 1).
        rng: Optional generator exposing ``uniform(a, b)``, e.g. a seeded
            ``random.Random`` instance, so callers can obtain reproducible
            vectors. When omitted, the module-level ``random`` generator is
            used, exactly as before.

    Returns:
        A list of ``dim`` Python floats.
    """
    # Fall back to the shared module-level generator to stay backward-compatible.
    source = random if rng is None else rng
    return [source.uniform(0, 1) for _ in range(dim)]

# Seed the generator right before drawing so every orbital receives the same
# random code on each fresh run; without this the exported embeddings differ
# after every kernel restart.
random.seed(0)

# One random vector (default length 1) per distinct orbital label.
orbital_vectors = {orbital: generate_random_vector() for orbital in unique_orbitals}


In [6]:
# List the symbols the loaded model has vectors for, to compare against the
# atoms present in the input table.
print(model_atoms.dictionary.keys())


dict_keys(['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu'])


In [7]:
# Sorted distinct atom symbols that occur in the input table.
np.unique(input_data['Atom'].to_numpy())

array(['Ag', 'Al', 'Ar', 'As', 'B', 'Ba', 'Bi', 'Br', 'C', 'Ca', 'Cd',
       'Cl', 'Co', 'Cr', 'Cs', 'Cu', 'F', 'Fe', 'Ga', 'Ge', 'Hg', 'I',
       'In', 'K', 'Kr', 'Li', 'Mg', 'Mn', 'Mo', 'N', 'Na', 'Ne', 'Ni',
       'O', 'P', 'Pb', 'Rb', 'Re', 'Rh', 'S', 'Sb', 'Se', 'Si', 'Sn',
       'Sr', 'Te', 'Ti', 'Tl', 'U', 'V', 'W', 'Xe', 'Zn'], dtype=object)

In [8]:
# Load the earlier version of the binding-energy table.
# NOTE(review): `old_data` is not referenced by any other visible cell — confirm
# whether it is still needed.
old_data = pd.read_csv('./atomic_binding_energies.csv')

In [9]:
# Build one embedding per input row: the atom's vector from the loaded model
# followed by the random code of the row's orbital.

# Width of an atom vector, read from the model instead of being hard-coded, so
# the zero fallback always matches the real vectors.
atom_dim = len(model_atoms.vectors[0])

embeddings = []

# Iterate over the column values directly; this does not depend on the frame
# having a default 0..n-1 index.
for atom, orbital in zip(input_data['Atom'], input_data['Orbital']):
    atom_index = model_atoms.dictionary.get(atom)
    if atom_index is None:
        # The model has no entry for this symbol: use an all-zero vector and
        # report the symbol. Only this case is handled; any other error now
        # surfaces instead of silently producing zeros.
        atom_vector = torch.zeros(atom_dim, dtype=torch.float32)
        print(atom)
    else:
        atom_vector = torch.tensor(model_atoms.vectors[atom_index], dtype=torch.float32)
    orbital_vector = torch.tensor(orbital_vectors[orbital], dtype=torch.float32)
    final_embedding = torch.cat((atom_vector, orbital_vector))
    embeddings.append(final_embedding.numpy())


Ne
Ar
Ar
Ne


In [10]:
# Collect the atom symbol, orbital label and computed embedding of every row
# into a single table.
embd = pd.DataFrame(
    {
        'Atoms': input_data['Atom'],
        'Orbital': input_data['Orbital'],
        'Embeddings': embeddings,
    }
)

In [11]:
# Spot-check: show the rows whose atom symbol is Tl.
embd.loc[embd["Atoms"].eq("Tl")]

Unnamed: 0,Atoms,Orbital,Embeddings
91,Tl,4f7/2,"[1.266294, -2.2460766, -3.961528, -5.346277, -..."


In [12]:
# Write the table to disk with a header row and without the index column.
# NOTE(review): each 'Embeddings' cell holds a NumPy array, so it is written as
# that array's text form — confirm downstream readers can parse it.
embd.to_csv('final_embedding_dim30.csv', header=True, index=False)