In [1]:
import skipatom as sa
import pandas as pd
from skipatom import SkipAtomInducedModel
from skipatom import SkipAtomModel
from skipatom import AtomVectors
import torch
import numpy as np
import random

In [2]:
input_data = pd.read_csv('./atomic_binding_energies_fixed.csv')

In [3]:
model_atoms_30 = AtomVectors.load("embedding_data/atom2vec.dim30.model")

model_atoms_200_induced = SkipAtomInducedModel.load(
    "embedding_data/mp_2020_10_09.dim200.model", 
    "embedding_data/mp_2020_10_09.training.data", 
    min_count=2e7, top_n=5)

model_atoms_200_notinduced = SkipAtomModel.load(
    "embedding_data/mp_2020_10_09.dim200.model", 
    "embedding_data/mp_2020_10_09.training.data")

# Get the unique orbitals
unique_orbitals = list(np.unique(input_data['Orbital']))

In [4]:
def generate_random_vector(dim=1):
    return [random.uniform(0, 1) for _ in range(dim)]

def generate_one_hot_vector(dim=1):
    one_hot = [0] * dim
    index = random.randint(0, dim - 1)
    
    one_hot[index] = 1
    return one_hot

orbital_vectors_rand = {orbital: generate_random_vector() for orbital in unique_orbitals}
orbital_vectors_onehot = {orbital: generate_one_hot_vector() for orbital in unique_orbitals}


In [5]:
np.unique(input_data['Atom'])

array(['Ag', 'Al', 'Ar', 'As', 'B', 'Ba', 'Bi', 'Br', 'C', 'Ca', 'Cd',
       'Cl', 'Co', 'Cr', 'Cs', 'Cu', 'F', 'Fe', 'Ga', 'Ge', 'Hg', 'I',
       'In', 'K', 'Kr', 'Li', 'Mg', 'Mn', 'Mo', 'N', 'Na', 'Ne', 'Ni',
       'O', 'P', 'Pb', 'Rb', 'Re', 'Rh', 'S', 'Sb', 'Se', 'Si', 'Sn',
       'Sr', 'Te', 'Ti', 'Tl', 'U', 'V', 'W', 'Xe', 'Zn'], dtype=object)

In [6]:
# N=30, Random

embeddings = []

for i in range(len(input_data['Atom'])):
    try:
        atom_vector = torch.tensor(model_atoms_30.vectors[model_atoms_30.dictionary[input_data['Atom'][i]]], dtype=torch.float32)
    except:
        atom_vector = torch.tensor(np.zeros(30), dtype=torch.float32)
        #print(input_data['Atom'][i])
    orbital_vector = torch.tensor(orbital_vectors_rand[input_data['Orbital'][i]], dtype=torch.float32)
    final_embedding = torch.cat((atom_vector, orbital_vector))
    embeddings.append(np.array(final_embedding))

embd = {'Atoms': input_data['Atom'], 'Orbital': input_data['Orbital'], 'Embeddings': embeddings}
embd = pd.DataFrame(embd)
embd[embd["Atoms"] == "Tl"]

embd.to_csv('./New_embeddings/final_embedding_dim30_random-orbital.csv', index=False, header = True)



  embeddings.append(np.array(final_embedding))


In [7]:
# N=30, One Hot

embeddings = []
one_hot_check = []

for i in range(len(input_data['Atom'])):
    try:
        atom_vector = torch.tensor(model_atoms_30.vectors[model_atoms_30.dictionary[input_data['Atom'][i]]], dtype=torch.float32)
    except:
        atom_vector = torch.tensor(np.zeros(30), dtype=torch.float32)
        #print(input_data['Atom'][i])
    orbital_vector = torch.tensor(orbital_vectors_onehot[input_data['Orbital'][i]], dtype=torch.float32)
    if orbital_vector in one_hot_check:
        orbital_vector = torch.tensor(orbital_vectors[input_data['Orbital'][i]], dtype=torch.float32)
        one_hot_check.append(orbital_vector)

    final_embedding = torch.cat((atom_vector, orbital_vector))
    embeddings.append(np.array(final_embedding))

embd = {'Atoms': input_data['Atom'], 'Orbital': input_data['Orbital'], 'Embeddings': embeddings}
embd = pd.DataFrame(embd)
embd[embd["Atoms"] == "Tl"]

embd.to_csv('./New_embeddings/final_embedding_dim30_one-hot-orbital.csv', index=False, header = True)



  embeddings.append(np.array(final_embedding))


In [8]:
# 200 dim, Induced, Random
embeddings = []

for i in range(len(input_data['Atom'])):
    try:
        atom_vector = torch.tensor(model_atoms_200_induced.vectors[model_atoms_200_induced.dictionary[input_data['Atom'][i]]], dtype=torch.float32)
    except:
        atom_vector = torch.tensor(np.zeros(200), dtype=torch.float32)
        #print(input_data['Atoms'][i])
    orbital_vector = torch.tensor(orbital_vectors_rand[input_data['Orbital'][i]], dtype=torch.float32)
    final_embedding = torch.cat((atom_vector, orbital_vector))
    embeddings.append(np.array(final_embedding))

embd = {'Atoms': input_data['Atom'], 'Orbital': input_data['Orbital'], 'Embeddings': embeddings}
embd = pd.DataFrame(embd)
embd.to_csv('./New_embeddings/final_embedding_dim200_random-orbital_induced.csv', index=False, header = True)

  embeddings.append(np.array(final_embedding))


In [9]:
# 200 dim, Induced, One Hot
embeddings = []

for i in range(len(input_data['Atom'])):
    try:
        atom_vector = torch.tensor(model_atoms_200_induced.vectors[model_atoms_200_induced.dictionary[input_data['Atom'][i]]], dtype=torch.float32)
    except:
        atom_vector = torch.tensor(np.zeros(200), dtype=torch.float32)
        #print(input_data['Atoms'][i])
    orbital_vector = torch.tensor(orbital_vectors_onehot[input_data['Orbital'][i]], dtype=torch.float32)
    final_embedding = torch.cat((atom_vector, orbital_vector))
    embeddings.append(np.array(final_embedding))

embd = {'Atoms': input_data['Atom'], 'Orbital': input_data['Orbital'], 'Embeddings': embeddings}
embd = pd.DataFrame(embd)
embd.to_csv('./New_embeddings/final_embedding_dim200_One-hot-orbital_induced.csv', index=False, header = True)

  embeddings.append(np.array(final_embedding))


In [10]:
# 200 dim, Not-Induced, Random
embeddings = []

for i in range(len(input_data['Atom'])):
    try:
        atom_vector = torch.tensor(model_atoms_200_notinduced.vectors[model_atoms_200_notinduced.dictionary[input_data['Atom'][i]]], dtype=torch.float32)
    except:
        atom_vector = torch.tensor(np.zeros(200), dtype=torch.float32)
        #print(input_data['Atoms'][i])
    orbital_vector = torch.tensor(orbital_vectors_rand[input_data['Orbital'][i]], dtype=torch.float32)
    final_embedding = torch.cat((atom_vector, orbital_vector))
    embeddings.append(np.array(final_embedding))

embd = {'Atoms': input_data['Atom'], 'Orbital': input_data['Orbital'], 'Embeddings': embeddings}
embd = pd.DataFrame(embd)
embd.to_csv('./New_embeddings/final_embedding_dim200_random-orbital_not-induced.csv', index=False, header = True)

  embeddings.append(np.array(final_embedding))


In [11]:
# 200 dim, Not-Induced, One Hot
embeddings = []

for i in range(len(input_data['Atom'])):
    try:
        atom_vector = torch.tensor(model_atoms_200_notinduced.vectors[model_atoms_200_notinduced.dictionary[input_data['Atom'][i]]], dtype=torch.float32)
    except:
        atom_vector = torch.tensor(np.zeros(200), dtype=torch.float32)
        #print(input_data['Atoms'][i])
    orbital_vector = torch.tensor(orbital_vectors_onehot[input_data['Orbital'][i]], dtype=torch.float32)
    final_embedding = torch.cat((atom_vector, orbital_vector))
    embeddings.append(np.array(final_embedding))

embd = {'Atoms': input_data['Atom'], 'Orbital': input_data['Orbital'], 'Embeddings': embeddings}
embd = pd.DataFrame(embd)
embd.to_csv('./New_embeddings/final_embedding_dim200_One-hot-orbital_not-induced.csv', index=False, header = True)

  embeddings.append(np.array(final_embedding))
