# Make RACs from initial structure

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pickle

from collections import defaultdict
from molSimplify.Informatics.autocorrelation import*


In [2]:
def make_rac(xyz_file, m_depth, l_depth, is_oct):
    """Build the RAC feature vector of one complex from its xyz structure.

    Metal-centered (MC) features start at the first metal reported by
    ``findMetal()``. Ligand-centered (LC) features start at the atoms
    closest to that metal (6 of them when ``is_oct`` is true, otherwise 5),
    which are taken to be the metal-connecting atoms.

    Args:
        xyz_file: Path of the xyz file handed to ``mol3D.readfromxyz``.
        m_depth: Maximum depth of the metal-centered features.
        l_depth: Maximum depth of the ligand-centered features.
        is_oct: Forwarded as ``oct`` to the molSimplify autocorrelation
            helpers; also selects 6 versus 5 connecting atoms.

    Returns:
        A ``(rac_res, feature_names)`` tuple. ``rac_res`` is a flat array
        holding, in this order, the MC autocorrelations, the MC
        deltametrics, the LC autocorrelations and the LC deltametrics, each
        group running property by property and then depth by depth.
        ``feature_names`` holds one label per entry of ``rac_res`` in that
        same order.

    Raises:
        ValueError: If the structure has too few atoms to pick the
            requested number of connecting atoms.
    """
    properties = ['electronegativity', 'size', 'polarizability', 'nuclear_charge']
    this_mol = mol3D()  # mol3D instance
    this_mol.readfromxyz(xyz_file)
    metal_idx = this_mol.findMetal()[0]

    # Metal-centered features.
    mc_corrs = np.zeros(shape=(len(properties), m_depth + 1))
    mc_deltas = np.zeros(shape=(len(properties), m_depth))
    for idx, p in enumerate(properties):
        mc_corrs[idx] = np.asarray(
            atom_only_autocorrelation(this_mol, p, m_depth, metal_idx, oct=is_oct)
        ).flatten()
        # The depth-0 entry is dropped: difference features start at depth 1.
        mc_deltas[idx] = np.asarray(
            atom_only_deltametric(this_mol, p, m_depth, metal_idx, oct=is_oct)
        ).flatten()[1:]

    # Pick the connecting atoms as the nearest neighbours of the metal.
    num_connectors = 6 if is_oct else 5
    coords = np.asarray(this_mol.coordsvect())
    if len(coords) <= num_connectors:
        raise ValueError(
            "{} has {} atoms; need more than {} to select the connecting "
            "atoms".format(xyz_file, len(coords), num_connectors)
        )
    distances = np.linalg.norm(coords - coords[metal_idx], axis=1)
    # The metal itself (distance 0) is among the num_connectors + 1 closest
    # atoms, so exclude it by its own index rather than assuming index 0.
    nearest = np.argpartition(distances, num_connectors)[:num_connectors + 1]
    nn = [int(i) for i in nearest if i != metal_idx]

    # Ligand-centered features, started from the connecting atoms.
    lc_corrs = np.zeros(shape=(len(properties), l_depth + 1))
    lc_deltas = np.zeros(shape=(len(properties), l_depth))
    for idx, p in enumerate(properties):
        lc_corrs[idx] = np.asarray(
            atom_only_autocorrelation(this_mol, p, l_depth, nn, oct=is_oct)
        ).flatten()
        lc_deltas[idx] = np.asarray(
            atom_only_deltametric(this_mol, p, l_depth, nn, oct=is_oct)
        ).flatten()[1:]

    # Labels are generated in exactly the order in which the arrays are
    # flattened below, so feature_names[i] describes rac_res[i].
    name_groups = (
        ("MC-mult", 0, m_depth),
        ("MC-diff", 1, m_depth),
        ("LC-mult", 0, l_depth),
        ("LC-diff", 1, l_depth),
    )
    feature_names = []
    for prefix, first_depth, max_depth in name_groups:
        for p in properties:
            feature_names.extend(
                "{}-{}-{}".format(prefix, p, x)
                for x in range(first_depth, max_depth + 1)
            )

    rac_res = np.concatenate((mc_corrs, mc_deltas, lc_corrs, lc_deltas),
                             axis=None)

    return rac_res, feature_names

Now we generate RACs for several combinations of metal-centered and ligand-centered feature depths, so that we can perform the grid search over these depths in rac_depth_search.ipynb.

In [3]:
# Depths to scan: metal-centered (mc) and ligand-centered (lc).
mc_depths = [2, 3, 4]
lc_depths = [0,  1]

# NOTE: unpickling can execute arbitrary code; only load trusted files here.
with open("racs_and_desc/oer_desc_data.p", "rb") as desc_file:
    oer_desc_data = pickle.load(desc_file)

# Write one pickle per (mc depth, lc depth) combination. Each pickle maps a
# name to (oer_desc, rac), where entry [1] of the input record is the xyz
# file passed to make_rac and entry [0] is stored unchanged (presumably the
# OER descriptors of that catalyst; verify against the code that wrote
# oer_desc_data.p).
for mc_d in mc_depths:
    for lc_d in lc_depths:
        # Fresh mapping for every depth combination.
        name2oer_desc_and_rac = defaultdict()
        for name in oer_desc_data:
            oer_desc = oer_desc_data[name][0]
            rac = np.asarray(make_rac(oer_desc_data[name][1], mc_d, lc_d, is_oct=True)[0])
            name2oer_desc_and_rac[name] = (oer_desc, rac)
        out_path = "racs_and_desc/data_mc{}_lc{}.p".format(mc_d, lc_d)
        # Close the file explicitly so the pickle is fully flushed to disk.
        with open(out_path, "wb") as out_file:
            pickle.dump(name2oer_desc_and_rac, out_file)
    