In [1]:
'''import sys
sys.path.insert(0, 'nice')
'''
#os.environ['OMP_NUM_THREADS'] = '24'

import numpy as np
import ase.io as ase_io
from ase import Atoms
import tqdm
import time
import copy
from sklearn.linear_model import BayesianRidge
from pathos.multiprocessing import ProcessingPool as Pool
import nice
from nice.blocks import *
from nice.parallelized import *

from matplotlib import pyplot as plt


In [2]:
def process_structures(structures, box_size=120):
    """Put every structure into a periodic cubic box, modifying it in place.

    For each structure the cell is set to a cube of edge ``box_size``, all
    positions are shifted by half the box edge along every axis, periodic
    boundary conditions are switched on and ``wrap()`` is called.

    Parameters
    ----------
    structures : iterable
        Objects exposing ``cell``, ``positions``, ``pbc`` and ``wrap()``;
        the notebook passes the list returned by ``ase_io.read``.
    box_size : number, optional
        Edge length of the cubic cell. The default of 120 reproduces the
        previously hard-coded cell ``[120, 120, 120]`` and shift of 60.

    Returns
    -------
    None
        The structures are mutated; nothing is returned.
    """
    # The shift is tied to the box edge (half of it), so derive it instead of
    # keeping two independent magic numbers.
    half_box = box_size / 2
    shift = np.asarray((half_box, half_box, half_box))
    for structure in structures:
        structure.cell = [box_size, box_size, box_size]
        structure.positions += shift
        structure.pbc = True
        structure.wrap()

In [3]:
# Read frames 0..4999 of structures.xyz as the training structures.
structures_train = ase_io.read('structures.xyz', 
                         index = '0:5000')

# Box, shift and wrap the structures in place.
process_structures(structures_train)

# Disabled validation-set loading, kept as an inert string literal (it is
# only displayed as the cell output, never executed).
# NOTE(review): the disabled slice '100:200' lies inside the training
# range '0:5000' -- pick a disjoint slice if this is ever re-enabled.
'''structures_val = ase_io.read('structures.xyz', 
                         index = '100:200')'''

#process_structures(structures_val)


"structures_val = ase_io.read('structures.xyz', \n                         index = '100:200')"

In [4]:
# Standardise the reference energies: subtract the mean, then divide by the
# root-mean-square of the centred values.
energies = np.load('energies.npy')
energies = energies - energies.mean()
energies = energies / np.sqrt((energies * energies).mean())

# First 1000 entries for training, the following 1000 for validation.
# NOTE(review): 5000 structures are loaded into structures_train, but only
# 2000 energies are sliced here -- confirm the intended split.
energies_train, energies_val = energies[0:1000], energies[1000:2000]
print(energies_train.shape)
print(energies_val.shape)

(1000,)
(1000,)


In [5]:
# Settings handed to get_rascal_coefficients_parallelized when the expansion
# coefficients are computed.
HYPERS = {
    'interaction_cutoff': 6.3,
    'max_radial': 5,
    'max_angular': 5,
    'gaussian_sigma_type': 'Constant',
    'gaussian_sigma_constant': 0.05,
    'cutoff_smooth_width': 0.3,
    'radial_basis': 'GTO',
}
    

In [6]:
# Shared pathos process pool with 40 workers. It is passed to
# get_rascal_coefficients_parallelized and read as a global by transform_full.
p = Pool(40)

Process ForkPoolWorker-18:
Process ForkPoolWorker-1:
Process ForkPoolWorker-22:
Process ForkPoolWorker-39:
Process ForkPoolWorker-24:
Process ForkPoolWorker-29:
Process ForkPoolWorker-32:
Process ForkPoolWorker-20:
Process ForkPoolWorker-33:
Process ForkPoolWorker-36:
Process ForkPoolWorker-5:
Process ForkPoolWorker-14:
Process ForkPoolWorker-3:
Process ForkPoolWorker-40:
Process ForkPoolWorker-30:
Process ForkPoolWorker-15:
Process ForkPoolWorker-26:
Process ForkPoolWorker-9:
Process ForkPoolWorker-6:
Process ForkPoolWorker-13:
Process ForkPoolWorker-25:
Process ForkPoolWorker-11:
Process ForkPoolWorker-35:
Process ForkPoolWorker-37:
Process ForkPoolWorker-34:
Process ForkPoolWorker-23:
Process ForkPoolWorker-27:
Process ForkPoolWorker-17:
Process ForkPoolWorker-31:
Process ForkPoolWorker-12:
Process ForkPoolWorker-38:
Process ForkPoolWorker-28:
Process ForkPoolWorker-19:
Process ForkPoolWorker-4:
Process ForkPoolWorker-21:
Process ForkPoolWorker-10:
Process ForkPoolWorker-8:
Process 

In [7]:
# Sanity check: number of training structures that were loaded.
print(len(structures_train))

5000


In [8]:
# Compute the expansion coefficients for every training structure on the
# shared pool; the leading axis of the result is printed as 25000 for the
# 5000 structures loaded above.
coefficients = get_rascal_coefficients_parallelized(p, structures_train, HYPERS, 2)
print(coefficients.shape)

# Regroup the leading axis into (structure, 5 entries per structure); the
# three trailing axes are carried over unchanged.
coefficients = coefficients.reshape(
    (len(structures_train), 5) + coefficients.shape[1:4]
)

print(coefficients.shape)

100%|██████████| 50/50 [00:06<00:00,  7.61it/s]


(25000, 10, 6, 11)
(5000, 5, 10, 6, 11)


In [9]:
# Disabled draft of a generic chunked/parallel transform helper. It is an
# inert string literal: nothing in it is defined or executed, the cell only
# displays the string.
# NOTE(review): if re-enabled as is, the `print(len(ans))` line is indented
# by five spaces instead of four and would raise an IndentationError.
'''def concatenate_data(datas):
    covariants = [datas[i].covariants_ for i in range(len(datas))]
    covariants = np.concatenate(covariants, axis = 0)
    return Data(covariants, datas[0].actual_sizes_, datas[0].importances_, datas[0].raw_importances_)

def transform_parallelized(transformer, *args, task_size = 100):
    print(len(args))
    tasks = []
    for i in range(0, args[0].shape[0], task_size):
        now = []
        for j in range(len(args)):
            now.append(args[j][i : i + task_size])
        tasks.append(now)
    
    def func(task):
        return transformer.transform(*task)
    
    result = [res for res in tqdm.tqdm(p.imap(func, tasks), total = len(tasks))]
    ans = []
    for i in range(len(result[0])):
        now = [result[j][i] for j in range(len(result))]
        ans.append(concatenate_data(now))
        
     print(len(ans))
    print(ans[0].shape)
    return ans'''

'def concatenate_data(datas):\n    covariants = [datas[i].covariants_ for i in range(len(datas))]\n    covariants = np.concatenate(covariants, axis = 0)\n    return Data(covariants, datas[0].actual_sizes_, datas[0].importances_, datas[0].raw_importances_)\n\ndef transform_parallelized(transformer, *args, task_size = 100):\n    print(len(args))\n    tasks = []\n    for i in range(0, args[0].shape[0], task_size):\n        now = []\n        for j in range(len(args)):\n            now.append(args[j][i : i + task_size])\n        tasks.append(now)\n    \n    def func(task):\n        return transformer.transform(*task)\n    \n    result = [res for res in tqdm.tqdm(p.imap(func, tasks), total = len(tasks))]\n    ans = []\n    for i in range(len(result[0])):\n        now = [result[j][i] for j in range(len(result))]\n        ans.append(concatenate_data(now))\n        \n     print(len(ans))\n    print(ans[0].shape)\n    return ans'

In [18]:
class BSTransformer():
    """Chain of NICE blocks fitted once and then applied to new coefficients.

    The pipeline is: ``InitialTransformer`` -> ``IndividualLambdaPCAsBoth``
    -> ``ThresholdExpansioner`` -> ``IndividualLambdaPCAsBoth`` ->
    ``ThresholdExpansioner`` in ``mode='invariants'``.
    """

    def __init__(self):
        """Create the (unfitted) blocks of the pipeline."""
        self.initial_ = InitialTransformer()
        self.pca_0_ = IndividualLambdaPCAsBoth()
        self.expansioner_1_ = ThresholdExpansioner()
        self.pca_1_ = IndividualLambdaPCAsBoth()
        self.expansioner_2_ = ThresholdExpansioner(num_expand = 100000, mode = 'invariants')
        # Smaller configuration that was tried previously:
        #   self.pca_1_ = IndividualLambdaPCAsBoth(100)
        #   self.expansioner_2_ = ThresholdExpansioner(num_expand = 100, mode = 'invariants')

    @staticmethod
    def _leading_block(data):
        """Return ``data.covariants_[:, :data.actual_sizes_[0], 0, 0]``."""
        width = data.actual_sizes_[0]
        return data.covariants_[:, :width, 0, 0]

    def fit(self, coefficients):
        """Fit every block in order, feeding each one the output of the previous.

        Parameters
        ----------
        coefficients
            Input accepted by ``InitialTransformer.transform``.

        Returns
        -------
        None
        """
        # Stage 0: initial split into an (even, odd) pair, then PCA.
        even, odd = self.initial_.transform(coefficients)
        self.pca_0_.fit(even, odd)
        even, odd = self.pca_0_.transform(even, odd, method = 'parallel')

        # Stage 1: expand the stage-0 pair with itself, then PCA.
        self.expansioner_1_.fit(even, odd, even, odd)
        even_next, odd_next = self.expansioner_1_.transform(even, odd, even, odd)
        self.pca_1_.fit(even_next, odd_next)
        even_next, odd_next = self.pca_1_.transform(even_next, odd_next, method = 'parallel')

        # Stage 2: only fitted here; it combines the stage-1 and stage-0 pairs.
        self.expansioner_2_.fit(even_next, odd_next, even, odd)

    def transform(self, coefficients):
        """Run the fitted pipeline on ``coefficients``.

        Returns
        -------
        list
            Three items, in this order:
            the leading block of the first output of ``initial_.transform``
            (before ``pca_0_``), the leading block of the first output of
            ``expansioner_1_.transform`` (before ``pca_1_``), and the first
            output of ``expansioner_2_.transform``. The second output of
            ``expansioner_2_.transform`` is discarded.
        """
        even_raw, odd_raw = self.initial_.transform(coefficients)
        even_pca, odd_pca = self.pca_0_.transform(even_raw, odd_raw)

        even_exp, odd_exp = self.expansioner_1_.transform(
            even_pca, odd_pca, even_pca, odd_pca
        )
        even_exp_pca, odd_exp_pca = self.pca_1_.transform(even_exp, odd_exp)

        invariants_even, _ = self.expansioner_2_.transform(
            even_exp_pca, odd_exp_pca, even_pca, odd_pca
        )

        return [
            self._leading_block(even_raw),
            self._leading_block(even_exp),
            invariants_even,
        ]
        

In [19]:
def split_coefficients(coefficients):
    """Split per-structure coefficients into entry 0 and the remaining entries.

    Parameters
    ----------
    coefficients : np.ndarray
        Array whose axis 0 indexes structures and whose axis 1 indexes the
        entries within a structure (the notebook builds it with 5 entries
        per structure). The trailing axes are carried over unchanged.

    Returns
    -------
    c_coefficients : np.ndarray
        ``coefficients[:, 0]``, shape ``(n_structures, *trailing)``.
    h_coefficients : np.ndarray
        ``coefficients[:, 1:]`` with the first two axes merged, shape
        ``(n_structures * (n_entries - 1), *trailing)``; the rows of one
        structure stay contiguous.

    Notes
    -----
    The ``c_``/``h_`` prefixes presumably stand for carbon and hydrogen;
    that is not established by this file.
    """
    c_coefficients = coefficients[:, 0]
    h_coefficients = coefficients[:, 1:]

    # Take the number of remaining entries and the trailing axes from the
    # array itself instead of hard-coding 4 entries and exactly three axes.
    num_remaining = h_coefficients.shape[1]
    h_coefficients = np.reshape(
        h_coefficients,
        [coefficients.shape[0] * num_remaining, *coefficients.shape[2:]],
    )
    return c_coefficients, h_coefficients

In [20]:
# Separate entry 0 of every structure from the remaining entries and check
# the shape of the first part.
c_coefficients, h_coefficients = split_coefficients(coefficients)
print(c_coefficients.shape)

(5000, 10, 6, 11)


In [21]:
# Disabled timing experiment kept as an inert string literal: fit a single
# BSTransformer on c_coefficients and print the elapsed seconds.
'''begin = time.time()
pst = BSTransformer()
pst.fit(c_coefficients)
print(time.time() - begin)'''

'begin = time.time()\npst = BSTransformer()\npst.fit(c_coefficients)\nprint(time.time() - begin)'

In [22]:
# Disabled timing experiment kept as an inert string literal: transform
# c_coefficients with `pst` (which is itself only created inside another
# disabled string) and print the shape of the third output.
'''begin = time.time()
res = pst.transform(c_coefficients)
print(res[2].shape)
print(time.time() - begin)'''

'begin = time.time()\nres = pst.transform(c_coefficients)\nprint(res[2].shape)\nprint(time.time() - begin)'

In [23]:
def fit(coefficients, num_to_fit):
    """Fit one BSTransformer per coefficient subset and report the timings.

    ``coefficients`` is split with ``split_coefficients``; a fresh
    ``BSTransformer`` is then fitted on the first ``num_to_fit`` rows of each
    of the two parts. The shape of the first part and the wall-clock seconds
    of each fit are printed.

    Parameters
    ----------
    coefficients
        Array accepted by ``split_coefficients``.
    num_to_fit : int
        Number of leading rows of each part used for fitting. The same value
        is applied to both parts even though the second part has more rows.

    Returns
    -------
    tuple
        ``(c_trans, h_trans)``: the transformers fitted on the first and on
        the second part respectively.
    """
    c_coefficients, h_coefficients = split_coefficients(coefficients)
    print(c_coefficients.shape)

    fitted = []
    for subset in (c_coefficients, h_coefficients):
        started = time.time()
        transformer = BSTransformer()
        transformer.fit(subset[:num_to_fit])
        print(time.time() - started)
        fitted.append(transformer)

    c_trans, h_trans = fitted
    return c_trans, h_trans



In [24]:
# Same split as performed in an earlier cell; re-running it only rebinds the
# two names to equal values.
c_coefficients, h_coefficients = split_coefficients(coefficients)


In [25]:
# Fit both transformers on at most 5000 rows each. c_coefficients has 5000
# rows, so it is used in full; h_coefficients has four times as many rows,
# so only its first 5000 rows are used.
# NOTE(review): the saved output of this cell ends in a KeyboardInterrupt,
# so c_trans / h_trans may not have been produced by the recorded run.
c_trans, h_trans = fit(coefficients, 5000)



(5000, 10, 6, 11)


Exception in thread Thread-4:
Traceback (most recent call last):
  File "/home/pozdn/psi4conda/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/pozdn/psi4conda/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/home/pozdn/psi4conda/lib/python3.6/site-packages/multiprocess/pool.py", line 405, in _handle_workers
    pool._maintain_pool()
  File "/home/pozdn/psi4conda/lib/python3.6/site-packages/multiprocess/pool.py", line 246, in _maintain_pool
    self._repopulate_pool()
  File "/home/pozdn/psi4conda/lib/python3.6/site-packages/multiprocess/pool.py", line 239, in _repopulate_pool
    w.start()
  File "/home/pozdn/psi4conda/lib/python3.6/site-packages/multiprocess/process.py", line 105, in start
    self._popen = self._Popen(self)
  File "/home/pozdn/psi4conda/lib/python3.6/site-packages/multiprocess/context.py", line 277, in _Popen
    return Popen(process_obj)
  File "/home/pozdn/psi4conda/lib/pyth

KeyboardInterrupt: 

In [None]:
# Save both fitted transformers in a single pickle file (protocol 4).
# open(..., "wb") does not create "nu_2_data/", so that directory must
# already exist.
# NOTE(review): pickle is imported here rather than in the first cell; the
# later pickle.dump cells rely on this cell having been run.
import pickle
with open("nu_2_data/transformers", "wb") as f:
    pickle.dump([c_trans, h_trans], f, protocol = 4)

In [None]:
def transform_full(transformer, coefficients, task_size = 100, pool = None):
    """Apply ``transformer.transform`` to ``coefficients`` in parallel chunks.

    ``coefficients`` is cut along axis 0 into consecutive chunks of at most
    ``task_size`` rows, every chunk is transformed on the pool, and the
    per-chunk outputs are concatenated back along axis 0. The number of
    chunks is printed and a tqdm progress bar is shown.

    Parameters
    ----------
    transformer
        Object whose ``transform(chunk)`` returns a sequence of arrays; every
        chunk is expected to return the same number of arrays.
    coefficients : np.ndarray
        Input array; axis 0 is the axis that is chunked.
    task_size : int, optional
        Maximum number of rows per chunk.
    pool : optional
        Object providing ``imap(func, iterable)``. When omitted, the
        notebook-level pool ``p`` is used, as before.

    Returns
    -------
    list
        One array per output of ``transformer.transform``, each the
        concatenation over all chunks. An empty list when ``coefficients``
        has no rows (previously this case raised ``IndexError``).
    """
    if pool is None:
        # Fall back to the module-level pool so existing calls keep working.
        pool = p

    tasks = [coefficients[start : start + task_size]
             for start in range(0, coefficients.shape[0], task_size)]
    print(len(tasks))

    result = list(tqdm.tqdm(pool.imap(transformer.transform, tasks), total = len(tasks)))

    # result is indexed [chunk][output]; zip(*...) regroups it by output so
    # that each output can be concatenated across chunks.
    return [np.concatenate(parts, axis = 0) for parts in zip(*result)]

In [None]:
# Split again, then run the fitted c_trans over all rows of c_coefficients
# in parallel chunks. `result` is the list returned by transform_full (one
# concatenated array per output of the transformer).
c_coefficients, h_coefficients = split_coefficients(coefficients)
#result = np.concatenate(transform_full(c_trans, c_coefficients), axis = 1)
result = transform_full(c_trans, c_coefficients)
# Disabled shape checks, kept as an inert string literal.
'''print(result[0].shape)
print(result[1].shape)
print(result[2].shape)'''
#print(result.shape)

In [None]:
# Save the outputs obtained for c_coefficients (pickle protocol 4). The
# directory "nu_2_data/" must already exist.
with open("nu_2_data/zpbs_c", "wb") as f:
    pickle.dump(result, f, protocol = 4)

In [None]:
# Disabled reload of the saved transformers, kept as an inert string literal.
# NOTE(review): pickle.load executes code embedded in the file; only load
# files that were written by this notebook.
'''import pickle
with open("nu_2_data/transformers", "rb") as f:
    c_trans, h_trans = pickle.load(f)'''

In [None]:
# Run the fitted h_trans over all rows of h_coefficients.
# Note: this rebinds `result`, replacing the outputs for c_coefficients that
# an earlier cell stored under the same name (and wrote to disk).
result = transform_full(h_trans, h_coefficients)
#print(result.shape)

In [None]:
# Save the outputs obtained for h_coefficients (pickle protocol 4). The
# directory "nu_2_data/" must already exist.
with open("nu_2_data/zpbs_h", "wb") as f:
    pickle.dump(result, f, protocol = 4)

In [None]:
# Disabled earlier variant of BSTransformer, kept as an inert string literal.
# Its fit() routes every transform through transform_parallelized, a helper
# that exists only inside another disabled string literal in this notebook,
# so this variant cannot run as written.
'''class BSTransformer():
    def __init__(self):
        self.initial_ = InitialTransformer()
        self.pca_0_ = IndividualLambdaPCAsBoth()
        self.expansioner_1_ = ThresholdExpansioner()
        self.pca_1_ = IndividualLambdaPCAsBoth()
        self.expansioner_2_ = ThresholdExpansioner(num_expand = 100000, mode = 'invariants')
        
    def fit(self, coefficients):
        data_even_0, data_odd_0 = transform_parallelized(self.initial_, coefficients)
        self.pca_0_.fit(data_even_0, data_odd_0)
        data_even_0, data_odd_0 = transform_parallelized(self.pca_0_, data_even_0, data_odd_0)
        #print(data_even_0.covariants_.shape[0])
        self.expansioner_1_.fit(data_even_0, data_odd_0, data_even_0, data_odd_0)
        data_even_1, data_odd_1 = transform_parallelized(self.expansioner_1_, data_even_0, data_odd_0, data_even_0, data_odd_0)
        self.pca_1_.fit(data_even_1, data_odd_1)
        data_even_1, data_odd_1 = transform_parallelized(self.pca_1_, data_even_1, data_odd_1)
        self.expansioner_2_.fit(data_even_1, data_odd_1, data_even_0, data_odd_0)
        
    def transform(self, coefficients):                
        data_even_0, data_odd_0 = self.initial_.transform(coefficients)
        data_even_0_t, data_odd_0_t = self.pca_0_.transform(data_even_0, data_odd_0)
        data_even_1, data_odd_1 = self.expansioner_1_.transform(data_even_0_t, data_odd_0_t,
                                                                data_even_0_t, data_odd_0_t)
        data_even_1_t, data_odd_1_t = self.pca_1_.transform(data_even_1, data_odd_1)
        invariants_even_2, _ = self.expansioner_2_.transform(data_even_1_t, data_odd_1_t, 
                                                             data_even_0_t, data_odd_0_t)
        return [data_even_0.covariants_[:, :data_even_0.actual_sizes_[0], 0, 0], 
                data_even_1.covariants_[:, :data_even_1.actual_sizes_[0], 0, 0],
                invariants_even_2]
        '''

In [None]:
# Scratch check of argsort: `b` holds the indices that sort `a`, so `a[b]` is
# ascending and indexing with the reversed indices gives descending order.
a = np.array([2, 4, 1, 35, 2, 3145, 1, -1234])
b = a.argsort()
print(a[b])
print(a[b[::-1]])