# Imports and Settings

In [1]:
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import os
from scipy.spatial.distance import squareform, cdist
import time
from numpy.testing import *

In [2]:
from Code.DataGeneration.printer import ProgressTimer
from Code.DataGeneration.saver import create_path
from Code.DataGeneration.transform import get_spherical, change_base, get_input_data

In [3]:
# Location of the ISO17 reference database, relative to the notebook's working
# directory; opened with ase.db.connect in the loading cell.
path_to_db = "./Dataset/iso17/reference.db"

In [4]:
from ase.db import connect

# Each molecule is stored as an (n_atoms, 4) float array: column 0 holds the
# atomic number, columns 1-3 the Cartesian position. `energies` collects the
# matching `total_energy` value of every selected row.
molecules = []
energies = []
with connect(path_to_db) as conn:
    # NOTE(review): an integer selection in ase.db matches a single row id, so
    # this loads exactly one molecule (the next cell reports shape (1, 19, 4)).
    # Confirm whether a larger selection was intended.
    for row in conn.select(5):
        # column_stack turns the 1-D `numbers` vector into a column by itself,
        # so the atom count is no longer hard-coded to 19.
        molecules.append(np.column_stack((row['numbers'], row['positions'])))
        energies.append(row['total_energy'])

In [5]:
np.array(molecules).shape

(1, 19, 4)

In [6]:
molecules

[array([[ 8.        , -3.13311987,  1.88354817,  3.12766679],
        [ 6.        , -2.24705161,  1.93852065,  1.97802995],
        [ 6.        , -0.782912  ,  1.58546259,  2.15014356],
        [ 6.        , -0.65984825,  1.29204325,  0.51449211],
        [ 8.        , -0.27095204,  0.04593806, -0.04992813],
        [ 6.        , -0.11965932,  2.44181756, -0.23724109],
        [ 6.        , -1.08130266,  3.68052005,  0.27874998],
        [ 6.        , -2.31743849,  2.80620132,  0.86798379],
        [ 6.        , -2.21857731,  1.35554898,  0.55055286],
        [ 1.        , -2.96925792,  2.73661176,  3.54130577],
        [ 1.        , -0.27153608,  2.40714079,  2.58637797],
        [ 1.        , -0.64130634,  0.66409026,  2.81151566],
        [ 1.        ,  0.17818655,  0.27267158, -0.90326441],
        [ 1.        ,  0.93555285,  2.60279125, -0.05197857],
        [ 1.        , -0.35827703,  2.41891948, -1.26786949],
        [ 1.        , -0.57402327,  4.26807961,  1.08342976],
        

## Create a DataFrame from Input Files

In [7]:
#list_ = []
#for file in filenames:
#    filepath = os.path.join(path_to_files, file)
#    try:
#        df_single = pd.read_csv(filepath, skiprows=2,
#                               skipfooter=3, delimiter='\t',
#                               names=['atomtype', 'x', 'y', 'z', 'charge'], 
#                               dtype=dict(atomtype=str, x=float, y=float, z=float, charge=float))
#    except:
#        print(file)
#    df_single['file'] = file
#    list_.append(df_single)
#df_all = pd.concat(list_)
#df_all.head(5)

## Prepare raw Data for Transformation

In [8]:
#n_atoms = 19
#h_atoms = 10
#mask_H = dict(H='ZZZ_H')
#df_all = df_all.replace(dict(atomtype=mask_H))
## sort by file and atomtype
#df_all = df_all.sort_values(['file', 'atomtype']).reset_index(drop=True)
## create file id column
#df_all['file_id'] = (df_all.index) // n_atoms + 1

In [9]:
#df_all.head(25)

## Transform DataFrame to NumPy Array for Faster Calculations

In [10]:
#raw_matrix = df_all[['file_id', 'atomtype', 'x', 'y', 'z', 'charge']].values

# Transformation Functions

# Run Calculations

In [11]:
# Build the network input for every loaded molecule and report the wall-clock
# time the transformation took (in seconds).
start = time.time()
network_in = np.array(
    get_input_data(molecules)
)
print(f'time: {time.time() - start}')

NameError: name 'molecule' is not defined

In [None]:
network_in.shape

## Get Y-labels

In [None]:
len(energies)

## Save arrays to file

In [None]:
# Output targets for the np.save calls in the following cell: one file for the
# network input array and one for the energy labels.
# NOTE: np.save appends ".npy" when the file name does not already end with it.
data_path = './Dataset/c702h10_X'
label_path = './Dataset/c702h10_Y'

In [None]:
# Persist the network input and the energy labels as NumPy binary files.
np.save(file=data_path, arr=network_in)
np.save(file=label_path, arr=energies)

# Testing

## Test Functions

In [None]:
def test_get_spherical():
    """Compare get_spherical with hand-derived values for three points.

    Every expected row has the form (1/r, cos(theta), cos(phi), sin(phi)),
    where r is the distance of the point from the origin, theta the angle to
    the z-axis and phi the azimuth in the xy-plane.
    """
    cartesian = np.array([[0, 1, 2],
                          [1, 1, 1],
                          [-1, 2, 1]])
    # One (1/r, theta, phi) triple per row of `cartesian`.
    reference = [
        (1/np.sqrt(5), np.arccos(2/np.sqrt(5)), np.pi/2),
        (1/np.sqrt(3), np.arccos(1/np.sqrt(3)), np.arctan(1)),
        # x < 0 here, so the arctan branch is shifted by pi.
        (1/np.sqrt(6), np.arccos(1/np.sqrt(6)), np.arctan(-2) + np.pi),
    ]
    expected = np.array([[inv_r, np.cos(theta), np.cos(phi), np.sin(phi)]
                         for inv_r, theta, phi in reference])
    assert_array_almost_equal(expected, get_spherical(cartesian))

In [None]:
def test_change_base():
    """Compare change_base with hand-computed coordinates for three points.

    For each point p the expected row holds the coefficients (c0, c1, c2) that
    satisfy c0*x + c1*y + c2*z = p - o for the basis vectors and origin used
    below.
    """
    origin = np.array([-1, 4, 3])
    basis_x = np.array([1, 1, 0])
    basis_y = np.array([0, 0, 1])
    basis_z = np.array([2, 1, 3])
    points = np.array([[0, 1, 2],
                       [1, 1, 1],
                       [-1, 2, 1]])
    expected = np.array([[-7., -13., 4.],
                         [-8., -17., 5.],
                         [-4., -8., 2.]])
    actual = change_base(points, basis_x, basis_y, basis_z, origin)
    assert_array_almost_equal(expected, actual)

In [None]:
def test_get_input_data():
    """Run get_input_data on a four-atom toy molecule and return its output.

    NOTE(review): nothing is asserted here; the result is only returned for
    manual inspection.
    NOTE(review): the literal mixes strings and integers, so NumPy stores every
    entry as a string. Other cells call get_input_data with a single argument
    holding numeric arrays — confirm this call still matches the function.
    """
    atom_rows = [
        ['C', 1, 1, 1],
        ['O', 1, 0, 0],
        ['O', 0, 3, 0],
        ['ZZZ_H', 0, 2, 0],
    ]
    toy_molecule = np.array(atom_rows)
    return get_input_data(toy_molecule, 4)

## Run Tests

In [None]:
test_mol = molecules[0]

In [None]:
test_mol = test_mol[test_mol[:,0].argsort()]

In [None]:
results = np.zeros((19, 72))

In [None]:
# Hand-rolled reference computation to validate get_input_data against.
# NOTE(review): assumes test_mol was sorted by column 0 in an earlier cell, so
# rows 0-9 are the ten hydrogens and rows 10-18 the nine heavy atoms — confirm.
positions = test_mol[:, 1:]
for i in range(10):
    # Distances from atom i to the atoms in rows 10..18; the two closest ones
    # define the local coordinate frame (+10 maps back to test_mol row indices).
    dists = cdist(positions[np.newaxis, i], positions[10:])
    one, two = dists.argsort().reshape(9)[:2] + 10
    zero = i
    # Right-handed frame: x points to the nearest neighbour, z is normal to the
    # plane spanned by both neighbours, y completes the frame.
    x = positions[one] - positions[zero]
    z = np.cross(x, positions[two] - positions[zero])
    y = np.cross(z, x)
    x /= np.linalg.norm(x)
    y /= np.linalg.norm(y)
    z /= np.linalg.norm(z)
    print(i, one, two, positions[one], positions[two])
    # All atoms except atom i, expressed in the local frame centred on atom i.
    others = np.vstack((test_mol[:i], test_mol[i+1:]))
    ch_b_others = change_base(others[:, 1:], x, y, z, positions[zero])
    sph_others = get_spherical(ch_b_others)
    # Sort by the first spherical column and apply the SAME permutation to
    # `others`. Previously the permutation was recomputed from the already
    # sorted array (an identity ordering), so `others` kept its old order and
    # the atom-type sort below was applied to mismatched rows.
    order = sph_others[:, 0].argsort()
    sph_others = sph_others[order]
    others = others[order]
    # Group by atom type (column 0 of `others`); a stable sort keeps the
    # ordering established above within each type.
    type_order = others[:, 0].argsort(kind='stable')
    sph_others = sph_others[type_order]
    results[i] = sph_others.reshape(72)

In [None]:
val_result = results[:10]

In [None]:
nearest = cdist(positions[np.newaxis, 0], positions[10:])#.argsort().reshape(9) + 10

In [None]:
nearest

In [None]:
test_result = np.array(get_input_data(test_mol[np.newaxis])).reshape(19, 72)[:10]

In [None]:
test_result.sum(axis=1)

In [None]:
val_result.sum(axis=1)

In [None]:
test_result[0]

In [None]:
val_result[0]

In [None]:
assert_array_almost_equal(test_result, val_result)

In [None]:
test = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
test.reshape(8)

In [None]:
test_result.sum()

In [None]:
np.isnan(test_result)

In [None]:
val_result.sum()

In [None]:
diff = test_result-val_result

In [None]:
# Walk the 10 x 72 difference matrix row by row and print the (row, column)
# index of every entry whose absolute difference is at most 1e-5.
# NOTE(review): this lists the entries that AGREE — confirm that is intended
# and not the mismatching ones.
for i, j in np.ndindex(10, 72):
    if np.abs(diff[i, j]) <= 1e-5:
        print(i, j)

In [None]:
test_result[:, 1]

In [None]:
val_result[:, 1]

In [None]:
test_get_spherical()

In [None]:
test_change_base()

In [None]:
test_get_input_data()

In [None]:
np.cross(np.array([-1, 2, -1]), np.array([0, -1, -1]))

In [None]:
x = np.array([[1, 1, 1], [1, 0, 0], [0, 3, 0], [0, 2, 0]])

In [None]:
cdist(x, x)#.argsort()

In [None]:
# NOTE(review): `mol` is not defined in any cell visible in this notebook
# section; on a fresh kernel this raises NameError unless it is created
# elsewhere — confirm or remove.
test_mol = mol