In [132]:
%load_ext line_profiler
%load_ext autoreload

%autoreload 2
%reload_ext autoreload

# === IMPORTS ===

import logging, sys
import torch
import seqm
from ase.io import read as ase_read
from seqm.seqm_functions.constants import Constants
from seqm.Molecule import Molecule
from seqm.ElectronicStructure import Electronic_Structure
from termcolor import colored


from seqm.seqm_functions.fock import fock
from seqm.seqm_functions.pack import unpack
import seqm.seqm_functions.pack as pack
import torch.nn.functional as F

#=== TORCH OPTIONS ===

# Double precision is used both as the torch default and for explicitly typed tensors.
dtype = torch.float64
torch.set_default_dtype(dtype)

# Everything runs on the CPU for now; the GPU selection is kept for reference.
# if torch.cuda.is_available():
#     device = torch.device('cuda')
# else:
#     device = torch.device('cpu')
device = torch.device('cpu')

# Compact tensor printing: 5 decimals, wide lines, no scientific notation.
# torch.set_printoptions(precision=5, linewidth=200, profile="full", sci_mode=False)
torch.set_printoptions(profile='short', precision=5, linewidth=200, sci_mode=False)

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [135]:
# Inspection only: the bare expression makes the cell display the torch.norm signature.
torch.norm

<function torch.functional.norm(input, p: Union[float, str, NoneType] = 'fro', dim=None, keepdim=False, out=None, dtype=None)>

In [6]:
# colored logging with custom levels QM1/QM2/QM3 for deeper routines

logging.basicConfig(level=logging.DEBUG,
                    format='%(funcName)s : %(lineno)d : %(levelname)s : %(message)s')

# The custom levels sit numerically below DEBUG, so their messages are only
# emitted once the logger level is lowered to QM1/QM2/QM3 explicitly.
QM1 = logging.DEBUG - 3  # informal level of depth; QM1 - almost always, usually outside of loops
QM2 = logging.DEBUG - 4  #                          QM2 - sometimes, in the loops
QM3 = logging.DEBUG - 5  #                          QM3 - lowest (most verbose) of the three levels

logging.addLevelName(QM1, "QM1")
def qm1(self, message, *args, **kwargs):
    """Log `message` at the custom QM1 level if that level is enabled for this logger."""
    if self.isEnabledFor(QM1):
        self._log(QM1, message, args, **kwargs)

logging.addLevelName(QM2, "QM2")
def qm2(self, message, *args, **kwargs):
    """Log `message` at the custom QM2 level if that level is enabled for this logger."""
    if self.isEnabledFor(QM2):
        self._log(QM2, message, args, **kwargs)

logging.addLevelName(QM3, "QM3")
def qm3(self, message, *args, **kwargs):
    """Log `message` at the custom QM3 level if that level is enabled for this logger."""
    if self.isEnabledFor(QM3):
        self._log(QM3, message, args, **kwargs)


# Attach the helpers to Logger so every logger gains .qm1() / .qm2() / .qm3().
logging.Logger.qm1 = qm1
logging.Logger.qm2 = qm2
logging.Logger.qm3 = qm3

# Root logger, shared by the whole notebook.
logger = logging.getLogger()

                              
# style name -> (text color, background color, attributes) passed to termcolor.colored
colors = {'qm'     : ('cyan',   None, None),
          'matrix' : ('blue',   None, ['bold']),
          'vector' : ('yellow', None, ['bold']),
          'evals'  : ('green',  None, ['bold']),
          'warn'   : ('red',    None, ['bold']),
          }

def fmt_log(data, message, fmt):
    """
    fmt_log : formats log message with color and style using termcolor module

    The message is followed by a new line when data is exactly a list, tuple or
    torch.Tensor (so that arrays start on their own line), and by ' : ' otherwise.

    Args:
        data (any): data to print, pass None to print the message only
        message (str): message to print
        fmt (str): style from colors dict

    Returns:
        str: formatted string with color and style

    Raises:
        KeyError: if fmt is not a key of the colors dict
    """
    color, on_color, attrs = colors[fmt]

    # Exact type match on purpose: subclasses (e.g. torch.Size, a tuple subclass)
    # keep the single-line 'message : data' layout.
    on_new_line = type(data) in (list, tuple, torch.Tensor)
    separator = '\n' if on_new_line else ' : '
    mes = f'{colored(message, color, on_color, attrs=attrs)}{separator}'

    # Identity test: `data == None` would call __eq__, which is element-wise for
    # array-like data and then fails when used as a truth value.
    if data is None:
        return mes
    return mes + str(colored(data, color, on_color, attrs=attrs))

### log

07/13/23 - The QM part seems to be working fine.
Full diagonalization agrees with NEXMD.
A small guess space misses relevant vectors, but a large guess space includes them.


PASCAL 1 COULD BE INCORRECT

### QM routines

In [7]:
def run_seqm_1mol(xyz):
    """
    run_seqm_1mol : run PYSEQM for a single molecule

    Thin wrapper around run_seqm: the molecule is submitted as a batch of one,
    so both entry points share the same PYSEQM settings. run_seqm has to be
    defined (its cell executed) before this function is called.

    Args:
        xyz (str): path to xyz file

    Returns:
        Molecule object: PYSEQM object with molecule data
    """
    return run_seqm([xyz])

In [53]:
def run_seqm(xyz_list):
    """
    run_seqm : run PYSEQM for a batch of molecules

    Every geometry is read with ase; atomic numbers and positions are packed into
    batched tensors with one row per molecule, a PYSEQM Molecule is built from
    them and the Electronic_Structure driver is run on it.

    Args:
        xyz_list (list of str): paths to xyz files, one per molecule; a single
            path given as a plain string is treated as a batch of one.
            The batch tensors are built with torch.tensor, which needs rectangular
            input, so all molecules must have the same number of atoms
            (no padding is done here).

    Returns:
        Molecule object: PYSEQM object with the data of the whole batch

    Raises:
        ValueError: if xyz_list is empty
    """
    if isinstance(xyz_list, str):
        # a bare path would otherwise be iterated character by character
        xyz_list = [xyz_list]

    atoms = [ase_read(path) for path in xyz_list]
    if not atoms:
        raise ValueError('xyz_list must contain at least one xyz file path')

    species = torch.tensor([a.get_atomic_numbers() for a in atoms], dtype=torch.long, device=device)
    coordinates = torch.tensor([a.get_positions() for a in atoms], dtype=dtype, device=device)

    const = Constants().to(device)

    # 0 followed by the distinct atomic numbers present in the batch
    elements = [0]+sorted(set(species.reshape(-1).tolist()))

    seqm_parameters = {
                    'method' : 'PM3',  # AM1, MNDO, PM3
                    'scf_eps' : 1.0e-6,  # unit eV, change of electric energy, as nuclear energy doesn't change during SCF
                    'scf_converger' : [2,0.0], # converger used for scf loop
                                            # [0, 0.1], [0, alpha] constant mixing, P = alpha*P + (1.0-alpha)*Pnew
                                            # [1], adaptive mixing
                                            # [2], adaptive mixing, then pulay
                    'sp2' : [False, 1.0e-5],  # whether to use sp2 algorithm in scf loop,
                                                #[True, eps] or [False], eps for SP2 convergence criteria
                    'elements' : elements, #[0,1,6,8],
                    'learned' : [], # learned parameters name list, e.g ['U_ss']
                    #'parameter_file_dir' : '../seqm/params/', # file directory for other required parameters
                    'pair_outer_cutoff' : 1.0e10, # consistent with the unit on coordinates
                    'eig' : True,
                    'excited' : True,
                    }

    mol = seqm.Molecule.Molecule(const, seqm_parameters, coordinates, species).to(device)

    ### Create electronic structure driver:
    esdriver = Electronic_Structure(seqm_parameters).to(device)

    ### Run esdriver on mol:
    esdriver(mol)

    return mol

In [54]:
# Batched run: the same h2o.xyz file is passed twice, giving a batch of two molecules.
molecules = run_seqm(['h2o.xyz', 'h2o.xyz'])

In [10]:
# Inspect the nocc attribute of the batch; the output holds one entry per molecule.
molecules.nocc

tensor([4, 4])

In [11]:
# Single-molecule run on h2o.xyz; `mol` is the object inspected by later cells.
mol = run_seqm_1mol('h2o.xyz')

In [12]:
# Display the repr of the object returned by run_seqm_1mol.
mol

Molecule(
  (const): Constants()
  (parser): Parser()
)

### AUX routines

### DAVIDSON routines

In [13]:
logger.setLevel(logging.DEBUG)  # root logger at DEBUG: the custom QM1/QM2/QM3 levels are numerically lower
                               # than DEBUG, so they are filtered out here; pass QM1/QM2/QM3 to see them

In [14]:
# REFACTORING


In [128]:
from seqm.seqm_functions.excited.ortho import orthogonalize_matrix as orthogonal
from seqm.seqm_functions.excited.hamiltonian import gen_V
from seqm.seqm_functions.excited.hamiltonian import form_cis
import seqm.seqm_functions.excited
from seqm.seqm_functions.excited.orb_transform import mo2ao

In [168]:
def davidson(device, mol, N_exc, keep_n, n_V_max, max_iter, tol):
    """
    Davidson algorithm for solving eigenvalue problem of large sparse diagonally dominant matrices
    Hamiltonian is not generated or stored explicitly, only matrix-vector products are used on-the fly:
    guess space V should be orthogonalized at each iteration
    M (projection of smaller size) is V.T @ H @ V
    #! RPA (TDHF) is not implemented yet, non-Hermitian (non-symmetric), requires also left eigenvectors
    note that notation differs between implementations: V.T x A x V is bAb
    # TODO: 1) check if convergence of e_vals is needed
    # TODO: 2) vectorize and optimize orthogonalization
    # TODO: 3) check if some vectors should be dropped
    # TODO: 4) eliminate loops
    # TODO: 5) check if whole M should be regenerated, or only sub-blocks corresponding to new guess vectors
    # TODO: 6) add parameter checker like Krylov dims << N_cis

    NOTE(review): V and H_V are reduced to 2-D with torch.squeeze before the
    projection, which only works when the batch dimension is 1 - batches of
    several molecules are presumably not supported yet; confirm.

    Args:
        device (torch.device or str): device on which the work arrays are allocated
        mol (PYSEQM object): object to hold all qm data from PYSEQM
        N_exc (int)        : number of excited states to calculate
        keep_n (int)       : number of e_vals, e_vecs to keep at each iteration
        n_V_max (int)      : maximum size of Krylov subspace,
                             projected matrix will be no more than M(n_V_max x n_V_max)
        max_iter (int)     : maximum number of iterations in Davidson
        tol (float)        : threshold for residual

    Returns:
        tuple of tensors: all eigenvalues of the last projected matrix M in ascending
        order (excitation energies in default units, eV) and the matching eigenvectors
        of M, i.e. expansion coefficients in the basis V. The same pair is returned
        whether or not the residuals converged; only the printed banner differs.

    Raises:
        ValueError: if max_iter is smaller than 1 (no iteration would produce a result)
    """

    if max_iter < 1:
        raise ValueError('max_iter must be at least 1')

    n_V_start = N_exc * 2 # dimension of Krylov subspace, analogue of nd1
    # NOTE(review): nocc is displayed as a tensor elsewhere in this notebook, so N_cis is
    # presumably a tensor rather than an int - confirm that gen_V / form_cis expect that
    N_cis = mol.nocc * mol.nvirt
    n_iter = 0

    V = gen_V(device, mol, N_cis, n_V_start) # generate initial guess, V here #! should be renamed
    diag = None # create diagonal of M only once

    while n_iter < max_iter: # Davidson loop

        if n_iter > 0: # skip first step, as initial V is orthogonal
            V = torch.squeeze(V) #TODO: remove
            V = orthogonal(V)
            V = torch.unsqueeze(V, 0)
        print('=================================', flush=True)
        print(colored(f' ITERATION : {n_iter} ', 'red', 'on_white', attrs=['bold']), flush=True)
        print('SUBSPACE SIZE V: ', V.shape, flush=True)
        print('=================================')

        # ---------- form A x b product --------------------
        H_V = torch.zeros((mol.nmol, N_cis, V.shape[2]), device=device) #! formerly L_xi
                                                                        # Hamiltonian @ V
        for i in range(V.shape[2]): # one matrix-vector product per guess vector
            logger.qm3('Lxi iterations=%s', i)
            H_V[:, :, i] = form_cis(device, V[:, : ,i], mol, N_cis)
        H_V = torch.squeeze(H_V)

        print('H_V shape: ', H_V.shape)

        # ---------- form b.T x Ab product --------------------
        V = torch.squeeze(V) # TODO: remove squeeze
        print('V shape after SQUEZZE: ', V.shape)
        M = V.T @ H_V
        print('M shape: ', M.shape)
        if n_iter == 0:
            diag = torch.diag(M) # create diagonal only once

        n_iter += 1

        logger.qm1(fmt_log(diag, 'diag', 'qm'))

        # ---------- diagonalize projection M --------------------
        r_eval, r_evec = torch.linalg.eigh(M) # find eigenvalues and eigenvectors

        r_eval = r_eval.real
        r_evec = r_evec.real
        r_eval, r_idx = torch.sort(r_eval, descending=False) # sort eigenvalues in ascending order
        logger.debug(fmt_log(r_eval, 'RIGHT EVALS', 'evals'))
        r_evec = r_evec[:, r_idx] # sort eigenvectors accordingly

        e_val_n = r_eval[:keep_n] # keep only the lowest keep_n eigenvalues; full set stays in r_eval
        e_vec_n = r_evec[:, :keep_n]

        # ---------- calculate residual vectors --------------------
        # column j is  H @ (V c_j) - e_j * (V c_j); computed for all kept pairs at once,
        # so the result lives on the same device as H_V and V
        resids = H_V @ e_vec_n - (V @ e_vec_n) * e_val_n
        resids_norms_r = resids.norm(dim=0) # one norm per residual column

        # ---------- expand guess space V by not-converged resids --------------------
        print('V shape LINE 101: ', V.shape)
        if not torch.any(resids_norms_r > tol):
            print('============================', flush=True)
            print('all residuals are below tolerance')
            print('DAVIDSON ALGORITHM CONVERGED', flush=True)
            print('============================', flush=True)

            return r_eval, r_evec

        mask_r = resids_norms_r >= tol
        large_res_r = resids[:, mask_r] # residuals larger than tol
        cor_e_val_r = e_val_n[mask_r] # corresponding eigenvalues !!! check if matches

        # ------keep adding new resids --------------------
        if V.shape[1] <= n_V_max:

            for j in range(large_res_r.shape[1]):
                if V.shape[1] > n_V_max: # subspace is full, remaining residuals are skipped
                    break
                s = large_res_r[:, j]
                s_norm = s.norm()
                if s_norm < tol:
                    continue
                logger.debug(fmt_log((s_norm.item()), 'NORM of RESIDUAL', 'warn'))
                # NOTE(review): diag is the diagonal of the first projected matrix and is
                # indexed by the position j among the large residuals, not by the index of
                # the eigenpair the residual belongs to - confirm this is the intended
                # preconditioner (diag[j] also requires j < n_V_start)
                denom = (diag[j] - cor_e_val_r[j])
                s = s/denom # conditioned residuals
                V = torch.column_stack((V, s/s.norm()))
            print('V shape after expansion LINE 125: ', V.shape)

        # ------ collapse (restart) if space V is too large; mix eigenvectors with V------------
        else:
            logger.debug(fmt_log(None, '!!!! MAX subspace reached !!!!', 'warn'))

            V =  V @ r_evec[:, :n_V_start]
            logger.debug(fmt_log(V.shape, 'V shape after restart', 'qm'))

    # runs after big loop if did not converge
    print('============================', flush=True)
    print('!!! DAVIDSON ALGORITHM DID NOT CONVERGE !!!', flush=True)
    print('============================', flush=True)

    return r_eval, r_evec

In [69]:
# mol = run_seqm_1mol('c6h6.xyz')
# eval, _ = davidson(mol = mol, 
#                    N_exc = 8,
#                    keep_n = 4,
#                    n_V_max = 50, 
#                    max_iter = 50, 
#                    tol = 1e-6)

%reload_ext autoreload
%autoreload 2


logger.debug(fmt_log(eval, 'FINAL eval ', 'evals'))
from seqm.seqm_functions.excited import ortho as orthogonalize
from seqm.seqm_functions.excited.hamiltonian import gen_V
from seqm.seqm_functions.excited.hamiltonian import form_cis
from seqm.seqm_functions.excited.orb_transform import mo2ao



mol = run_seqm_1mol('h2o.xyz')
eval, _ = davidson(device = 'cpu', 
                   mol = mol, 
                   N_exc = 3,
                   keep_n = 2,
                   n_V_max = 10, 
                   max_iter = 3, 
                   tol = -1e-6)

logger.debug(fmt_log(eval, 'FINAL eval ', 'evals'))

<module> : 13 : DEBUG : [1m[32mFINAL eval [0m
[1m[32mtensor([ 5.94858,  6.83013,  9.23851,  9.99968, 11.31665, 12.61708], grad_fn=<SortBackward0>)[0m


 == GEN V ==
V shape torch.Size([1, 8, 6])
V
 tensor([[[0., 1., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0.]]])
[1m[47m[31m ITERATION : 0 [0m
SUBSPACE SIZE V:  torch.Size([1, 8, 6])


TypeError: view(): argument 'size' must be tuple of ints, but found element of type Tensor at pos 3

In [56]:
# mol = run_seqm_1mol('c6h6.xyz')
# # eval, _ = davidson(mol = mol, 
# #                    N_exc = 8,
# #                    keep_n = 4,
# #                    n_V_max = 50, 
# #                    max_iter = 50, 
# #                    tol = 1e-6)

# %reload_ext autoreload
# %autoreload 2


# logger.debug(fmt_log(eval, 'FINAL eval ', 'evals'))
# from seqm.seqm_functions.excited import ortho as orthogonalize
# from seqm.seqm_functions.excited.hamiltonian import gen_V
# from seqm.seqm_functions.excited.hamiltonian import form_cis
# from seqm.seqm_functions.excited.orb_transform import mo2ao



# mol = run_seqm(['h2o.xyz', 'h2o.xyz'])
# eval, _ = davidson(device = 'cpu', 
#                    mol = mol, 
#                    N_exc = 3,
#                    keep_n = 2,
#                    n_V_max = 10, 
#                    max_iter = 3, 
#                    tol = -1e-6)

# logger.debug(fmt_log(eval, 'FINAL eval ', 'evals'))

<module> : 13 : DEBUG : [1m[32mFINAL eval [0m
[1m[32mtensor([ 5.94858,  6.83013,  9.23851,  9.99968, 11.31665, 12.61708], grad_fn=<SortBackward0>)[0m


TypeError: zeros(): argument 'size' must be tuple of ints, but found element of type Tensor at pos 2

In [167]:
# mol = run_seqm_1mol('c6h6.xyz')
# eval, _ = davidson(mol = mol, 
#                    N_exc = 8,
#                    keep_n = 4,
#                    n_V_max = 50, 
#                    max_iter = 50, 
#                    tol = 1e-6)

%reload_ext autoreload
%autoreload 2


logger.debug(fmt_log(eval, 'FINAL eval ', 'evals'))
from seqm.seqm_functions.excited import ortho as orthogonalize
from seqm.seqm_functions.excited.hamiltonian import gen_V
from seqm.seqm_functions.excited.hamiltonian import form_cis
from seqm.seqm_functions.excited.orb_transform import mo2ao



mol = run_seqm_1mol('h2o.xyz')
eval, _ = davidson(device = 'cpu', 
                   mol = mol, 
                   N_exc = 3,
                   keep_n = 2,
                   n_V_max = 10, 
                   max_iter = 3, 
                   tol = -1e-6)

logger.debug(fmt_log(eval, 'FINAL eval ', 'evals'))

<module> : 13 : DEBUG : [1m[32mFINAL eval [0m
[1m[32mtensor([ 5.94858,  6.83013,  9.23851,  9.99968, 11.31665, 12.61708], grad_fn=<SortBackward0>)[0m


 == GEN V ==
V shape torch.Size([1, 8, 6])
V
 tensor([[[0., 1., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0.]]])
[1m[47m[31m ITERATION : 0 [0m
SUBSPACE SIZE V:  torch.Size([1, 1, 8, 6])


RuntimeError: The expanded size of the tensor (4) must match the existing size (3) at non-singleton dimension 1.  Target sizes: [1, 4, 6].  Tensor sizes: [3, 6]

In [99]:
# Inspect the e_mo attribute of the single-molecule result (presumably MO energies - confirm).
mol.e_mo

tensor([[-36.58831, -17.32189, -14.70534, -12.33198,   4.02645,   5.08758,   0.00000,   0.00000,   0.00000,   0.00000,   0.00000,   0.00000]])

In [88]:
# Inspect the nHeavy attribute (presumably the number of heavy atoms per molecule - confirm).
mol.nHeavy

tensor([1])

In [74]:
 mol.norb  # inspect the norb attribute (presumably the number of orbitals - confirm)

tensor(6)

In [93]:
# Display C_mo with its last two dimensions swapped (presumably MO coefficients - confirm).
mol.C_mo.transpose(1,2)

tensor([[[    -0.87887,     -0.00000,     -0.00000,     -0.10818,     -0.32855,     -0.32855],
         [    -0.00000,     -0.00000,     -0.77088,      0.00000,     -0.45041,      0.45041],
         [     0.34020,     -0.00000,     -0.00000,     -0.82490,     -0.31921,     -0.31921],
         [     0.00000,      1.00000,     -0.00000,     -0.00000,     -0.00000,     -0.00000],
         [    -0.33445,     -0.00000,     -0.00000,     -0.55482,      0.53866,      0.53866],
         [    -0.00000,      0.00000,      0.63698,     -0.00000,     -0.54510,      0.54510]]])