In [None]:
import numpy as np
import sys
import os

In [None]:
def get_basename(filename):
    """
    This function rebuilds the 'vibx-viby-' prefix of a file name.

    Only the first two '-'-separated pieces of filename are used. From each
    piece the text between its first and second 'b' (or up to the end of the
    piece when there is a single 'b') is taken as the label, so a name such as
    'vib12-vib34-3_4.fchk' gives 'vib12-vib34-'.

    Raises an IndexError if filename has fewer than two '-'-separated pieces
    or if one of the first two pieces contains no 'b'.
    """
    leading_parts = filename.split('-')[:2]
    labels = [part.split('b')[1] for part in leading_parts]
    return 'vib{}-vib{}-'.format(labels[0], labels[1])


In [None]:
def _advance(line_iter, count):
    """
    Consumes count lines from line_iter and returns the last one consumed.
    Returns '' once the input is exhausted, like file.readline() at end of file.
    """
    last = ''
    for _ in range(count):
        last = next(line_iter, '')
    return last


def extract_tdm(file, file2, *, ci_line_offset=4, ci_column=3,
                mo1_line_offset=838 - 276 + 2 + 17, mo1_column=6,
                mo2_line_offset=52 - 17 + 2, mo2_column=2):
    """
    This function extracts the transition dipole moment of the first
    excitation from a formatted checkpoint file and fixes its overall sign
    with the help of the matching TD-DFT log file. Returns the x, y and z
    components of the tdm.

    file:  path of the formatted checkpoint (fchk) file. The line following a
           line containing 'ETran state values' is split on whitespace and
           tokens 1, 2 and 3 are taken as the x, y and z components.
    file2: path of the log file of the same calculation.

    The remaining keyword-only arguments say where the sign information sits
    in the log file. All offsets count lines and must be at least 1; all
    columns are indices into the whitespace-separated tokens of a line. The
    defaults reproduce the values that used to be hard-coded here and are
    specific to the system they were tuned for.

    ci_line_offset:  number of lines between a line containing 'Excitation'
                     and the line holding the main CI coefficient (4 if it is
                     the first listed transition, 5 if it is the second).
    ci_column:       token index of the CI coefficient in that line.
    mo1_line_offset: number of lines between a line containing 'Coefficients'
                     and the line holding the representative MO1 coefficient.
    mo1_column:      token index of the MO1 coefficient in that line.
    mo2_line_offset: number of lines between the MO1 line and the line
                     holding the representative MO2 coefficient.
    mo2_column:      token index of the MO2 coefficient in that line.

    If a marker occurs several times in a file, the last occurrence wins. If
    a marker is missing, the corresponding value stays 0.

    Raises an IndexError if a located line has too few tokens (this includes
    running past the end of the file) and a ValueError if a token is not a
    number.
    """
    tdm_x = 0          # x component of the transition dipole moment
    tdm_y = 0          # y component of the transition dipole moment
    tdm_z = 0          # z component of the transition dipole moment
    sign1 = 0          # representative MO1 coefficient; only its sign matters
    sign2 = 0          # representative MO2 coefficient; only its sign matters
    C1 = 0             # main CI coefficient; only its sign matters

    # Extracts the transition dipole moment components from the fchk file.
    # The built-in float is used for the conversion: np.float was only an
    # alias for it and no longer exists in NumPy 1.24 and later.
    with open(file, 'r') as fchk:
        for line in fchk:
            if 'ETran state values' in line:
                data = _advance(fchk, 1).split()
                tdm_x = float(data[1])
                tdm_y = float(data[2])
                tdm_z = float(data[3])

    # The log file is scanned twice, so its lines are read once and reused.
    with open(file2, 'r') as log:
        log_lines = log.readlines()

    # First scan: main CI coefficient of the transition.
    ci_scan = iter(log_lines)
    for line in ci_scan:
        if 'Excitation' in line:
            C1 = float(_advance(ci_scan, ci_line_offset).split()[ci_column])
            print('C1='+str(C1))

    # Second scan: representative MO coefficients of the two states.
    mo_scan = iter(log_lines)
    for line in mo_scan:
        if 'Coefficients' in line:
            mo1_row = _advance(mo_scan, mo1_line_offset).split()
            sign1 = float(mo1_row[mo1_column])
            print('sign1='+str(sign1))
            mo2_row = _advance(mo_scan, mo2_line_offset).split()
            sign2 = float(mo2_row[mo2_column])
            print('sign2='+str(sign2))

    # The components are negated when sign1*sign2 is positive and, independently,
    # when C1 is negative; two negations cancel each other.
    if sign1 * sign2 > 0:
        tdm_x, tdm_y, tdm_z = -tdm_x, -tdm_y, -tdm_z
    if C1 < 0:
        tdm_x, tdm_y, tdm_z = -tdm_x, -tdm_y, -tdm_z

    return tdm_x, tdm_y, tdm_z

In [None]:
def integers(file):
    """
    This function reads the integer indices encoded in a file name and
    returns them as a list of ints.

    The part of the name between the prefix rebuilt by get_basename and the
    last five characters (the length of a '.fchk' suffix) is split at '_' and
    every piece is converted with int(), so 'vib1-vib2-3_4.fchk' gives [3, 4].
    """
    prefix_length = len(get_basename(file))
    index_part = file[prefix_length:-5]
    return [int(token) for token in index_part.split('_')]

In [None]:
"""
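Runs the extract_tdm and integers functions for every .fchk file in the
current working directory and writes the x, y and z components of the
transition dipole moments and the i and j indices to five text files
(tdm_x_data.txt, tdm_y_data.txt, tdm_z_data.txt, vib1_data.txt and
vib2_data.txt), one space-separated entry per processed file.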
"""
# The i and j indices read from the file names are used directly as row and
# column indices of the (steps, steps) matrices below.
steps = 19
mu_mat = np.zeros((steps, steps))
vib1_mat = np.zeros((steps, steps))
vib2_mat = np.zeros((steps, steps))
tdm_x_mat = np.zeros((steps, steps))
tdm_y_mat = np.zeros((steps, steps))
tdm_z_mat = np.zeros((steps, steps))
cwd = os.getcwd()

# sum1 counts the processed .fchk files; its value is printed before each file
# is handled as a simple progress indicator.
sum1 = 0
with open('tdm_x_data.txt', 'w') as file1, \
        open('tdm_y_data.txt', 'w') as file2, \
        open('tdm_z_data.txt', 'w') as file3, \
        open('vib1_data.txt', 'w') as file4, \
        open('vib2_data.txt', 'w') as file5:
    for filename in os.listdir(cwd):
        if not filename.endswith(".fchk"):
            continue
        print(sum1)
        sum1 += 1

        # The matching log file carries the same prefix and indices as the
        # fchk file, with a '.log' extension.
        integer_list = integers(filename)
        i, j = integer_list[0], integer_list[1]
        log_name = get_basename(filename) + '{}_{}.log'.format(i, j)
        tdm_x_mat[i, j], tdm_y_mat[i, j], tdm_z_mat[i, j] = extract_tdm(filename, log_name)

        # Every output file receives one space-terminated entry per fchk file,
        # all in the same order, so entries at the same position belong together.
        file1.write(str(tdm_x_mat[i, j]) + ' ')
        file2.write(str(tdm_y_mat[i, j]) + ' ')
        file3.write(str(tdm_z_mat[i, j]) + ' ')
        file4.write(str(i) + ' ')
        file5.write(str(j) + ' ')

