In [1]:
import os

In [46]:
# This function walks a directory tree and returns two lists: the paths of the
# PDB files it finds and the matching Gaussian input file names.
# The rootdir argument is the directory from which the directory tree search starts.

def generateInputNames(rootdir='.'):
    """Collect PDB files under ``rootdir`` and derive Gaussian input names.

    The directory tree is walked with ``os.walk``. Every file whose extension
    is exactly ``.pdb`` is recorded, and a flat input name is built from its
    path relative to ``rootdir``: directory separators become ``_``, the
    ``.pdb`` extension becomes ``.com`` and a running counter is appended,
    e.g. ``sub/mol.pdb`` -> ``sub_mol.com-1``.

    Parameters
    ----------
    rootdir : str
        Directory from which the directory tree search starts. A trailing
        path separator is allowed.

    Returns
    -------
    tuple of (list of str, list of str)
        ``(pdb_filenames, gaussian_input_names)``. Entries at the same index
        belong together. PDB paths are built by joining the directory yielded
        by ``os.walk`` with the file name.
    """
    pdb_filenames = []
    gaussian_input_names = []

    dir_counter = -1  # os.walk yields rootdir itself first; do not include it
    file_counter = 0

    # Loop through all directories and files in the root directory
    for subdir, dirs, files in os.walk(rootdir):
        # Sorting in place pins the traversal order, so the counter suffixes
        # are reproducible instead of depending on filesystem listing order.
        dirs.sort()
        dir_counter += 1

        # relpath copes with any spelling of rootdir ('.', 'data', 'data/'),
        # unlike slicing a fixed number of characters off the joined path.
        relative_dir = os.path.relpath(subdir, rootdir)
        name_parts = [] if relative_dir == os.curdir else relative_dir.split(os.sep)

        for file in sorted(files):
            stem, extension = os.path.splitext(file)
            # Match on the real extension only, so 'notes.pdb.txt' is skipped.
            if extension != '.pdb':
                continue

            file_counter += 1
            pdb_filenames.append(os.path.join(subdir, file))

            # Only the extension is swapped; a 'pdb' substring elsewhere in
            # the path (e.g. a directory called 'pdbs') is left untouched.
            input_name = '_'.join(name_parts + [stem]) + '.com'
            gaussian_input_names.append(f'{input_name}-{file_counter}')

    # max() keeps the report sane when rootdir does not exist (no iterations).
    print(f'Found {file_counter} pdb files in {max(dir_counter, 0)} directories...\n')

    return(pdb_filenames, gaussian_input_names)

In [47]:
# Scan the tree rooted at the current working directory for PDB files.
pdb_filenames, gaussian_input_names = generateInputNames(rootdir='.')

Found 100 pdb files in 12 directories...


In [4]:
# This function reads a single PDB file and extracts all the useful data
# for the gaussian input file

def readPDB(filename):
    """Read one PDB file and format its ATOM records for a Gaussian input.

    Each ATOM line is split on whitespace and read positionally: field 2 is
    the PDB atom name, field 3 the residue name, field 4 the residue number,
    fields 5-7 the X/Y/Z coordinates, and the last field the element symbol.
    Lines that carry an extra column before the coordinates (e.g. a chain
    identifier) or have fused columns do not fit this layout and will be
    mis-parsed or raise.

    Parameters
    ----------
    filename : str
        Path of the PDB file to read.

    Returns
    -------
    list of str
        One newline-terminated line per ATOM record, of the form
        `` El(PDBName=..,ResName=..,ResNum=..) X Y Z``, with coordinates
        printed with 8 decimals and right-aligned in 20 characters.

    Raises
    ------
    IndexError
        If an ATOM line has fewer than eight whitespace-separated fields.
    ValueError
        If a coordinate field cannot be converted to ``float``.
    """
    atom_data = []

    with open(filename, 'r') as file:
        # Iterate the file lazily; there is no need to hold every line at once.
        for line in file:
            fields = line.split()
            # Require the record name itself to be ATOM. A substring test also
            # matches e.g. REMARK lines that merely mention the word.
            if not fields or fields[0] != 'ATOM':
                continue

            atomname = fields[-1]
            pdbname = fields[2]
            resname = fields[3]
            resnum = fields[4]
            X, Y, Z = ('{:.8f}'.format(float(fields[index])) for index in (5, 6, 7))

            atom_data.append(f' {atomname}(PDBName={pdbname},ResName={resname},ResNum={resnum}) {X:>20} {Y:>20} {Z:>20} \n')

    return(atom_data)

In [5]:
# This function writes a single gaussian input file
# based on the data from the PDB file and
# some additional gaussian-related info

def writeGaussianInput(filename, atom_data, ncores, method, basis_set, keywords, charge, multiplicity):
    """Write a single Gaussian input file.

    The file contains, in order: the ``%nprocshared`` and ``%chk`` lines,
    the route line ``# method/basis_set keywords``, a blank line, a title
    line, a blank line, the ``charge multiplicity`` line, the atom lines and
    a terminating blank line.

    The checkpoint name and the title are both derived from ``filename`` by
    acting on its last ``.com`` only: the checkpoint name swaps it for
    ``.chk`` and the title drops it (``complex.com-1`` -> ``complex.chk-1``
    and ``complex-1``). If ``filename`` contains no ``.com``, ``.chk`` is
    appended for the checkpoint name and the title is ``filename`` itself.

    Parameters
    ----------
    filename : str
        Path of the input file to create (overwritten if it exists).
    atom_data : iterable of str
        Newline-terminated atom specification lines, written verbatim.
    ncores, method, basis_set, keywords, charge, multiplicity
        Values interpolated as-is into the corresponding input lines.

    Returns
    -------
    None
    """
    # Touch only the last '.com'. A blanket str.replace('com', 'chk') would
    # also rewrite unrelated text such as the 'com' in 'complex'.
    stem, extension, suffix = filename.rpartition('.com')
    if extension:
        chk_name = f'{stem}.chk{suffix}'
        title = f'{stem}{suffix}'
    else:
        # No '.com' present: make sure the checkpoint path still differs
        # from the input file path.
        chk_name = f'{filename}.chk'
        title = filename

    with open(filename, 'w') as file:
        file.write(f'%nprocshared={ncores} \n')
        file.write(f'%chk={chk_name} \n')
        file.write(f'# {method}/{basis_set} {keywords} \n\n')
        file.write(f'{title} \n\n')
        file.write(f'{charge} {multiplicity} \n')
        file.writelines(atom_data)
        file.write('\n')

    return

In [51]:
# This function generates gaussian input files corresponding
# to every PDB file that is found in the directory tree

def generateGaussianInput(pdb_filenames, gaussian_input_names, 
                          ncores='6', method='b3pw91', basis_set="6-31g(d')", 
                          keywords='empiricaldispersion=gd3', charge='2', multiplicity='1'):
    """Write one Gaussian input file for every PDB file.

    ``pdb_filenames`` and ``gaussian_input_names`` are paired by position:
    each PDB file is parsed with ``readPDB`` and written with
    ``writeGaussianInput`` under the matching input name.

    Parameters
    ----------
    pdb_filenames : iterable of str
        Paths of the PDB files to convert.
    gaussian_input_names : sequence of str
        Output file names, one per PDB file, in the same order.
    ncores, method, basis_set, keywords, charge, multiplicity : str
        Passed through unchanged to ``writeGaussianInput``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the two lists differ in length.
    """
    pdb_paths = list(pdb_filenames)
    # zip() would silently drop the unpaired tail, leaving some PDB files
    # without an input file; fail loudly before anything is written.
    if len(pdb_paths) != len(gaussian_input_names):
        raise ValueError(
            f'Got {len(pdb_paths)} PDB files but {len(gaussian_input_names)} '
            f'gaussian input names; the two lists must have the same length.'
        )

    print(f'Writing {len(gaussian_input_names)} gaussian input files using \n {ncores} CPU core(s), {method}/{basis_set} level of theory, {keywords}, charge = {charge} and multiplicity = {multiplicity} ... \n\n')
    for pdbname, inputname in zip(pdb_paths, gaussian_input_names):
        atom_data = readPDB(pdbname)
        writeGaussianInput(inputname, atom_data, ncores=ncores, method=method, 
                           basis_set=basis_set, keywords=keywords, charge=charge, multiplicity=multiplicity)

    print('Done! \n')

    return

In [197]:
# Parse the first PDB file in the list.
atom_data = readPDB(filename=pdb_filenames[0])

In [176]:
# Write the Gaussian input for the first entry only.
writeGaussianInput(
    filename=gaussian_input_names[0],
    atom_data=atom_data,
    ncores='6',
    method='b3pw91',
    basis_set="6-31g(d')",
    keywords='empiricaldispersion=gd3',
    charge='2',
    multiplicity='1',
)

In [205]:
# Write Gaussian input files for every PDB file that was found.
generateGaussianInput(
    pdb_filenames=pdb_filenames,
    gaussian_input_names=gaussian_input_names,
    ncores='6',
    method='b3pw91',
    basis_set="6-31g(d')",
    keywords='empiricaldispersion=gd3',
    charge='2',
    multiplicity='1',
)