In [1]:
import argparse
import re
import sys

import numpy as np

# Print arrays in full instead of abbreviating them with "...".
# NumPy rejects NaN as a threshold (it raises ValueError), so use
# sys.maxsize, the value its documentation recommends for untruncated output.
np.set_printoptions(threshold=sys.maxsize)

In [2]:
# Stand-in for the command line arg parser: a plain class whose attributes
# take the place of the parsed option values.
class args:
    """Hard-coded option values used by the rest of the notebook."""

    # --- input files: [structure before, structure after] ---
    filenames = [
        './data/POSCAR_before.vesta',
        './data/POSCAR_after.vesta',
    ]

    # --- atoms present in only one of the two structures ---
    # 1-based atom numbers dropped from the initial position list.
    atoms_removed = [512]
    # 1-based atom numbers dropped from the final position list.
    atoms_inserted = []

    # --- vector appearance (written into the VECTT entries) ---
    radius = 0.5
    colour = [255, 0, 0]

    # --- vector filtering / scaling ---
    # vector modulus cutoff, below which a vector will not be printed (angstrom)
    cutoff = 0.05
    # scale factor for vector moduli. (VECTS 3.000000)
    scale_factor = 1.0

    # Not read by any of the visible cells.
    id = []


# The remaining cells read every option through this alias.
settings = args

# TODO: bonds and atoms need to be removed from the output!


# TODO: the SITET and SBOND sections need the same treatment.



In [4]:
# Read both VESTA files, extract the atomic coordinates from their STRUC
# sections and compute one displacement vector per atom (final - initial).
combined_positions = []
combined_data = []

for filename in [settings.filenames[0], settings.filenames[1]]:
    # Context manager: the handle is closed even if parsing fails below.
    with open(filename, 'r') as file_in:
        file_text = file_in.read()
    combined_data.append(file_text)

    # Everything from the STRUC keyword up to the THERI keyword (greedy,
    # spanning newlines). A missing section is reported with the same
    # ValueError as an unparsable one instead of a TypeError on None[0].
    struct_section = re.search(r"STRUC.*THERI", file_text, flags=re.S)
    if struct_section is None:
        pos_match = []
    else:
        pos_match = re.findall(
            r"\d+\D+\s+\D+\d+\s+\d+\.\d+\s+(.*?)\s+\d+\D+\s+\d+",
            struct_section[0],
        )

    if not pos_match:
        raise ValueError("Invalid file format (I am only able to parse Vesta files)")

    # One [x, y, z] list of floats per matched atom line.
    positions = [[float(y) for y in x.split()] for x in pos_match]
    combined_positions.append(positions)

# NOTE: the position entries are the very same list objects held in
# combined_positions, so the deletions below are visible through both names.
data = {"initial_data": combined_data[0],
        "final_data": combined_data[1],
        "initial_positions": combined_positions[0],
        "final_positions": combined_positions[1],
        }

# Atom numbers are 1-based. Delete from the highest number downwards so that
# earlier deletions do not shift the index of later ones.
for atom_id in sorted(settings.atoms_removed, reverse=True):
    del data["initial_positions"][atom_id-1]

for atom_id in sorted(settings.atoms_inserted, reverse=True):
    del data["final_positions"][atom_id-1]

assert len(combined_positions[0])==len(combined_positions[1]),"Unequal number of atoms before and after relaxation"

# The first three decimal numbers on the line after CELLP.
cell_match = re.search(r"CELLP\n\s+(\d+\.\d+\s+\d+\.\d+\s+\d+\.\d+\s+)", combined_data[0])
if cell_match is None:
    raise ValueError("Invalid file format (I am only able to parse Vesta files)")
struct_match = cell_match[1]
data["cell_lengths"] = [float(x) for x in struct_match.split()]

displacement_frac = np.subtract(combined_positions[1],combined_positions[0])
# Wrap every component to the nearest periodic image, i.e. into [-0.5, 0.5].
# The previous one-sided rule only corrected components above +0.5, so an atom
# crossing the cell boundary in the negative direction kept a near-full-cell
# displacement. Components of exactly +/-0.5 are left unchanged, as before.
displacement_frac_adjusted = displacement_frac - np.rint(displacement_frac)
# NOTE(review): only the three cell lengths are used (no angles), so this gives
# Cartesian angstrom vectors only for an orthogonal cell -- confirm that is
# acceptable for the structures being processed.
displacement_angs = np.multiply(displacement_frac_adjusted,data["cell_lengths"])
data["vectors"] = displacement_angs


In [21]:
# Build the VECTR / VECTT entries from data["vectors"], rewrite the ATOMT,
# BONDP, SBOND and SITET sections of the initial file, and save the result
# as 'vectors.vesta'.
# TODO: settings.cutoff and settings.scale_factor are not applied here.

def _require_section(pattern, text):
    """Return the part of ``text`` matched by ``pattern`` (searched with re.S).

    Raises:
        ValueError: if the pattern does not occur in ``text`` (instead of the
            opaque TypeError produced by subscripting a ``None`` match).
    """
    found = re.search(pattern, text, flags=re.S)
    if found is None:
        raise ValueError("Invalid file format (section not found: {0})".format(pattern))
    return found[0]


# data["vectors"] has the rows of settings.atoms_removed deleted, but the
# output is built from the unmodified initial file, which still lists those
# atoms. Map each vector back to the atom number it had in the initial file;
# numbering vectors and atoms identically would attach every vector after a
# removed atom to the wrong site. With no removed atoms the mapping is 1:1.
# NOTE(review): assumes the first field of the second line of a VECTR entry is
# the atom/site number in the file -- confirm against the VESTA format.
removed_sites = set(settings.atoms_removed)
surviving_sites = [
    site
    for site in range(1, len(data["vectors"]) + len(removed_sites) + 1)
    if site not in removed_sites
]

# "\1 " re-inserts the matched keyword line; the entries are appended after it.
VECTR_str = r"\1 " + "".join(
    "{0} {1} {2} {3} 0\n {4} 0 0 0 0\n 0 0 0 0 0\n".format(number, vec[0], vec[1], vec[2], site)
    for number, (site, vec) in enumerate(zip(surviving_sites, data["vectors"]), start=1)
)

# One entry per vector: number, radius, then the three colour components.
VECTT_str = r"\1 " + "".join(
    "{0} {1} {2} {3} {4} 0\n".format(
        number, settings.radius, settings.colour[0], settings.colour[1], settings.colour[2])
    for number in range(1, len(data["vectors"]) + 1)
)

# In each section, replace the decimal number that follows the captured prefix
# with 0.0001 (presumably the display radius / width, which effectively hides
# atoms and bonds -- TODO confirm the column meaning).
ATOMT_match = _require_section(r"ATOMT.*SCENE", data["initial_data"])   # best if the end was indicated with zeros?
ATOMT_corrected = re.sub(r'([a-zA-Z]+\s+)\d+\.\d+',r"\1 0.0001",ATOMT_match)

BONDP_match = _require_section(r"BONDP.*POLYP", data["initial_data"])   # best if the end was indicated with zeros?
BONDP_corrected = re.sub(r'(\d+\s+\d+\s+)\d+\.\d+',r"\1 0.0001",BONDP_match)

SBOND_match = _require_section(r"SBOND.*SITET", data["initial_data"])
SBOND_corrected = re.sub(r'(\s+\d\s+\d\s+\d\s+\d\s+\d\s+)\d+\.\d+',r"\1 0.0001",SBOND_match)

SITET_match = _require_section(r"SITET.*VECTR", data["initial_data"])
SITET_corrected = re.sub(r'([a-zA-Z]+\d+\s+)\d+\.\d+',r"\1 0.0001",SITET_match)

# Apply the substitutions in the original order: vectors first, then sections.
output_string = re.sub(r'(VECTR\n)', VECTR_str, data["initial_data"])
output_string = re.sub(r'(VECTT\n)', VECTT_str, output_string)
for section_pattern, section_text in (
    (r'(ATOMT.*SCENE)', ATOMT_corrected),
    (r'(BONDP.*POLYP)', BONDP_corrected),
    (r'(SBOND.*SITET)', SBOND_corrected),
    (r'(SITET.*VECTR)', SITET_corrected),
):
    # A function replacement inserts the text literally; passing file text as
    # a replacement template would interpret any backslash in it as an escape.
    output_string = re.sub(
        section_pattern,
        lambda _match, text=section_text: text,
        output_string,
        flags=re.S,
    )

# Context manager guarantees the file is flushed and closed.
with open('vectors.vesta','w+') as file_out:
    file_out.write(output_string)

# TODO: find a cleaner way to apply several substitutions to one string.

In [39]:
# Echo the assembled file text (the same string written to 'vectors.vesta') for inspection.
output_string

'#VESTA_FORMAT_VERSION 3.3.0\n\n\nCRYSTAL\n\nTITLE\nCd Te\n\nGROUP\n1 1 P 1\nSYMOP\n 0.000000  0.000000  0.000000  1  0  0   0  1  0   0  0  1   1\n -1.0 -1.0 -1.0  0 0 0  0 0 0  0 0 0\nTRANM 0\n 0.000000  0.000000  0.000000  1  0  0   0  1  0   0  0  1\nLTRANSL\n -1\n 0.000000  0.000000  0.000000  0.000000  0.000000  0.000000\nLORIENT\n -1   0   0   0   0\n 1.000000  0.000000  0.000000  1.000000  0.000000  0.000000\n 0.000000  0.000000  1.000000  0.000000  0.000000  1.000000\nLMATRIX\n 1.000000  0.000000  0.000000  0.000000\n 0.000000  1.000000  0.000000  0.000000\n 0.000000  0.000000  1.000000  0.000000\n 0.000000  0.000000  0.000000  1.000000\n 0.000000  0.000000  0.000000\nCELLP\n 25.957600  25.957600  25.957600  90.000000  90.000000  90.000000\n  0.000000   0.000000   0.000000   0.000000   0.000000   0.000000\nSTRUC\n  1 Cd        Cd1  1.0000   0.125000   0.125000   0.000000    1a       1\n                            0.000000   0.000000   0.000000  0.00\n  2 Cd        Cd2  1.0000 

In [14]:
# Inspect the raw text matched between the SITET and VECTR keywords of the initial file.
SITET_match


'SITET\n  1        Cd1  1.5200 242  30 220 242  30 220 204  0\n  2        Cd2  1.5200 242  30 220 242  30 220 204  0\n  3        Cd3  1.5200 242  30 220 242  30 220 204  0\n  4        Cd4  1.5200 242  30 220 242  30 220 204  0\n  5        Cd5  1.5200 242  30 220 242  30 220 204  0\n  6        Cd6  1.5200 242  30 220 242  30 220 204  0\n  7        Cd7  1.5200 242  30 220 242  30 220 204  0\n  8        Cd8  1.5200 242  30 220 242  30 220 204  0\n  9        Cd9  1.5200 242  30 220 242  30 220 204  0\n 10       Cd10  1.5200 242  30 220 242  30 220 204  0\n 11       Cd11  1.5200 242  30 220 242  30 220 204  0\n 12       Cd12  1.5200 242  30 220 242  30 220 204  0\n 13       Cd13  1.5200 242  30 220 242  30 220 204  0\n 14       Cd14  1.5200 242  30 220 242  30 220 204  0\n 15       Cd15  1.5200 242  30 220 242  30 220 204  0\n 16       Cd16  1.5200 242  30 220 242  30 220 204  0\n 17       Cd17  1.5200 242  30 220 242  30 220 204  0\n 18       Cd18  1.5200 242  30 220 242  30 220 204  0\n 1

In [17]:

#SBOND_corrected = re.sub(r'(\s+\d\s+\d\s+\d\s+\d\s+\d\s+)\d+\.\d+',r"\1 0.0001",SBOND_match)

In [18]:
# Inspect the SITET section after the decimal following each site label was replaced with 0.0001.
SITET_corrected

'SITET\n  1        Cd1   0.0001 242  30 220 242  30 220 204  0\n  2        Cd2   0.0001 242  30 220 242  30 220 204  0\n  3        Cd3   0.0001 242  30 220 242  30 220 204  0\n  4        Cd4   0.0001 242  30 220 242  30 220 204  0\n  5        Cd5   0.0001 242  30 220 242  30 220 204  0\n  6        Cd6   0.0001 242  30 220 242  30 220 204  0\n  7        Cd7   0.0001 242  30 220 242  30 220 204  0\n  8        Cd8   0.0001 242  30 220 242  30 220 204  0\n  9        Cd9   0.0001 242  30 220 242  30 220 204  0\n 10       Cd10   0.0001 242  30 220 242  30 220 204  0\n 11       Cd11   0.0001 242  30 220 242  30 220 204  0\n 12       Cd12   0.0001 242  30 220 242  30 220 204  0\n 13       Cd13   0.0001 242  30 220 242  30 220 204  0\n 14       Cd14   0.0001 242  30 220 242  30 220 204  0\n 15       Cd15   0.0001 242  30 220 242  30 220 204  0\n 16       Cd16   0.0001 242  30 220 242  30 220 204  0\n 17       Cd17   0.0001 242  30 220 242  30 220 204  0\n 18       Cd18   0.0001 242  30 220 242 

In [19]:
# Inspect the raw text matched between the ATOMT and SCENE keywords of the initial file.
ATOMT_match

'ATOMT\n  1         Cd  1.5200 242  30 220 242  30 220 204\n  2         Te  1.3700 173 162  81 173 162  81 204\n  0 0 0 0 0 0\nSCENE'

In [None]:
# Scratch re-run of the BONDP substitution (identical to the statement in the
# cell that builds output_string): every decimal number that directly follows
# two whitespace-separated integers is replaced with 0.0001.
BONDP_corrected = re.sub(r'(\d+\s+\d+\s+)\d+\.\d+',r"\1 0.0001",BONDP_match)