In [69]:
import argparse
import re
import numpy as np

np.set_printoptions(threshold=np.nan)

In [2]:
# This is simulating the command line arg parser
class args:
    filenames = ['./data/POSCAR_before.vesta','./data/POSCAR_after.vesta']
    colour = [255,0,0]
    id = []
    radius = 1.0
    atoms_removed = [512]
    atoms_inserted = []

settings = args

In [120]:
combined_positions = []
combined_data = []

for filename in [settings.filenames[0], settings.filenames[1]]:
    data = open(filename,'r').read()
    combined_data.append(data)
    
    struct_match=re.search(r"STRUC.*THERI",data,flags=re.S)[0]
    pos_match=re.findall(r"\d+\D+\s+\D+\d+\s+\d+\.\d+\s+(.*?)\s+\d+\D+\s+\d+",struct_match)

    if pos_match:
        positions = [[float(y) for y in x.split()] for x in pos_match]
    else:
        raise ValueError("Invalid file format (I am only able to parse Vesta files)")


    combined_positions.append(positions)
    
data = {"initial_data": combined_data[0],
        "final_data": combined_data[1],
        "initial_positions": combined_positions[0],
        "final_positions": combined_positions[1],
        }

for atom_id in sorted(settings.atoms_removed, reverse=True):
    del data["initial_positions"][atom_id-1]
    
for atom_id in sorted(settings.atoms_inserted, reverse=True):
    del data["final_positions"][atom_id-1]

assert len(combined_positions[0])==len(combined_positions[1]),"Unequal number of atoms before and after relaxation"

struct_match=re.findall(r"CELLP\n\s+(\d+\.\d+\s+\d+\.\d+\s+\d+\.\d+\s+)",combined_data[0])[0]
data["cell_lengths"] = [float(x) for x in struct_match.split()]

displacement_frac = np.subtract(combined_positions[1],combined_positions[0])
displacement_frac_adjusted = [[-(1-x) if (x>0.5) else x for x in line] for line in displacement_frac]
displacement_angs = np.multiply(displacement_frac_adjusted,data["cell_lengths"])
data["vectors"] = displacement_angs


In [131]:
VECTR_str=r"\1 trial\n"
VECTT_str=r"\1 trial\n"
intermediate_string = re.sub(r'(VECTR\n)',VECTR_str,data["initial_data"])
output_string = re.sub(r'(VECTT\n)',VECTT_str,intermediate_string)

file_out = open('vectors.vesta','w+')
file_out.write(output_string)
file_out.close()

In [122]:
output_string

'#VESTA_FORMAT_VERSION 3.3.0\n\n\nCRYSTAL\n\nTITLE\nCd Te\n\nGROUP\n1 1 P 1\nSYMOP\n 0.000000  0.000000  0.000000  1  0  0   0  1  0   0  0  1   1\n -1.0 -1.0 -1.0  0 0 0  0 0 0  0 0 0\nTRANM 0\n 0.000000  0.000000  0.000000  1  0  0   0  1  0   0  0  1\nLTRANSL\n -1\n 0.000000  0.000000  0.000000  0.000000  0.000000  0.000000\nLORIENT\n -1   0   0   0   0\n 1.000000  0.000000  0.000000  1.000000  0.000000  0.000000\n 0.000000  0.000000  1.000000  0.000000  0.000000  1.000000\nLMATRIX\n 1.000000  0.000000  0.000000  0.000000\n 0.000000  1.000000  0.000000  0.000000\n 0.000000  0.000000  1.000000  0.000000\n 0.000000  0.000000  0.000000  1.000000\n 0.000000  0.000000  0.000000\nCELLP\n 25.957600  25.957600  25.957600  90.000000  90.000000  90.000000\n  0.000000   0.000000   0.000000   0.000000   0.000000   0.000000\nSTRUC\n  1 Cd        Cd1  1.0000   0.125000   0.125000   0.000000    1a       1\n                            0.000000   0.000000   0.000000  0.00\n  2 Cd        Cd2  1.0000 

In [80]:
displacement_frac

array([[ 7.19000e-04,  3.03000e-04,  1.35000e-04],
       [-2.98000e-04,  3.50000e-04,  1.32000e-04],
       [-1.81200e-03,  1.81200e-03,  3.00000e-06],
       [-4.69000e-04,  3.24600e-03,  9.99531e-01],
       [ 2.35000e-04,  1.19000e-04,  8.20000e-05],
       [-1.40000e-04,  1.39000e-04,  1.15000e-04],
       [-3.50000e-04,  2.98000e-04,  1.32000e-04],
       [ 6.90000e-05,  2.95000e-04,  6.90000e-05],
       [ 6.48000e-04, -6.48000e-04,  3.22000e-04],
       [-1.19000e-04, -2.35000e-04,  8.10000e-05],
       [-3.03000e-04, -7.19000e-04,  1.35000e-04],
       [ 8.56000e-04, -1.43700e-03,  8.56000e-04],
       [ 1.43700e-03, -8.56000e-04,  8.56000e-04],
       [-2.95000e-04, -6.90000e-05,  6.90000e-05],
       [-3.24600e-03,  4.69000e-04,  9.99531e-01],
       [ 1.63750e-02, -1.63730e-02,  1.63750e-02],
       [ 2.04000e-04,  1.64000e-04,  2.04000e-04],
       [-2.70000e-05,  1.47000e-04,  1.18000e-04],
       [-2.33000e-04,  2.34000e-04,  1.92000e-04],
       [ 1.35000e-04,  3.03000e

In [31]:
combined_data[0]

'#VESTA_FORMAT_VERSION 3.3.0\n\n\nCRYSTAL\n\nTITLE\nCd Te\n\nGROUP\n1 1 P 1\nSYMOP\n 0.000000  0.000000  0.000000  1  0  0   0  1  0   0  0  1   1\n -1.0 -1.0 -1.0  0 0 0  0 0 0  0 0 0\nTRANM 0\n 0.000000  0.000000  0.000000  1  0  0   0  1  0   0  0  1\nLTRANSL\n -1\n 0.000000  0.000000  0.000000  0.000000  0.000000  0.000000\nLORIENT\n -1   0   0   0   0\n 1.000000  0.000000  0.000000  1.000000  0.000000  0.000000\n 0.000000  0.000000  1.000000  0.000000  0.000000  1.000000\nLMATRIX\n 1.000000  0.000000  0.000000  0.000000\n 0.000000  1.000000  0.000000  0.000000\n 0.000000  0.000000  1.000000  0.000000\n 0.000000  0.000000  0.000000  1.000000\n 0.000000  0.000000  0.000000\nCELLP\n 25.957600  25.957600  25.957600  90.000000  90.000000  90.000000\n  0.000000   0.000000   0.000000   0.000000   0.000000   0.000000\nSTRUC\n  1 Cd        Cd1  1.0000   0.125000   0.125000   0.000000    1a       1\n                            0.000000   0.000000   0.000000  0.00\n  2 Cd        Cd2  1.0000 

In [87]:
combined_positions[1]

[[0.125719, 0.125303, 0.000135],
 [0.374702, 0.12535, 0.000132],
 [0.623188, 0.126812, 3e-06],
 [0.874531, 0.128246, 0.999531],
 [0.125235, 0.375119, 8.2e-05],
 [0.37486, 0.375139, 0.000115],
 [0.62465, 0.375298, 0.000132],
 [0.875069, 0.375295, 6.9e-05],
 [0.125648, 0.624352, 0.000322],
 [0.374881, 0.624765, 8.1e-05],
 [0.624697, 0.624281, 0.000135],
 [0.875856, 0.623563, 0.000856],
 [0.126437, 0.874144, 0.000856],
 [0.374705, 0.874931, 6.9e-05],
 [0.621754, 0.875469, 0.999531],
 [0.891375, 0.858627, 0.016375],
 [0.125204, 0.125164, 0.250204],
 [0.374973, 0.125147, 0.250118],
 [0.624767, 0.125234, 0.250192],
 [0.875135, 0.125303, 0.250719],
 [0.125122, 0.375067, 0.250122],
 [0.374968, 0.375032, 0.25008],
 [0.624853, 0.375027, 0.250118],
 [0.875081, 0.375119, 0.250235],
 [0.12513, 0.62487, 0.25013],
 [0.374933, 0.624878, 0.250122],
 [0.624836, 0.624796, 0.250204],
 [0.875322, 0.624352, 0.250648],
 [0.125648, 0.874678, 0.250648],
 [0.374881, 0.874919, 0.250235],
 [0.624697, 0.874865, 0.

In [12]:
[[float(y) for y in x.split()] for x in pos_match]

[[0.125, 0.125, 0.0],
 [0.375, 0.125, 0.0],
 [0.625, 0.125, 0.0],
 [0.875, 0.125, 0.0],
 [0.125, 0.375, 0.0],
 [0.375, 0.375, 0.0],
 [0.625, 0.375, 0.0],
 [0.875, 0.375, 0.0],
 [0.125, 0.625, 0.0],
 [0.375, 0.625, 0.0],
 [0.625, 0.625, 0.0],
 [0.875, 0.625, 0.0],
 [0.125, 0.875, 0.0],
 [0.375, 0.875, 0.0],
 [0.625, 0.875, 0.0],
 [0.875, 0.875, 0.0],
 [0.125, 0.125, 0.25],
 [0.375, 0.125, 0.25],
 [0.625, 0.125, 0.25],
 [0.875, 0.125, 0.25],
 [0.125, 0.375, 0.25],
 [0.375, 0.375, 0.25],
 [0.625, 0.375, 0.25],
 [0.875, 0.375, 0.25],
 [0.125, 0.625, 0.25],
 [0.375, 0.625, 0.25],
 [0.625, 0.625, 0.25],
 [0.875, 0.625, 0.25],
 [0.125, 0.875, 0.25],
 [0.375, 0.875, 0.25],
 [0.625, 0.875, 0.25],
 [0.875, 0.875, 0.25],
 [0.125, 0.125, 0.5],
 [0.375, 0.125, 0.5],
 [0.625, 0.125, 0.5],
 [0.875, 0.125, 0.5],
 [0.125, 0.375, 0.5],
 [0.375, 0.375, 0.5],
 [0.625, 0.375, 0.5],
 [0.875, 0.375, 0.5],
 [0.125, 0.625, 0.5],
 [0.375, 0.625, 0.5],
 [0.625, 0.625, 0.5],
 [0.875, 0.625, 0.5],
 [0.125, 0.875, 

In [44]:
if args.atoms_inserted:
    print ("true")

In [68]:
data["final_positions"]

[[0.374702, 0.12535, 0.000132],
 [0.125648, 0.624352, 0.000322],
 [0.374881, 0.624765, 8.1e-05],
 [0.624697, 0.624281, 0.000135],
 [0.875856, 0.623563, 0.000856],
 [0.126437, 0.874144, 0.000856],
 [0.374705, 0.874931, 6.9e-05],
 [0.621754, 0.875469, 0.999531],
 [0.891375, 0.858627, 0.016375],
 [0.125204, 0.125164, 0.250204],
 [0.374973, 0.125147, 0.250118],
 [0.624767, 0.125234, 0.250192],
 [0.875135, 0.125303, 0.250719],
 [0.125122, 0.375067, 0.250122],
 [0.374968, 0.375032, 0.25008],
 [0.624853, 0.375027, 0.250118],
 [0.875081, 0.375119, 0.250235],
 [0.12513, 0.62487, 0.25013],
 [0.374933, 0.624878, 0.250122],
 [0.624836, 0.624796, 0.250204],
 [0.875322, 0.624352, 0.250648],
 [0.125648, 0.874678, 0.250648],
 [0.374881, 0.874919, 0.250235],
 [0.624697, 0.874865, 0.250719],
 [0.875856, 0.874144, 0.251437],
 [0.125118, 0.125147, 0.499973],
 [0.374945, 0.125121, 0.499945],
 [0.624815, 0.125185, 0.499776],
 [0.875132, 0.12535, 0.499702],
 [0.12508, 0.375032, 0.499968],
 [0.374964, 0.37503

In [74]:
for index in sorted([1,2], reverse=True):
    del data["final_positions"][index]

TypeError: string indices must be integers

In [75]:
data["final_positions"]

TypeError: string indices must be integers