In [1]:
from bs4 import BeautifulSoup
import numpy as np
from pymatgen.core import Structure
from ase.io import read, write
from pymatgen.io.pwscf import PWInput

In [2]:
%%bash
# Show the files in the notebook's working directory, then print that directory's path.
ls
pwd

C.pbe-n-kjpaw_psl.1.0.0.UPF
CONTCAR
CONTCAR.xyz
CONTCAR_ase.vasp
CONTCAR_ase.vesta
POSCAR
get_structure_pw.ipynb
grahite_latt.in
grahite_vdw_latt.in
graphite.xml
graphite_bulk.png
graphite_latt.in
graphite_vdw_latt.in
log
log_wo_vdw
run_espresso.batch
slurm-3791402.out
/Users/riteshkumar/Library/CloudStorage/Box-Box/Research-postdoc/liquid-electrolyte-simulations/graphite-intercalation-peiyuan/graphite/bulk


In [3]:
# Read the whole XML output file into a string.
# The encoding is passed explicitly: the file's XML declaration says utf-8, and
# open() would otherwise fall back to the platform's locale default.
with open('graphite.xml', 'r', encoding='utf-8') as f:
    data = f.read()

In [4]:
# Parse the file contents with BeautifulSoup's "xml" parser. The bare name on the
# last line displays the parsed document as the cell output.
bs_data = BeautifulSoup(data, "xml")
bs_data

<?xml version="1.0" encoding="utf-8"?>
<qes:espresso Units="Hartree atomic units" xmlns:qes="http://www.quantum-espresso.org/ns/qes/qes-1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.quantum-espresso.org/ns/qes/qes-1.0 http://www.quantum-espresso.org/ns/qes/qes_200420.xsd">
<!--All quantities are in Hartree atomic units unless otherwise specified-->
<general_info>
<xml_format NAME="QEXSD" VERSION="20.04.20">QEXSD_20.04.20</xml_format>
<creator NAME="PWSCF" VERSION="6.6">XML file generated by PWSCF</creator>
<created DATE=" 6Apr2022" TIME=" 7: 8: 8">This run was terminated on:   7: 8: 8   6 Apr 2022</created>
<job/>
</general_info>
<parallel_info>
<nprocs>4</nprocs>
<nthreads>1</nthreads>
<ntasks>1</ntasks>
<nbgrp>1</nbgrp>
<npool>1</npool>
<ndiag>4</ndiag>
</parallel_info>
<input>
<control_variables>
<title/>
<calculation>vc-relax</calculation>
<restart_mode>from_scratch</restart_mode>
<prefix>graphite</prefix>
<pseudo_dir>.</pseudo_dir>
<outd

In [10]:
## Get all the cell lists
# find_all('cell') returns every <cell> element found in the document.
# The output shows several of them with slightly different vectors -- presumably
# the input cell plus one entry per step of the vc-relax run (TODO confirm).
cell_lists = bs_data.find_all('cell')
cell_lists

[<cell>
 <a1>4.663322511166004e0 0.000000000000000e0 0.000000000000000e0</a1>
 <a2>-2.331661255583002e0 4.038556377651229e0 0.000000000000000e0</a2>
 <a3>0.000000000000000e0 0.000000000000000e0 1.641234320196755e1</a3>
 </cell>,
 <cell>
 <a1>4.663322511166004e0 0.000000000000000e0 0.000000000000000e0</a1>
 <a2>-2.331661255583002e0 4.038556377651229e0 0.000000000000000e0</a2>
 <a3>0.000000000000000e0 0.000000000000000e0 1.641234320196755e1</a3>
 </cell>,
 <cell>
 <a1>4.658562802016909e0 -1.985928686351244e-18 8.506277653528331e-33</a1>
 <a2>-2.329281401008454e0 4.034434349243181e0 -8.506277653528331e-33</a2>
 <a3>0.000000000000000e0 0.000000000000000e0 1.637123259884357e1</a3>
 </cell>,
 <cell>
 <a1>4.657155580274660e0 -3.080116267890514e-18 1.381563429067452e-32</a1>
 <a2>-2.328577790137330e0 4.033215659650540e0 -1.381563429067135e-32</a2>
 <a3>2.578137670955025e-35 -1.488488427713551e-35 1.632601093540714e1</a3>
 </cell>,
 <cell>
 <a1>4.656044290613247e0 -7.905123734171283e-18 2.31724

In [11]:
## Then extract the final lattice vector
# The last <cell> element in the document is used -- presumably the relaxed cell.
cell = cell_lists[-1]
cell

<cell>
<a1>4.659372050571620e0 5.666919316619603e-13 2.285984756051978e-31</a1>
<a2>-2.329686025283249e0 4.035135143976650e0 -3.651499080901406e-31</a2>
<a3>-4.728380587554558e-30 1.551418428521964e-29 1.279586558649308e1</a3>
</cell>

In [20]:
# .contents of the <a1> tag is a list of its children; here the first child is a
# single string holding the three space-separated components of the vector.
cell_a1 = cell.find('a1').contents
cell_a1[0]

'4.659372050571620e0 5.666919316619603e-13 2.285984756051978e-31'

In [23]:
# First space-separated component of a1, still a string at this point.
cell_a1[0].split(' ')[0]

'4.659372050571620e0'

In [24]:
# float() parses the "e0"-style exponent notation used in the XML directly.
float(cell_a1[0].split(' ')[0])

4.65937205057162

In [25]:
# Third component of a1 (numerically negligible in the output below).
float(cell_a1[0].split(' ')[2])

2.285984756051978e-31

In [27]:
## Function for converting the cell in Bohrs to angstrom
def convert_bohr_angstrom(cell, bohr_to_angstrom=0.529177):
    """Return a copy of ``cell`` with every component scaled from Bohr to angstrom.

    The input is left untouched: a new nested list is built instead of scaling
    the caller's data in place, so converting the same cell twice gives the
    same answer both times.

    Parameters
    ----------
    cell : sequence of sequences of float
        Lattice vectors (one row per vector) expressed in Bohr.
    bohr_to_angstrom : float, optional
        Conversion factor applied to every component. The default keeps the
        value this notebook has always used; pass 0.529177210903 (CODATA 2018)
        when more digits are needed.

    Returns
    -------
    list of list of float
        The scaled lattice vectors, in angstrom.
    """
    return [
        [component * bohr_to_angstrom for component in vector]
        for vector in cell
    ]

Get the lattice vectors

In [30]:
# Grab the raw coordinate string (first child of each tag) for the three
# lattice-vector elements of the selected cell.
cell_a1, cell_a2, cell_a3 = (
    cell.find(tag).contents[0] for tag in ('a1', 'a2', 'a3')
)

In [32]:
# Turn each coordinate string into a row of three floats.
# split() with no argument splits on any run of whitespace, so extra spaces or
# a stray newline in the XML text cannot produce empty strings that float()
# would reject; each string is also split only once.
cell_final = [
    [float(component) for component in vector.split()]
    for vector in (cell_a1, cell_a2, cell_a3)
]
cell_final

[[4.65937205057162, 5.666919316619603e-13, 2.285984756051978e-31],
 [-2.329686025283249, 4.03513514397665, -3.651499080901406e-31],
 [-4.728380587554558e-30, 1.551418428521964e-29, 12.79586558649308]]

In [33]:
# Convert the lattice vectors from Bohr to angstrom.
# NOTE(review): the result is bound back to the same name, so running this cell
# a second time applies the conversion factor again -- re-run the cell that
# builds cell_final first.
cell_final = convert_bohr_angstrom(cell_final)
cell_final

[[2.465632523605338, 2.9988033632108115e-13, 1.2096905552533174e-31],
 [-1.2328162618013139, 2.1353007100841315, -1.9322893291341634e-31],
 [-2.5021502541803583e-30, 8.209749497499673e-30, 6.771277763463649]]

Get the positions

In [34]:
# Take the last <atomic_positions> element in the document -- presumably the
# final geometry of the run (TODO confirm).
position_list = bs_data.find_all('atomic_positions')[-1]
position_list

<atomic_positions>
<atom index="1" name="C">-4.671677034549796e-20 -6.687480244488877e-21 3.198966396623738e0</atom>
<atom index="2" name="C">-7.120990659311365e-19 -4.305312660030200e-19 9.596899189869914e0</atom>
<atom index="3" name="C">2.329688354970578e0 1.345043702947964e0 3.198966396623738e0</atom>
<atom index="4" name="C">-2.329683967808210e-6 2.690091441029526e0 9.596899189869914e0</atom>
</atomic_positions>

In [49]:
# len() of a tag counts its direct children. The result (9) is more than the
# four <atom> entries -- presumably the whitespace between tags is counted as
# text nodes, which would put the atoms at the odd indices (TODO confirm).
len(position_list)

9

In [38]:
# find() takes the tag name as its first argument and attribute filters
# separately; passing 'atom index="1" name="C"' as one string is treated as a
# tag name and matches nothing. The filters go through attrs= because find()'s
# own first parameter is called `name`, so name='C' cannot be a keyword here.
position_list.find('atom', attrs={'index': '1', 'name': 'C'})

In [48]:
# Child 5 is the <atom index="3"> element; its .contents is a one-item list
# holding the coordinate string.
position_list.contents[5].contents

['2.329688354970578e0 1.345043702947964e0 3.198966396623738e0']

In [None]:
# Parse every <atom> entry of the selected <atomic_positions> block into an
# [x, y, z] row of floats. find_all('atom') returns only the atom tags, so the
# text nodes between them never need to be indexed around.
# NOTE(review): values are kept as stored in the XML -- presumably Bohr, given
# the "Hartree atomic units" attribute on the root element; convert if needed.
pos = [
    [float(value) for value in atom.get_text().split()]
    for atom in position_list.find_all('atom')
]
pos

Get the energy

# 1 Hartree = 27.211386245988 eV (CODATA 2018)

In [None]:
# NOTE(review): unfinished placeholder -- PWInput is called with no arguments.
# It presumably needs at least a structure to build an input file; TODO pass
# the structure assembled from the parsed cell and positions.
PWInput()