# Data analysis of powder XRD data

In [2]:
# Import relevant libraries
from datetime import date
import glob
import os
import re

import numpy as np
import pandas as pd
import scipy.signal as sps

from matplotlib import pyplot as plt

In [3]:
# Today's date as an ISO-8601 string (yyyy-mm-dd); used below to name the output folder and files
todaystr = date.today().isoformat()

In [4]:
# Change to the directory containing the folders with data, so that the relative glob
# patterns and output paths used in the rest of the notebook resolve against it.
# The default is the original local path; set the XRD_DATA_DIR environment variable
# to run the notebook on another machine without editing this cell.
DATA_DIR = os.environ.get(
    'XRD_DATA_DIR',
    r'C:\Users\Pierre\Desktop\Postdoc\YTmVO4\YTmVO4_neutrons\2021_ORNL_powder_ND',
)
os.chdir(DATA_DIR)

## Reference spectra taken from the ICSD

In [5]:
# Files containing ICSD reference XRD spectra have names containing '_ICSD_' and the extension '.xy'.
# They are located in subfolders of the working directory, hence the recursive '**/' in the pattern.
# glob.glob() returns the matching filenames in a filesystem-dependent order, so the list is sorted:
# the reference compounds are later addressed by position (refs[0], refs[1]) and need a stable order.
ref_files = sorted(glob.glob('**/*_ICSD_*.xy', recursive=True))

In [6]:
# Scan through the list of reference files and:
# 1. Extract the compound name from the file path
# 2. Import the data from each file into a Pandas DataFrame using read_csv(),
#    and store the DataFrame into the dictionary 'dref', keyed by compound name
dref = {}
# Expected relative path of a reference file:
# 1. a subfolder whose name starts with a date (yyyy-mm-dd) followed by word characters,
# 2. a path separator ('\' or '/'),
# 3. the compound name: a run of letters/digits (no '_'), captured as group 1,
# 4. followed by the rest of the file name.
# A raw string is used so that '\d', '\w' and '\W' reach the regex engine untouched.
ref_pattern = re.compile(r'\d{4}-\d{2}-\d{2}\w+[\\/]([^\W_]+)_*')
for file in ref_files:
    fm = ref_pattern.match(file)
    if fm is None:
        # The glob pattern is broader than the regex; fail with the offending path
        # rather than an opaque AttributeError on None
        raise ValueError(f'Cannot extract a compound name from reference file path {file!r}')
    compound = fm.group(1)
    # read_csv() opens the file itself from its path.
    # header=0 combined with names= discards the first line of the file and applies these column names.
    dref[compound] = pd.read_csv(file, sep=r'\s+', header=0, names=['2theta', 'I', 'std?'])

# Display the last reference pattern loaded, as a sanity check
dref[compound]

Unnamed: 0,2theta,I,std?
0,1.01,0.10870,0.0
1,1.02,0.10665,0.0
2,1.03,0.10465,0.0
3,1.04,0.10271,0.0
4,1.05,0.10083,0.0
...,...,...,...
11894,119.95,0.22601,0.0
11895,119.96,0.22209,0.0
11896,119.97,0.21937,0.0
11897,119.98,0.21810,0.0


In [7]:
# Axis labels reused by every figure in this notebook
xlabel, ylabel = r'2$\theta$ (°)', 'I (a.u.)'

In [27]:
# Plot reference spectra
# Uncomment %matplotlib to reset graphs and plot in external window (or whichever the default plotting environment is)
%matplotlib
fig = plt.figure()
for cpd in dref.keys():
    plt.plot(dref[cpd]['2theta'], dref[cpd]['I'], label=cpd)
    plt.xlim([0,75])
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title('XRD pattern of TmVO4 and YVO4 as reported in the ICSD database')
    plt.legend()

Using matplotlib backend: Qt5Agg


## Experimental powder XRD data

In [9]:
# Create the list of background-subtracted datafiles ('*-background.ASC') contained in subfolders.
# glob.glob() returns matches in a filesystem-dependent order; sorting makes the order in which
# spectra are loaded (and later plotted) reproducible.
xp_files = sorted(glob.glob('**/*-background.ASC', recursive=True))

In [10]:
# For each filename contained in the above-created list:
# 1. Extract the measurement date, compound name and batch name from the file path
# 2. Import the data from the file into a Pandas DataFrame using read_csv(),
#    and store the DataFrame into the dictionary 'dxp', keyed by (batch, measurement date)
# 'compounds' maps each batch name to its compound name.
compounds = {}
dxp = {}
# Expected relative path of a data file:
# 1. a subfolder whose name starts with a date (yyyy-mm-dd) followed by word characters,
# 2. a path separator ('\' or '/'),
# 3. the measurement date (yyyy-mm-dd), captured as group 1, followed by '_',
# 4. the compound name and the batch name (letters/digits only, no '_'), separated by '-'
#    and captured as groups 2 and 3,
# 5. more word characters, a '-', more word characters, and the literal '.ASC' extension.
# A raw string is used so that the regex escapes reach the regex engine untouched.
xp_pattern = re.compile(
    r'\d{4}-\d{2}-\d{2}\w+[\\/](\d{4}-\d{2}-\d{2})_([^\W_]+)-([^\W_]+)\w+-\w+\.ASC'
)
for file in xp_files:
    fm = xp_pattern.match(file)
    if fm is None:
        # The glob pattern is broader than the regex; fail with the offending path
        # rather than an opaque AttributeError on None
        raise ValueError(f'Cannot parse date/compound/batch from data file path {file!r}')
    # Deliberately not named 'date': that would shadow datetime.date imported at the top
    # of the notebook and break a re-run of the cell that builds 'todaystr'
    scan_date = fm.group(1)
    batch = fm.group(3)
    compounds[batch] = fm.group(2)
    # read_csv() opens the file itself from its path.
    # header=0 combined with names= discards the first line of the file and applies these column names.
    dxp[batch, scan_date] = pd.read_csv(file, sep=' ', header=0, names=['2theta', 'I0', 'Estimated Std'])

# Display the last pattern loaded, as a sanity check
dxp[batch, scan_date]

Unnamed: 0,2theta,I0,Estimated Std
0,15.021162,-20.060276,29.614357
1,15.035162,-10.522699,29.759336
2,15.049162,9.037492,30.070728
3,15.063162,-6.422026,29.796972
4,15.077162,6.514793,29.997779
...,...,...,...
4279,74.927162,103.837523,22.245806
4280,74.941162,72.763747,21.607136
4281,74.955162,62.831359,21.447662
4282,74.969162,19.106866,20.477889


In [11]:
# Names of the growth batches of interest
batches = [
    'LS5695',
    'LS5701',
    'LS5696',
    'LS5697',
]

In [35]:
# Compute and subtract any residual constant background
def estimate_background(intensity, mult=1.3, rtol=0.01, floor=10, max_iter=100, verbose=True):
    """Estimate a constant background as the mean of the low-intensity datapoints.

    The estimate starts from the mean of the whole spectrum and is refined iteratively:
    at each step the mean is recomputed over the datapoints lying below `mult` times
    the previous estimate, which progressively excludes the diffraction peaks.

    Parameters
    ----------
    intensity : pandas.Series or numpy.ndarray
        Measured intensities.
    mult : float
        Datapoints at or above `mult` times the current estimate are excluded from the mean.
    rtol : float
        Iteration stops once two successive estimates differ by no more than this
        fraction of the older one.
    floor : float
        Iteration also stops, keeping the current estimate, if the next estimate
        would fall below this value.
    max_iter : int
        Iteration stops once the iteration counter exceeds this value.
    verbose : bool
        If True, print the iteration counter and the two latest estimates at each step
        and once more at the end.

    Returns
    -------
    float
        The background estimate.
    """
    previous = np.mean(intensity)  # average intensity of the whole spectrum
    current = np.mean(intensity[intensity < previous*mult])  # same, excluding high-intensity datapoints
    ii = 0
    # NOTE: the relative criterion divides by the previous estimate, so it assumes a positive mean
    while abs(previous - current)/previous > rtol:
        if verbose:
            print(ii, previous, current)
        candidate = np.mean(intensity[intensity < current*mult])
        if ii > max_iter or candidate < floor:
            break
        previous, current = current, candidate
        ii = ii + 1

    if verbose:
        print(ii, previous, current)
    return current


for key in dxp.keys():
    background = estimate_background(dxp[key]['I0'])
    # Store the background-subtracted intensity in a new column 'I'; the raw column 'I0' is left
    # untouched, so this cell can be re-run safely
    dxp[key]['I'] = dxp[key]['I0'] - background
    print()

0 381.745181395801 315.1011523919672
1 315.1011523919672 310.24887172674687
2 310.24887172674687 309.9170183031991

0 393.3541474402131 309.18051098084044
1 309.18051098084044 303.62188803157835
2 303.62188803157835 303.0619490611636

0 433.8522858520172 313.865644417357
1 313.865644417357 304.34229103662386
2 304.34229103662386 303.17062259656973

0 393.89651271993574 312.3195552760272
1 312.3195552760272 307.2008893883647
2 307.2008893883647 306.7936270683599

0 82.58612385788017 13.816051295977827
0 82.58612385788017 13.816051295977827

0 87.13173199847142 15.999686442274745
0 87.13173199847142 15.999686442274745

0 71.8562557783523 8.319104585299314
0 71.8562557783523 8.319104585299314



## Compare measured and reported XRD patterns for pure TmVO4

In [36]:
# Overlay the measured pattern of the pure TmVO4 batch on the ICSD reference pattern
ref = 'TmVO4'
Tm_key = ('LS5695', '2021-03-12')
fig, ax = plt.subplots()
ax.plot(dref[ref]['2theta'], dref[ref]['I'], label='ICSD')
# The measured intensity is multiplied by 1e-2 before plotting
ax.plot(dxp[Tm_key]['2theta'], dxp[Tm_key]['I']*1e-2, label=Tm_key[0])
ax.set_xlim([18, 40])
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.set_title('XRD pattern of TmVO4-LS5695 vs pattern calculated from .cif file')
ax.legend()

<matplotlib.legend.Legend at 0x23b61791d48>

In [None]:
# Save the current figure as a PDF in a folder named after today's date.
# The folder is created first if needed: savefig() does not create missing directories.
outdir = f'./{todaystr}_YTmVO4_analysis'
os.makedirs(outdir, exist_ok=True)
plt.savefig(f'{outdir}/{todaystr}_TmVO4_full_spectrum.pdf')

### Focus on three most intense peaks

In [31]:
# Parameters for zoom on peaks
# Names of the reference compounds, in the order in which they were stored in dref
refs = list(dref)
# 2theta window displayed in each panel of the zoomed figures, keyed by the name of the axes
# (previously tried: 'ax1':[18.7,19.1], 'ax2':[24.5,25.75], 'ax3':[33.25,34.25])
xlim = {
    'ax1': [18.6, 19.2],
    'ax2': [24.8, 25.5],
    'ax3': [33.4, 34.1],
}

In [37]:
# Figure with three panels for zoom on main peaks
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True)

# Pair each key of xlim with its axes object directly, instead of eval()-ing the variable name
for sax, ax in zip(['ax1', 'ax2', 'ax3'], [ax1, ax2, ax3]):
    lower, upper = xlim[sax]

    # Reference pattern, normalized to its maximum within the displayed 2theta window
    xref = dref[refs[0]]['2theta']
    Iref = dref[refs[0]]['I']
    ax.plot(xref, Iref/np.max(Iref[np.logical_and(xref > lower, xref < upper)]),
            label=f'{refs[0]} (calc.)')

    # Measured pattern selected by Tm_key, normalized the same way
    xxp = dxp[Tm_key]['2theta']
    Ixp = dxp[Tm_key]['I']
    ax.plot(xxp, Ixp/np.max(Ixp[np.logical_and(xxp > lower, xxp < upper)]),
            label=Tm_key[0])

    ax.set_xlabel(xlabel)
    ax.set_xlim(xlim[sax])
    ax.set_ylim([0, 1.1])

# The y axis is shared, so it is labelled on the first panel only
ax1.set_ylabel(ylabel)
# NOTE(review): the title mentions Y-substituted TmVO4 and YVO4, but only refs[0] and the
# Tm_key spectrum are plotted in this figure -- confirm the intended wording
ax2.set_title('XRD patterns of Y-substituted TmVO4 vs calculated patterns of TmVO4 and YVO4')
# plt.legend() acts on the current axes only
plt.legend()

<matplotlib.legend.Legend at 0x23b638f43c8>

In [197]:
# Save the current figure as a PDF in a folder named after today's date.
# The folder is created first if needed: savefig() does not create missing directories.
outdir = f'./{todaystr}_YTmVO4_analysis'
os.makedirs(outdir, exist_ok=True)
plt.savefig(f'{outdir}/{todaystr}_TmVO4_zoom.pdf', bbox_inches='tight')

## Compare measured XRD patterns of Y-substituted TmVO4 vs calculated patterns of TmVO4 and YVO4

In [75]:
# Plot full spectra

dxpk = list(dxp.keys())
# Keys (batch, date) of the spectra whose date contains '2021-03-24'
measured = [(batch_name, day) for (batch_name, day) in dxpk if '2021-03-24' in day]

fig, ax = plt.subplots()
# First reference pattern, plotted as stored
ax.plot(dref[refs[0]]['2theta'], dref[refs[0]]['I'], label=f'{refs[0]} (ICSD)')
# Measured patterns, each rescaled so that its maximum equals 100
for key in measured:
    ax.plot(dxp[key]['2theta'], dxp[key]['I']*1e2/max(dxp[key]['I']), label=compounds[key[0]])
# Second reference pattern, plotted as stored
ax.plot(dref[refs[1]]['2theta'], dref[refs[1]]['I'], label=f'{refs[1]} (ICSD)')
ax.set_xlim([15, 75])
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.set_title('XRD patterns of sintered Y-TmVO4 along with reported patterns of TmVO4 and YVO4')
ax.legend()

<matplotlib.legend.Legend at 0x1c4ccf291c8>

In [76]:
# Save the current figure as a PDF in a folder named after today's date.
# The folder is created first if needed: savefig() does not create missing directories.
outdir = f'./{todaystr}_YTmVO4_analysis'
os.makedirs(outdir, exist_ok=True)
plt.savefig(f'{outdir}/{todaystr}_YTmVO4.pdf', bbox_inches='tight')

In [71]:
# Figure with three panels for zoom on main peaks
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True)

# 2theta window displayed in each panel, keyed by the name of the axes
xlim = {'ax1':[18.6,19.2], 'ax2':[24.8,25.5], 'ax3':[33.4,34.1]}

# Pair each key of xlim with its axes object directly, instead of eval()-ing the variable name
for sax, ax in zip(['ax1', 'ax2', 'ax3'], [ax1, ax2, ax3]):
    lower, upper = xlim[sax]

    # Every curve is normalized to its own maximum within the displayed 2theta window.
    # Plotting order (first reference, measured spectra, second reference) sets the legend order.
    xref = dref[refs[0]]['2theta']
    Iref = dref[refs[0]]['I']
    ax.plot(xref, Iref/np.max(Iref[np.logical_and(xref > lower, xref < upper)]),
            label=f'{refs[0]} (ICSD)')

    # Measured spectra, in reverse order of the 'measured' list
    for key in measured[::-1]:
        xxp = dxp[key]['2theta']
        Ixp = dxp[key]['I']
        ax.plot(xxp, Ixp/np.max(Ixp[np.logical_and(xxp > lower, xxp < upper)]),
                label=compounds[key[0]])

    xref = dref[refs[1]]['2theta']
    Iref = dref[refs[1]]['I']
    ax.plot(xref, Iref/np.max(Iref[np.logical_and(xref > lower, xref < upper)]),
            label=f'{refs[1]} (ICSD)')

    ax.set_xlabel(xlabel)
    ax.set_xlim(xlim[sax])
    ax.set_ylim([0, 1.1])

# The y axis is shared, so it is labelled on the first panel only
ax1.set_ylabel(ylabel)
ax2.set_title('XRD patterns of Y-substituted TmVO4 vs reported patterns of TmVO4 and YVO4')
# plt.legend() acts on the current axes only
plt.legend()

<matplotlib.legend.Legend at 0x1c4ccc43e48>

In [73]:
# Save the current figure as a PDF in a folder named after today's date.
# The folder is created first if needed: savefig() does not create missing directories.
outdir = f'./{todaystr}_YTmVO4_analysis'
os.makedirs(outdir, exist_ok=True)
plt.savefig(f'{outdir}/{todaystr}_YTmVO4_zoom.pdf', bbox_inches='tight')

## Compare XRD patterns before and after sintering

In [55]:
batches = ['LS5696','LS5697']
# Sorted unique measurement dates over all experimental spectra
dates = np.unique([day for (batch_name, day) in dxpk])
fig = {}

# 2theta window displayed in each panel, keyed by the name of the axes
xlim = {'ax1':[18.6,19.2], 'ax2':[24.8,25.5], 'ax3':[33.4,34.1]}

# One three-panel figure per batch, skipping the first entry of 'batches'
for idx, batch in enumerate(batches[1:]):
    fig[idx], (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True)

    # Pair each key of xlim with its axes object directly, instead of eval()-ing the variable name
    for sax, ax in zip(['ax1', 'ax2', 'ax3'], [ax1, ax2, ax3]):
        lower, upper = xlim[sax]

        # Loop variable deliberately not named 'date', which would shadow datetime.date
        # imported at the top of the notebook
        for scan_date in dates:
            spectrum = (batch, scan_date)
            # 'dates' gathers the dates of all batches, so a given batch may have no
            # spectrum on some of them: skip those instead of raising a KeyError
            if spectrum not in dxp:
                continue
            xxp = dxp[spectrum]['2theta']
            Ixp = dxp[spectrum]['I']
            # Normalize to the maximum within the displayed 2theta window
            ax.plot(xxp, Ixp/np.max(Ixp[np.logical_and(xxp > lower, xxp < upper)]),
                    label=f'{compounds[batch]},{scan_date}')

        ax.set_xlabel(xlabel)
        ax.set_xlim(xlim[sax])
        ax.set_ylim([0, 1.1])

    # The y axis is shared, so it is labelled on the first panel only
    ax1.set_ylabel(ylabel)
    ax2.set_title('XRD patterns of Y-substituted TmVO4 before and after sintering')
    # plt.legend() acts on the current axes only
    plt.legend()

In [56]:
# Save the current figure as a PDF in a folder named after today's date.
# The folder is created first if needed: savefig() does not create missing directories.
outdir = f'./{todaystr}_YTmVO4_analysis'
os.makedirs(outdir, exist_ok=True)
plt.savefig(f'{outdir}/{todaystr}_30Y-TmVO4_sintering.pdf', bbox_inches='tight')
# To save another figure of the 'fig' dictionary, make it current first, e.g.:
# plt.figure(fig[0].number)
# plt.savefig(f'./{todaystr}_YTmVO4_analysis/{todaystr}_15Y-TmVO4_sintering.pdf', bbox_inches='tight')