# <div align="center">Aim of the code</div>

1) To check for erroneous .cif files and delete them from the folder

# Libraries

In [1]:
import os

# import the library for manipulating cif data
import Dans_Diffraction as dif               # "pip install Dans-Diffraction" for installing this package

import numpy as np

# Create a list containing names of all the .cif files

In [2]:
# Directory that holds the training .cif files ("./" means the current directory)
training_data_directory = './cif_files/training/'

# Collect the path of every entry whose name ENDS with ".cif".
# - endswith() avoids picking up names that merely contain ".cif" (e.g. "x.cif.bak").
# - os.path.join() reuses the directory variable instead of repeating the literal path.
# - sorted() makes the list reproducible, because os.listdir() returns entries in arbitrary order.
file_paths = sorted(
    os.path.join(training_data_directory, file_name)
    for file_name in os.listdir(training_data_directory)
    if file_name.endswith('.cif')
)

In [3]:
# Sanity check: preview the first five collected paths
file_paths[:5]

['./cif_files/training/1010086.cif',
 './cif_files/training/1010099.cif',
 './cif_files/training/1010103.cif',
 './cif_files/training/1010376.cif',
 './cif_files/training/1010389.cif']

# Create a function to check for erroneous .cif files and delete them

In [4]:
def check_error_files(file):
    """Delete ``file`` if it cannot be loaded and used to simulate a powder pattern.

    The file is parsed into a ``dif.Crystal``, its lattice parameters are read,
    and an x-ray powder pattern is simulated over the two-theta range 5-100.
    If any of these steps raises an exception, the file is treated as erroneous
    and removed from disk.

    Parameters
    ----------
    file : str
        Path to the .cif file to validate.

    Returns
    -------
    None
        The function acts only through its side effects (deleting the file and
        printing a message).

    Notes
    -----
    Only ``Exception`` subclasses count as "the file is erroneous".
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately allowed to
    propagate, so interrupting the kernel never deletes a file that simply
    happened to be mid-check.
    """
    # Photon energy for a wavelength of 1.54059
    # (presumably Angstrom, i.e. Cu K-alpha1 -- confirm against the Dans_Diffraction docs)
    energy_kev = dif.fc.wave2energy(1.54059)

    try:
        xtl = dif.Crystal(file)

        # Reading the lattice parameters (a, b, c, alpha, beta, gamma) is part of
        # the validity check; the values themselves are not needed here.
        xtl.Cell.lp()

        # Simulate the powder pattern: 2theta values, corresponding intensities,
        # and reflections (h, k, l Miller indices).
        xtl.Scatter.setup_scatter(
            energy_kev=energy_kev,
            min_twotheta=5,
            max_twotheta=100,
            scattering_type='xray',
            powder_units='tth',
        )
        # The three-way unpacking is kept on purpose: a result of a different
        # shape also marks the file as erroneous, exactly as before.
        twotheta, intensity, reflections = xtl.Scatter.powder(units='tth', peak_width=0.01, background=0)

    except Exception:
        # Guard with isfile() so a file that is already gone does not raise here.
        if os.path.isfile(file):
            os.remove(file)
            print("erroneous file deleted")



# Run the "check_error_files" function in parallel for different .cif files

In [None]:
# Optional: put "%%capture" on the very first line of this cell to suppress all of its output.

from joblib import Parallel, delayed

# Schedule one check_error_files call per .cif path on the 'loky' backend.
# n_jobs=-1 asks joblib to use all available CPU cores; the trailing ";" hides
# the returned list of results from the cell output.
Parallel(
    backend='loky',
    n_jobs=-1,
    timeout=None,
    max_nbytes=None,
)(delayed(check_error_files)(cif_path) for cif_path in file_paths);