In [24]:
import numpy as np
import pandas as pd
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
# Data-retrieval client used by the query cell further down (mpdr.get_dataframe).
# NOTE(review): with no argument the API key is presumably picked up from local
# configuration or the environment -- confirm against the matminer docs.
mpdr = MPDataRetrieval() # or mpdr = MPDataRetrieval(api_key='YOUR_KEY')

In [23]:
def extract_xrd_data(xrd_data, n_peaks=10, radiation='Cu'):
    """
    Extracts the strongest XRD peaks from the dictionary obtained from MPD

    Parameters:
    ----------
    xrd_data : Dictionary
        The dictionary of data obtained from MPD. It is indexed by radiation
        source; the selected entry must hold a 'pattern' (one row per peak)
        and a 'meta' list naming the columns of each row.
    n_peaks : int, optional
        Number of strongest peaks to keep (default 10). Materials with fewer
        peaks than this are skipped.
    radiation : str, optional
        Key of the radiation source to read from xrd_data (default 'Cu').

    Returns:
    ----------
    clean_df : Pandas dataframe or None
        The n_peaks highest-amplitude peaks, sorted from highest to lowest
        amplitude and indexed 0..n_peaks-1, with the 'hkl' and 'd_spacing'
        columns removed. None is returned when the material has no data for
        the requested radiation or has fewer than n_peaks peaks.
    """

    # A material without usable data is skipped the same way as a material
    # with too few peaks, instead of raising on the lookup below.
    if not isinstance(xrd_data, dict) or radiation not in xrd_data:
        return None

    source = xrd_data[radiation]
    pattern = source['pattern']

    # Will return nothing and skip the material if it has too few peaks
    if len(pattern) < n_peaks:
        return None

    # One row per peak; the column names come from the accompanying metadata
    peaks_df = pd.DataFrame(pattern, columns=source['meta'])
    peaks_df = peaks_df.drop(columns=['hkl', 'd_spacing'])

    # A stable sort keeps peaks of equal amplitude in their original order,
    # so the selected peaks do not depend on the sort implementation.
    peaks_df = peaks_df.sort_values('amplitude', ascending=False, kind='mergesort')

    # head() + reset_index() hands back a fresh frame rather than a slice of
    # the intermediate one.
    return peaks_df.head(n_peaks).reset_index(drop=True)

In [20]:
# Function to reformat the data after cleaning
# Takes the dataframe and turns it into a dictionary where all data points have a unique key
def reformat_xrd_data(xrd_data):
    """
    Flattens the strongest XRD peaks of one material into a single dictionary

    Parameters:
    ----------
    xrd_data : Dictionary
        The dictionary of data obtained from MPD (passed unchanged to
        extract_xrd_data)

    Returns:
    ----------
    mat_dict : Dictionary or None
        One entry per data point: 'amplitude_0', 'amplitude_1', ... followed by
        'two_theta_0', 'two_theta_1', ..., where the number is the rank of the
        peak (0 = highest amplitude). None is returned when extract_xrd_data
        returned no dataframe for the material.
    """

    # Extract once and reuse the result; the extraction is the expensive step
    clean_df = extract_xrd_data(xrd_data)

    # Skips material if nothing is returned
    if not isinstance(clean_df, pd.DataFrame):
        return None

    mat_dict = {}

    # All amplitude keys are inserted before any two_theta key so that a
    # dataframe built from this dictionary keeps that column order.
    for column in ('amplitude', 'two_theta'):
        for rank, value in enumerate(clean_df[column].values):
            mat_dict[column + '_' + str(rank)] = value

    return mat_dict

In [21]:
# Function to build one dataframe of XRD peak features covering every material in the query result
def produce_data(df):
    """
    Builds one dataframe of flattened XRD peak data for all materials in df

    Parameters:
    ----------
    df : Pandas dataframe
        One row per material, with the raw XRD dictionary of each material in
        the 'xrd' column. The index values are used as the row labels of the
        result.

    Returns:
    ----------
    full_df : Pandas dataframe
        One row per material for which reformat_xrd_data returned data, one
        column per key of the dictionaries it returns. Materials for which it
        returned None are left out.
    """

    full_dict = {}

    for material_id, xrd_data in zip(df.index, df['xrd']):
        # Reformat once per material and reuse the result for both the
        # "was anything returned" check and the stored value.
        mat_dict = reformat_xrd_data(xrd_data)

        if mat_dict is not None:
            full_dict[material_id] = mat_dict

    # orient='index' turns every material into a row and every key into a column
    full_df = pd.DataFrame.from_dict(full_dict, orient='index')

    return full_df

In [22]:
# Request the 'xrd' property for everything matching the criteria 'Si'.
# NOTE(review): how the string 'Si' is interpreted (formula vs. chemical system)
# is decided by get_dataframe -- confirm against the matminer docs.
df = mpdr.get_dataframe(criteria='Si', properties=['xrd'])
# Flatten the peaks of every returned material and preview the first rows
produce_data(df).head()

Unnamed: 0,amplitude_0,amplitude_1,amplitude_2,amplitude_3,amplitude_4,amplitude_5,amplitude_6,amplitude_7,amplitude_8,amplitude_9,two_theta_0,two_theta_1,two_theta_2,two_theta_3,two_theta_4,two_theta_5,two_theta_6,two_theta_7,two_theta_8,two_theta_9
mp-1001113,100.0,78.288443,45.261343,40.440821,30.707488,26.515207,22.092113,21.73767,16.821364,16.690467,36.834948,46.09171,38.160738,171.874808,45.443514,176.442253,60.797841,148.648332,87.640251,80.839115
mp-1056579,100.0,91.828984,79.970463,45.423897,30.872127,26.312484,23.760237,15.234896,14.045885,13.052846,44.350136,167.464337,37.945623,73.006051,140.704562,54.746898,123.597176,85.886608,135.942882,93.269152
mp-10649,100.0,35.031568,24.90166,23.115411,22.51694,20.830194,16.146856,15.58578,15.106204,14.134877,43.934318,37.766028,130.152767,39.42053,72.471977,83.797856,55.734788,71.487094,121.359663,87.842079
mp-1072544,100.0,74.250236,32.8647,32.201109,18.813035,17.05364,15.411324,14.668014,14.527636,13.937115,173.362106,18.640121,46.743236,32.580631,160.193335,50.741974,54.52423,42.462094,26.480052,98.859603
mp-1079297,100.0,28.393003,27.930625,27.680183,27.341183,24.356947,23.828151,21.64095,21.245367,20.805431,27.389812,25.843727,50.463312,46.935059,171.425514,51.286901,20.161639,52.606616,17.884802,177.432037
