# Example Analysis

### A high-throughput investigation of Fe–Cr–Al as a novel high-temperature coating for nuclear cladding materials

<a href="https://doi.org/10.1088/0957-4484/26/27/274003">Article Link</a>

Bunn, Jonathan Kenneth, Randy L. Fang, Mark R. Albing, Apurva Mehta, Matthew J. Kramer, Matthew F. Besser, and Jason R. Hattrick-Simpers. "A high-throughput investigation of Fe–Cr–Al as a novel high-temperature coating for nuclear cladding materials." *Nanotechnology* 26, no. 27 (2015): 274003.


Example: We want to plot some of the data from the above study using MDF.


In [None]:
from io import StringIO
from multiprocessing.pool import Pool

from mdf_forge.forge import Forge

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Seaborn defaults applied to every figure below: 'poster' context, 'white' style.
sns.set_context('poster')
sns.set_style('white')

## Retrieve Records

In [None]:
# Create the Forge client, then search for every entry whose source name is
# "fe_cr_al_oxidation" and whose resource type is "record".
mdf = Forge()
res = (
    mdf.match_field("mdf.source_name", "fe_cr_al_oxidation")
    .match_field("mdf.resource_type", "record")
    .search()
)
print("Found {results} matches".format(results=len(res)))

In [None]:
# Display the first search result to inspect the structure of a single record.
res[0]

## Download data using HTTP

In [None]:
def format_get_cr_al_params(d):
    """Flatten one record's composition and temperature into a dict of floats.

    Parameters
    ----------
    d : mapping
        Must provide 'atomic_composition_percent' (a mapping whose values can
        be converted with ``float``) and 'temperature_k' (either the string
        "Room" or a value that can be converted with ``float``).

    Returns
    -------
    dict
        One float entry per key of the composition mapping, plus a
        'temperature_k' entry. The "Room" marker is replaced by 297.15
        (average room temperature).
    """
    composition = d['atomic_composition_percent']
    params = {element: float(composition[element]) for element in composition}

    raw_temperature = d['temperature_k']
    if raw_temperature == "Room":
        params['temperature_k'] = 297.15  # Avg room temp
    else:
        params['temperature_k'] = float(raw_temperature)
    return params
    
def get_fe_cr_al(r):
    """Download one record's data and pair it with its sample parameters.

    Takes the first item yielded by ``mdf.http_stream(r)`` (the notebook-level
    Forge client), parses it as tab-separated text without a header row, and
    labels the two columns "twotheta" and "counts".

    Parameters
    ----------
    r : mapping
        A search result; its 'fe_cr_al_oxidation' entry is passed to
        ``format_get_cr_al_params``.

    Returns
    -------
    tuple
        ``(params, df)`` where ``params`` is the dict returned by
        ``format_get_cr_al_params`` and ``df`` is the parsed DataFrame.
    """
    raw_text = next(mdf.http_stream(r))

    sample_params = format_get_cr_al_params(r['fe_cr_al_oxidation'])

    column_names = ["twotheta", "counts"]
    pattern = pd.read_csv(
        StringIO(raw_text),
        sep="\t",
        header=None,
        names=column_names,
    )
    return sample_params, pattern

# Download the first `n_points` records in parallel using `n_workers` processes.
n_workers = 10
n_points = 300

mp = Pool(n_workers)
try:
    mdf_data = mp.map(get_fe_cr_al, res[:n_points])
finally:
    # Shut the pool down even when a download raises; otherwise close()/join()
    # are skipped and the worker processes are left running in the kernel.
    mp.close()
    mp.join()

# Aggregate Results

In [None]:
## Choose peak integration range (this is ~ a range for the Fe3O4 oxide)

integration_peak = 42.8
integration_width = 1.
integration_range = (integration_peak-integration_width, integration_peak+integration_width)

results = []

for data, df_tmp in mdf_data:
    # Sum the counts whose twotheta lies strictly inside the integration window.
    in_window = ((df_tmp.twotheta > integration_range[0]) &
                 (df_tmp.twotheta < integration_range[1]))
    agg = df_tmp.loc[in_window, 'counts'].sum()
    results.append({"data": data, "aggregation": agg})

# Calculate normalization of the aggregation.
# Guard the two degenerate cases: no records at all (max() of an empty list
# raises ValueError) and an all-zero aggregation (0/0 would yield NaN).
m = max([r["aggregation"] for r in results]) if results else 0
for r in results:
    r["aggregation_norm"] = r["aggregation"] / m if m else 0.0

# Plot Results in Various Ways

In [None]:
# Marker position encodes the Al / Cr composition of each record; marker size
# encodes the normalized aggregation scaled by 1000.
x = [r["data"]["Al"] for r in results]
y = [r["data"]["Cr"] for r in results]
# One size per point: a flat list with the same length as x and y
# (previously this list was wrapped in a second, redundant list).
s = [r["aggregation_norm"]*1000 for r in results]

fig, ax = plt.subplots()
# Draw on the explicit axes rather than through the pyplot state machine.
ax.scatter(x, y, s=s, alpha=0.6)
ax.set_xlabel("Al Atomic %")
ax.set_ylabel("Cr Atomic %")
sns.despine()
plt.show()

## Joint Plots (Where we have observations)

In [None]:
# KDE joint plot of the sampled compositions (Al on x, Cr on y).
# NOTE(review): `size`, `stat_func` and `shade` appear to be renamed or removed
# in newer seaborn releases - confirm against the installed version.
ax = sns.jointplot(
    x=np.asarray(x),
    y=np.asarray(y),
    kind="kde",
    shade=True,
    stat_func=None,
    size=7,
).set_axis_labels("Al Atomic %", "Cr Atomic %")

In [None]:
# Hexbin joint plot of the sampled compositions (Al on x, Cr on y).
# NOTE(review): `size` and `stat_func` appear to be renamed or removed in newer
# seaborn releases - confirm against the installed version.
ax = sns.jointplot(
    x=np.asarray(x),
    y=np.asarray(y),
    kind="hex",
    gridsize=7,
    size=7,
    stat_func=None,
).set_axis_labels("Al Atomic %", "Cr Atomic %")