Imports.

In [None]:
import sys
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import NMF
from diffpy.utils.parsers.loaddata import loadData

Ensuring that data files are in place.

In [None]:
# Make sure a 'data' folder holding at least one data file exists in the
# current working directory; otherwise stop the cell with instructions.
data_path = Path.cwd() / "data"
if not data_path.exists():
    data_path.mkdir()
    sys.exit(f"\n{80*'-'}\nA folder called '{data_path.name}' has been created."
             f"\nPlease place your data files there and rerun the cell."
             f"\n{80*'-'}")
# The order in which glob yields entries is not guaranteed, so sort to get a
# reproducible (lexicographic) file order. Directories and hidden files such
# as '.DS_Store' also match '*.*' but are not data files, so they are skipped.
data_files = sorted(p for p in data_path.glob("*.*")
                    if p.is_file() and not p.name.startswith("."))
if not data_files:
    sys.exit(f"\n{80*'-'}\nNo files were found in the '{data_path.name}' "
             f"folder.\nPlease place your data files there and rerun the cell."
             f"\n{80*'-'}")
# One "<index>\t<file name>" line per file that was found.
listing = "".join(f"{i}\t{e.name}\n" for i, e in enumerate(data_files))
s = f"The following files were found in the '{data_path.name}' folder:\n{listing}"
print(f"\n{80*'-'}\n{s}{80*'-'}")

Loading data from files.

In [None]:
# Load every data file, keep its x- and y-columns in the dictionary d (keyed
# by file index) and gather the y-columns into one 2D array with one column
# per file.
# NOTE(review): loadData is assumed to return a 2D array whose first two
# columns are x and y - confirm against the diffpy.utils documentation.
d = {}
y_columns = []
print(f"\n{80*'-'}\nLoading data...")
for i, e in enumerate(data_files):
    print(f"\t{i}\t{e.name}")
    data = loadData(e)
    x, y = data[:, 0], data[:, 1]
    d[i] = dict(path=e, x=x, y=y)
    y_columns.append(y)
# Stack once after the loop: this avoids re-copying the growing array on every
# iteration and yields a 2D array even when only a single file was loaded.
yarray = np.column_stack(y_columns)
# x holds the x-values of the last file loaded; the files are presumably on a
# common x-grid (column_stack requires equal lengths).
print(f"Done loading data.\n{80*'-'}\n"
      f"xmin = {np.amin(x)}, xmax = {np.amax(x)}\n{80*'-'}\n"
      f"shape of stacked y-array: {yarray.shape}\n{80*'-'}")

Getting the frame number of each file, assuming that it is prepended to the filename and followed by an underscore.

In [None]:
# The text in front of the first underscore of each file stem is read as the
# integer frame number of that file.
prefixes = [path.stem.partition("_")[0] for path in data_files]
frames = np.array(list(map(int, prefixes)))
print(frames)

Function to get the index of the first element in an array that is greater than or equal to a given value.

In [None]:
def get_idx(array, value):
    """Return the index of the first element that is >= ``value``.

    Parameters
    ----------
    array : iterable
        Values to search, scanned from the start.
    value : scalar
        Threshold that the elements are compared against.

    Returns
    -------
    int
        Index of the first element satisfying ``element >= value``. If no
        element satisfies the condition, the index of the last element is
        returned.

    Raises
    ------
    ValueError
        If ``array`` is empty, since no index can be returned.
    """
    last = -1
    for last, element in enumerate(array):
        if element >= value:
            return last
    if last < 0:
        # Without this guard an empty input would fail with an obscure
        # UnboundLocalError on the return statement.
        raise ValueError("get_idx() requires a non-empty array.")

    # No element reached the threshold: fall back to the last index.
    return last

State minimum and maximum $x$-value to conduct analysis for.

In [None]:
# Lower and upper x-value bounding the part of the data that is analysed.
xmin = 1
xmax = 30

Shaping the $y$-array to conduct the NMF analysis for.

In [None]:
# Crop the data to the requested x-window and transpose, so that every row of
# X holds the y-values of one file.
idx_min = get_idx(x, xmin)
idx_max = get_idx(x, xmax)
# idx_max is used as an exclusive stop, so the point at idx_max is left out.
window = slice(idx_min, idx_max)
X = yarray[window, :].T
# x is overwritten with its cropped version; re-running this cell without
# reloading the data therefore crops the already cropped axis again.
x = x[window]
print(f"\n{80*'-'}\nshape of X (shaped yarray): {X.shape}\n{80*'-'}")

Plot function to plot reconstruction error.

In [None]:
def plot_re(re, plot_paths):
    """Plot reconstruction errors against the number of components.

    The errors are numbered 1, 2, ..., ``len(re)`` along the horizontal axis,
    i.e. the first entry is taken to belong to one component.

    Parameters
    ----------
    re : sequence of float
        Reconstruction errors, one per number of components.
    plot_paths : iterable of pathlib.Path
        Output folders. In each folder the figure is saved as
        ``re.<folder name>``, so the folder name doubles as the file
        extension.

    Returns
    -------
    None
    """
    n_values = np.arange(1, len(re) + 1)
    lo, hi = np.amin(n_values), np.amax(n_values)
    # Leave a margin of 1 % of the plotted range on either side.
    margin = 0.01 * (hi - lo)
    first_color = plt.rcParams["axes.prop_cycle"].by_key()["color"][0]

    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(n_values, re, "-o", c=first_color)
    ax.set_xlabel(r"$n$", fontsize=20)
    ax.set_ylabel(r"$re$", fontsize=20)
    ax.tick_params(axis="both", which="both", labelsize=14)
    ax.set_xlim(lo - margin, hi + margin)
    ax.minorticks_on()

    for folder in plot_paths:
        target = folder / f"re.{folder.name}"
        plt.savefig(target, bbox_inches="tight", dpi=300)
    plt.show()

Function to conduct nmf analysis for range of components to inspect the  
reconstruction error.

In [None]:
def nmf_re(X, n_range, random_state=None):
    """Fit an NMF model for several numbers of components.

    Parameters
    ----------
    X : numpy.ndarray
        2D data array. If it contains negative values, a copy shifted by its
        minimum is used, as NMF requires non-negative input; the caller's
        array is not modified.
    n_range : iterable of int
        Numbers of components to fit a model for.
    random_state : int, numpy.random.RandomState or None, optional
        Passed on to ``NMF``. The "nndsvdar" initialisation is randomised, so
        pass a fixed value to obtain reproducible errors. Default is None.

    Returns
    -------
    list of float
        Reconstruction error (``reconstruction_err_``) of each fitted model,
        in the order of ``n_range``.
    """
    offset = np.amin(X)
    if offset < 0:
        X = X - offset
    print(f"\n{80*'-'}\nConducting nmf analysis for range of components...")
    re = []
    for n_components in n_range:
        print(f"\t{n_components}")
        model = NMF(n_components=n_components,
                    init="nndsvdar",
                    max_iter=10**3,
                    random_state=random_state,
                    )
        # Fit X in the same orientation as the nmf() function does, so that
        # the errors describe the very factorisation that is analysed later.
        model.fit(X)
        re.append(model.reconstruction_err_)
    print(f"Done.\n{80*'-'}")

    return re

Conducting nmf analysis for range of components.

In [None]:
# Reconstruction errors for models with 1, 2, ..., 10 components.
component_counts = np.arange(1, 11)
re = nmf_re(X, component_counts)

Plotting reconstruction error as a function of the number of components.

In [None]:
# One output folder per file format; the folder name is also used as the file
# extension by the plot functions.
plot_folders = ["png"]
plot_paths = [Path.cwd() / folder for folder in plot_folders]
for p in plot_paths:
    p.mkdir(exist_ok=True)
# The plot shows the NMF reconstruction error, so the caption says so.
print(f"\n{80*'-'}\nReconstruction error as a function of number of "
      f"components:")
plot_re(re, plot_paths)
print(f"{80*'-'}")

Function to conduct nmf analysis.

In [None]:
def nmf(X, n_components, random_state=None):
    """Conduct an NMF analysis of ``X`` with ``n_components`` components.

    Parameters
    ----------
    X : numpy.ndarray
        2D data array with one sample per row. If it contains negative
        values, a copy shifted by its minimum is used, as NMF requires
        non-negative input; the caller's array is not modified.
    n_components : int
        Number of components of the NMF model.
    random_state : int, numpy.random.RandomState or None, optional
        Passed on to ``NMF``. The "nndsvdar" initialisation is randomised, so
        pass a fixed value to obtain reproducible results. Default is None.

    Returns
    -------
    offset : scalar
        Minimum of the input ``X``; the data were shifted only if it is
        negative.
    components : numpy.ndarray
        The ``components_`` attribute of the fitted model.
    weights : numpy.ndarray
        Result of transforming ``X`` with the fitted model, one row per
        sample.
    weights_norm : numpy.ndarray
        ``weights`` with every row divided by its sum. Rows that sum to zero
        yield non-finite values.
    """
    offset = np.amin(X)
    if offset < 0:
        X = X - offset
    # Named 'model' rather than 'nmf' to avoid shadowing this function.
    model = NMF(n_components=n_components,
                init="nndsvdar",
                max_iter=10**3,
                random_state=random_state,
                )
    model.fit(X)
    components = model.components_
    weights = model.transform(X)
    # Normalise row-wise so that the weights of each sample sum to one.
    weights_norm = weights / weights.sum(axis=1, keepdims=True)

    return offset, components, weights, weights_norm

Plot function to plot nmf weights as a function of frame number.

In [None]:
def plot_weights(frames, weights, name, plot_paths):
    """Plot the weight of every component as a function of frame number.

    Parameters
    ----------
    frames : numpy.ndarray
        Frame numbers used as horizontal coordinates.
    weights : numpy.ndarray
        2D array with one row per frame and one column per component.
    name : str
        File name (without extension) of the saved figure.
    plot_paths : iterable of pathlib.Path
        Output folders. In each folder the figure is saved as
        ``<name>.<folder name>``, so the folder name doubles as the file
        extension.

    Returns
    -------
    None
    """
    first, last = np.amin(frames), np.amax(frames)
    # Leave a margin of 1.5 % of the frame range on either side.
    pad = 0.015 * (last - first)
    n_components = weights.shape[1]

    fig, ax = plt.subplots(figsize=(12, 4))
    # Components are labelled 1, 2, ... in the legend.
    for number, column in enumerate(weights.T, start=1):
        ax.plot(frames, column, "-o", label=number)
    ax.set_xlim(first - pad, last + pad)
    ax.set_xlabel("frame", fontsize=20)
    ax.set_ylabel("$w$", fontsize=20)
    ax.tick_params(axis="both", labelsize=14)
    ax.minorticks_on()
    legend_options = dict(loc="upper center",
                          ncols=n_components,
                          framealpha=0,
                          borderaxespad=-2,
                          )
    ax.legend(**legend_options)

    for folder in plot_paths:
        target = folder / f"{name}.{folder.name}"
        plt.savefig(target, bbox_inches="tight", dpi=600)
    plt.show()

Plot function to plot nmf components.

In [None]:
def plot_components(x, components, offset, name, plot_paths):
    """Plot every NMF component in its own panel of a stacked figure.

    Parameters
    ----------
    x : numpy.ndarray
        Horizontal coordinates shared by all components.
    components : numpy.ndarray
        2D array with one component per row.
    offset : scalar
        If negative, the mean of the 499 points preceding the last point of a
        component is subtracted from that component before plotting
        (presumably to re-centre the tail after the data were shifted to be
        non-negative - confirm the intended window with the author).
    name : str
        File name (without extension) of the saved figure.
    plot_paths : iterable of pathlib.Path
        Output folders. In each folder the figure is saved as
        ``<name>.<folder name>``, so the folder name doubles as the file
        extension.

    Returns
    -------
    None
    """
    n_components = components.shape[0]
    xmin, xmax = np.amin(x), np.amax(x)
    colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]
    # squeeze=False always returns a 2D array of axes, so a single component
    # can be plotted as well (otherwise a bare Axes object is returned).
    fig, axs = plt.subplots(figsize=(12, 4),
                            nrows=n_components,
                            sharex=True,
                            sharey=True,
                            squeeze=False,
                            gridspec_kw=dict(hspace=0),
                            )
    axs = axs[:, 0]
    for i, (ax, component) in enumerate(zip(axs, components)):
        if offset < 0:
            y = component - component[-500:-1].mean()
        else:
            y = component
        # Wrap around the colour cycle when there are more components than
        # colours instead of raising an IndexError.
        ax.plot(x, y, c=colors[i % len(colors)], label=i + 1)
        ax.set_xlim(xmin, xmax)
        ax.legend(loc="upper right", framealpha=0, fontsize=14)
        ax.minorticks_on()
    # Raw strings: the mathtext backslashes are not valid Python escapes.
    axs[-1].set_xlabel(r"$r\;[\mathrm{\AA}]$", fontsize=20)
    fig.supylabel(r"$G\;[\mathrm{\AA}^{-2}]$", fontsize=20, x=0.04)
    for p in plot_paths:
        plt.savefig(p / f"{name}.{p.name}", bbox_inches="tight", dpi=600)
    plt.show()

    return None

Range of number of components to conduct nmf analysis for.

In [None]:
# Smallest and largest number of components to analyse (both inclusive).
n_min, n_max = 2, 5
range_components = range(n_min, n_max + 1)

Conducting nmf analyses for range of number of components and plotting.

In [None]:
# For every requested number of components: run the NMF analysis, then plot
# the normalised weights against frame number and the components against x.
print(f"{80*'-'}\nConducting nmf analysis and plotting for range of "
      f"components...")
for n in range_components:
    print(f"\n{80*'-'}\n\t{n}")
    offset, components, weights, weights_norm = nmf(X, n)
    weights_name = f"nmf_weights_n={n}"
    components_name = f"nmf_components_n={n}"
    plot_weights(frames=frames,
                 weights=weights_norm,
                 name=weights_name,
                 plot_paths=plot_paths,
                 )
    plot_components(x=x,
                    components=components,
                    offset=offset,
                    name=components_name,
                    plot_paths=plot_paths,
                    )