In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import warnings
import pandas as pd
import random
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process import GaussianProcessRegressor, kernels
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline


# Suppress all warning output for the rest of the notebook session.
warnings.filterwarnings('ignore')

In [None]:
# Data set to load; passed to get_data() and get_parameter().
DATA_NAME = "carbon"
# Acquisition function name; get_parameter() accepts "UCB" or "TS" for the "carbon" data set.
ACQ_FUNC = "UCB" # UCB or TS

# Acquisition-function parameters
V = 0.3 # for UCB
XI = 0.1 # for EI and PI
# Stored in METADATA as "seed_simulation".
RANDOM_STATE = 0

In [None]:
# Metadata shared by the plotting / animation helpers (used as their default
# ``metadata`` argument): "outputdir", "dataname" and "acq" are read to build
# output file names; the remaining keys record the run configuration.
METADATA = {"outputdir": "image_executed", "prefix": "BayesianOpt_materilals", 
              "dataname": DATA_NAME, "acq": ACQ_FUNC, 
              "seed_simulation": RANDOM_STATE, "v": V, "xi": XI}

In [None]:

def get_data(sample):
    """Load one of the bundled data sets, sorted by descending target value.

    The returned frame is sorted so that row 0 holds the largest target value
    and the index is reset to 0..n-1. For "ZBWZ" and "ZBWZ3" the sign of the
    target column is flipped before sorting.

    Args:
        sample (str): data set name; one of "carbon", "ReCo", "ZBWZ", "ZBWZ3".

    Returns:
        tuple containing

        - pd.DataFrame: the data, sorted by the target column (descending)
        - list[str]: descriptor (explanatory variable) column names
        - str: target column name

    Raises:
        ValueError: if ``sample`` is not one of the known data set names.
    """
    negate_target = False
    if sample == "carbon":
        df = pd.read_csv(
            "../data_calculated/Carbon8_descriptor_energy.csv", index_col=[0])
        descriptor_names = ['a0.25_rp1.5', 'a0.25_rp2.5', 'a0.5_rp1.5', 'a0.5_rp2.5',
                            'a1.0_rp1.5', 'a1.0_rp2.5']
        # To restrict the descriptors, slice the list, e.g. descriptor_names[:3].
        target_name = 'minus_energy'
    elif sample == "ReCo":
        df = pd.read_csv("../data/TC_ReCo_detail_descriptor.csv")
        descriptor_names = ['C_R', 'C_T', 'vol_per_atom', 'Z', 'f4', 'd5', 'L4f', 'S4f', 'J4f',
                            '(g-1)J4f', '(2-g)J4f']
        target_name = 'Tc'
    elif sample == "ZBWZ":
        df = pd.read_csv("../data/ZB_WZ_dE_rawdescriptor.csv")
        descriptor_names = ['IP_A', 'EA_A', 'EN_A', 'Highest_occ_A',
                            'Lowest_unocc_A', 'rs_A', 'rp_A', 'rd_A', 'IP_B', 'EA_B', 'EN_B',
                            'Highest_occ_B', 'Lowest_unocc_B', 'rs_B', 'rp_B', 'rd_B']
        target_name = 'dE'
        negate_target = True
    elif sample == "ZBWZ3":
        df = pd.read_csv("../data_calculated/ZB_WZ_dE_3var.csv")
        descriptor_names = ['desc1', 'desc2', 'desc3']
        target_name = 'dE'
        negate_target = True
    else:
        # Previously an unknown name fell through every branch and failed with
        # an UnboundLocalError at the return statement.
        raise ValueError("unknown sample={}".format(sample))

    if negate_target:
        # Original note: the maximum lies far from the rest of the data, so the
        # sign is flipped and the (former) minimum is searched for instead.
        df[target_name] = - df[target_name]

    # Every data set is ranked the same way: best (largest) target first.
    df = df.sort_values(
        by=target_name, ascending=False).reset_index(drop=True)
    return df, descriptor_names, target_name


# Load the data set selected by DATA_NAME; the g_ prefix marks notebook-level globals.
g_df, g_descriptor_names, g_target_name = get_data(DATA_NAME)


In [None]:
# Preview the ten best-ranked carbon structures (g_df is sorted by the target).
if DATA_NAME == "carbon":
    from IPython.display import display
    display(g_df.loc[:, ["key", "minus_energy", "polytype"]].head(10))


In [None]:

def evaluate_break_condition(train):
    """Decide whether the search loop may stop.

    Args:
        train (list[int]): indices observed so far (training set)

    Returns:
        bool: True if index 0 is contained in ``train``, otherwise False.
    """
    # NOTE(review): the previous docstring said "0 or 1", but only index 0 has
    # ever been checked by this function — confirm which is intended.
    return 0 in train


In [None]:
# pairplotの表示

from copy import deepcopy
import seaborn as sns

# Pair plot over all descriptor columns plus the target column.
g_columns = [*g_descriptor_names, g_target_name]
sns.pairplot(g_df[g_columns])


In [None]:
def get_Xy(df, descriptor_names, target_name):
    """Extract the descriptor matrix and target vector and scale both.

    Descriptors are scaled with ``StandardScaler`` and the target with
    ``MinMaxScaler``; both scalers are fitted on the full data passed in.

    Args:
        df (pd.DataFrame): data table
        descriptor_names (list[str]): descriptor column names
        target_name (str): target column name

    Returns:
        tuple containing

        - np.ndarray: scaled descriptors, one row per row of ``df``
        - np.ndarray: scaled target, flattened to one dimension
        - StandardScaler: scaler fitted on the descriptors
        - MinMaxScaler: scaler fitted on the target
    """
    scalerX = StandardScaler()
    scalery = MinMaxScaler()

    descriptors = df[descriptor_names].to_numpy()
    X = scalerX.fit_transform(descriptors)

    # The scaler expects a 2-D column; flatten the result back to 1-D.
    target_column = df[target_name].to_numpy().reshape(-1, 1)
    y = scalery.fit_transform(target_column).ravel()
    return X, y, scalerX, scalery


# Scaled descriptors / target for the whole data set, plus the fitted scalers.
g_X, g_y, g_scalerX, g_scalery = get_Xy(
    g_df, g_descriptor_names, g_target_name)


In [None]:
# Scaled descriptors: one line per descriptor column, plotted against the row index.
plt.plot(g_X)
plt.xlabel("row index")
plt.ylabel("standardized descriptor value")
plt.title("Descriptors after StandardScaler")
plt.show()


In [None]:
# Scaled target plotted against the row index.
plt.plot(g_y)
plt.xlabel("row index")
plt.ylabel("min-max scaled target")
plt.title("Target after MinMaxScaler")
plt.show()


In [None]:
# Plotting helper
def plot_GPR(X, y, Xtrain, ytrain, y_mean, y_std, acq, it, ia=None, metadata=METADATA,
            tickfontsize=20, labelfontsize=20, legendfontsize=20, titlefontsize=20,
            figsize=(15,4)):
    """Plot the prediction (mean +/- std) and the acquisition function side by side.

    The left panel shows the observed points, the predicted mean with a
    one-sigma band and the true target values; the right panel shows the
    acquisition function and, if given, the selected action. The figure is
    saved as ``<dataname>_BayseOpt_acq_<acq>_<it>.png`` inside
    ``metadata["outputdir"]`` (created if it does not exist) and then shown.

    Args:
        X (sequence): x-axis positions of all candidates (the callers in this
            notebook pass the row indices 0..n-1).
        y (np.array): target values of all candidates
        Xtrain (sequence): x-axis positions of the observed points (the callers
            in this notebook pass the training indices).
        ytrain (np.array): target values of the observed points
        y_mean (np.array): predicted mean for every candidate
        y_std (np.array): predicted standard deviation for every candidate
        acq (np.array): acquisition function value for every candidate
        it (int): iteration counter; used in the file name as is and in the
            title as ``it + 1``.
        ia (int, optional): selected action (index into ``acq``). Defaults to None.
        metadata (dict): display metadata; the keys "dataname", "acq" and
            "outputdir" are read. Defaults to METADATA.
        tickfontsize (int, optional): tick label font size. Defaults to 20.
        labelfontsize (int, optional): axis label font size. Defaults to 20.
        legendfontsize (int, optional): legend font size. Defaults to 20.
        titlefontsize (int, optional): title font size. Defaults to 20.
        figsize (Tuple[float], optional): figure size. Defaults to (15, 4).
    """
    dataname = metadata["dataname"]
    acqname = metadata["acq"]
    outputdir = metadata["outputdir"]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize)

    # Left panel: observations, prediction band and ground truth.
    ax1.plot(Xtrain, ytrain, "o", color="blue", label="train")
    ax1.fill_between(X, y_mean - y_std, y_mean + y_std, color="red", alpha=0.2)
    # Raw string: "\p" and "\s" are not valid escapes in a normal string literal.
    ax1.plot(X, y_mean, color="red", label=r"predict$\pm\sigma$", linewidth=0.2)
    ax1.plot(X, y, "--", color="blue", label="expriment")
    ax1.set_xlabel("index", fontsize=labelfontsize)
    ax1.legend(fontsize=legendfontsize, bbox_to_anchor=(-.15, 1), loc='upper right',)
    ax1.tick_params(axis='both', labelsize=tickfontsize)

    # Right panel: acquisition function and the chosen action.
    ax2.plot(X, acq, color="green", label="aquisition function", linewidth=1)
    if ia is not None:
        # Draw on ax2 explicitly instead of relying on pyplot's current axes.
        ax2.plot(ia, acq[ia], "o", color="purple", label="selected action")
    ax2.set_xlabel("index", fontsize=labelfontsize)
    ax2.legend(fontsize=legendfontsize, bbox_to_anchor=(1.05, 1), loc='upper left',)
    ax2.tick_params(axis='both', labelsize=tickfontsize)

    fig.suptitle("iteration {}".format(it+1), fontsize=titlefontsize)
    fig.tight_layout()

    filename = "{}_BayseOpt_acq_{}_{}.png".format(dataname, acqname, it)
    print(filename)
    # savefig does not create missing directories.
    os.makedirs(outputdir, exist_ok=True)
    fig.savefig(os.path.join(outputdir, filename))
    fig.show()



In [None]:
from scipy.stats import norm


def search_candidate_UCB(it, train, X, y, reg, param, plot=True):
    """Select the next action with the upper-confidence-bound (UCB) rule.

    The regressor is refitted on the observed rows, the acquisition
    ``mean + std * sqrt(v * it)`` is evaluated for every row of ``X`` and the
    highest-scoring row that is not yet in ``train`` is returned.

    Args:
        it (int): iteration counter
        train (np.array): indices of the observed (training) rows
        X (np.array): descriptors of all candidates
        y (np.array): target values of all candidates
        reg (regressor): regressor providing ``fit``, ``predict(..., return_std=True)``
            and ``kernel_``
        param (dict): UCB parameters; the key "v" is read
        plot (bool): True to draw and save the figure via plot_GPR

    Returns:
        int: index of the next action

    Raises:
        ValueError: if every candidate is already contained in ``train``.
    """
    # Build the training set for the regressor.
    Xtrain = X[train]
    ytrain = y[train]
    reg.fit(Xtrain, ytrain)
    print("kernel=", reg.kernel_)
    yp_mean, yp_std = reg.predict(X, return_std=True)

    # NOTE(review): the exploration weight is sqrt(v * it), which is zero when
    # it == 0 — confirm that a purely exploitative first step is intended.
    acq = yp_mean + yp_std*np.sqrt(param["v"]*it)

    # Candidates ranked best-first, restricted to rows not yet observed.
    ranked = np.argsort(acq)[::-1]
    unobserved = ranked[~np.isin(ranked, train)]
    if unobserved.size == 0:
        raise ValueError("every candidate is already in the training set")
    if unobserved[0] != ranked[0]:
        # The overall best row was already observed; fall back to the next best.
        print("search the other best candidate")
    ia = unobserved[0]

    if plot:
        idxall = list(range(y.shape[0]))
        plot_GPR(idxall, y, train, ytrain, yp_mean, yp_std, acq, it, ia)
    return ia


def search_candidate_TS(it, train, X, y, reg, param, plot=True):
    """Select the next action with Thompson sampling (TS).

    The regressor is refitted on the observed rows, one function sample is
    drawn from it for every row of ``X`` and the row with the largest sampled
    value that is not yet in ``train`` is returned.

    Args:
        it (int): iteration counter; added to the seed so each iteration draws
            a different sample
        train (np.array): indices of the observed (training) rows
        X (np.array): descriptors of all candidates
        y (np.array): target values of all candidates
        reg (regressor): regressor providing ``fit``, ``predict(..., return_std=True)``,
            ``sample_y`` and ``kernel_``
        param (dict): TS parameters; the key "seed_simulation" is read
        plot (bool): True to draw and save the figure via plot_GPR

    Returns:
        int: index of the next action

    Raises:
        ValueError: if every candidate is already contained in ``train``.
    """
    seed_simulation = param["seed_simulation"]
    # Build the training set for the regressor.
    Xtrain = X[train]
    ytrain = y[train]
    reg.fit(Xtrain, ytrain)
    print("kernel=", reg.kernel_)
    y_mean, y_std = reg.predict(X, return_std=True)

    # Draw one sample from the fitted model. Original note: without an explicit
    # random_state the same seed (0) would be used on every call.
    acq = np.asarray(
        reg.sample_y(X, random_state=seed_simulation+it)).reshape(-1)

    # Candidates ranked best-first, restricted to rows not yet observed.
    ranked = np.argsort(acq)[::-1]
    unobserved = ranked[~np.isin(ranked, train)]
    if unobserved.size == 0:
        raise ValueError("every candidate is already in the training set")
    if unobserved[0] != ranked[0]:
        print(ranked[0], "in training set, search the other best candidate")
    ia = unobserved[0]

    if plot:
        idxall = list(range(y.shape[0]))
        plot_GPR(idxall, y, train, ytrain, y_mean, y_std, acq, it, ia)
    return ia


def search_candidate_EI(it, train, X, y, reg, param, plot=True):
    """Select the next action with the expected-improvement (EI) rule.

    With ``d = mean - max(ytrain) - xi`` and ``z = d / std`` the acquisition is
    ``d * cdf(z) + std * pdf(z)``. Where the predicted std is zero the value
    ``max(d, 0)`` is used instead, so no NaN can enter the ranking.

    Args:
        it (int): iteration counter (only forwarded to the plot)
        train (np.array): indices of the observed (training) rows
        X (np.array): descriptors of all candidates
        y (np.array): target values of all candidates
        reg (regressor): regressor providing ``fit``, ``predict(..., return_std=True)``
            and ``kernel_``
        param (dict): EI parameters; the key "xi" is read
        plot (bool): True to draw and save the figure via plot_GPR

    Returns:
        int: index of the next action

    Raises:
        ValueError: if the acquisition values are not one-dimensional, or if
            every candidate is already contained in ``train``.
    """
    # Build the training set for the regressor.
    Xtrain = X[train]
    ytrain = y[train]
    reg.fit(Xtrain, ytrain)
    print("kernel=", reg.kernel_)
    y_mean, y_std = reg.predict(X, return_std=True)

    # Incumbent: best observed target (alternative considered in the original:
    # the best predicted mean).
    fp = np.max(ytrain)
    xi = param["xi"]
    print("xi", xi)

    improvement = np.asarray(y_mean - fp - xi, dtype=float)
    y_std = np.asarray(y_std, dtype=float)
    has_spread = y_std > 0
    # Divide only where std > 0; a plain division yields NaN for 0/0, and NaN
    # is ranked first by argsort()[::-1].
    z = np.divide(improvement, y_std,
                  out=np.zeros_like(improvement), where=has_spread)
    acq = np.where(has_spread,
                   improvement*norm.cdf(z) + y_std*norm.pdf(z),
                   np.maximum(improvement, 0.0))
    if acq.ndim != 1:
        raise ValueError(
            "acquisition values must be one-dimensional, got shape {}".format(acq.shape))

    # Candidates ranked best-first, restricted to rows not yet observed.
    ranked = np.argsort(acq)[::-1]
    unobserved = ranked[~np.isin(ranked, train)]
    if unobserved.size == 0:
        raise ValueError("every candidate is already in the training set")
    if unobserved[0] != ranked[0]:
        print("search the other best candidate")
    ia = unobserved[0]

    if plot:
        idxall = list(range(y.shape[0]))
        plot_GPR(idxall, y, train, ytrain, y_mean, y_std, acq, it, ia)
    return ia


def search_candidate_PI(it, train, X, y, reg, param, plot=True):
    """Select the next action with the probability-of-improvement (PI) rule.

    With ``d = mean - max(ytrain) - xi`` and ``z = d / std`` the acquisition is
    ``cdf(z)``. Where the predicted std is zero the value is 1 if ``d > 0`` and
    0 otherwise, so no NaN can enter the ranking.

    Args:
        it (int): iteration counter (only forwarded to the plot)
        train (np.array): indices of the observed (training) rows
        X (np.array): descriptors of all candidates
        y (np.array): target values of all candidates
        reg (regressor): regressor providing ``fit``, ``predict(..., return_std=True)``
            and ``kernel_``
        param (dict): PI parameters; the key "xi" is read
        plot (bool): True to draw and save the figure via plot_GPR

    Returns:
        int: index of the next action

    Raises:
        ValueError: if every candidate is already contained in ``train``.
    """
    # Build the training set for the regressor.
    Xtrain = X[train]
    ytrain = y[train]
    reg.fit(Xtrain, ytrain)
    print("kernel=", reg.kernel_)
    y_mean, y_std = reg.predict(X, return_std=True)

    # Incumbent: best observed target (alternative considered in the original:
    # the best predicted mean).
    fp = np.max(ytrain)
    xi = param["xi"]

    improvement = np.asarray(y_mean - fp - xi, dtype=float)
    y_std = np.asarray(y_std, dtype=float)
    has_spread = y_std > 0
    # Divide only where std > 0; a plain division yields NaN for 0/0, and NaN
    # is ranked first by argsort()[::-1].
    z = np.divide(improvement, y_std,
                  out=np.zeros_like(improvement), where=has_spread)
    acq = np.where(has_spread, norm.cdf(z), (improvement > 0).astype(float))
    acq = acq.reshape(-1)

    # Candidates ranked best-first, restricted to rows not yet observed.
    ranked = np.argsort(acq)[::-1]
    unobserved = ranked[~np.isin(ranked, train)]
    if unobserved.size == 0:
        raise ValueError("every candidate is already in the training set")
    if unobserved[0] != ranked[0]:
        print("search the other best candidate")
    ia = unobserved[0]

    if plot:
        idxall = list(range(y.shape[0]))
        plot_GPR(idxall, y, train, ytrain, y_mean, y_std, acq, it, ia)
    return ia


In [None]:
def select_initial_traininigset(nall, nselect=10, seed=1):
    """Draw the initial training indices at random.

    Indices are sampled without replacement from ``range(nselect, nall)``, so
    the first ``nselect`` indices (0 .. nselect-1) are never chosen.

    Args:
        nall (int): total number of candidates
        nselect (int, optional): number of indices to draw. Defaults to 10.
        seed (int, optional): random seed. Defaults to 1.

    Returns:
        list: the selected indices
    """
    # Both global generators are seeded; the sample itself comes from ``random``.
    random.seed(seed)
    np.random.seed(seed)
    candidates = range(nselect, nall)
    train = random.sample(candidates, nselect)
    print("initial action", train)
    return train


In [None]:
def get_parameter(sample, acqfunc, setting):
    """Return the run parameters for a (sample, acqfunc, setting) combination.

    For the "carbon" data set only the seed of the initial selection depends on
    ``acqfunc`` and ``setting``; every other data set gets one fixed parameter
    set regardless of ``acqfunc`` and ``setting``.

    Args:
        sample (str): data set name
        acqfunc (str): acquisition function name
        setting (int): setting index

    Raises:
        ValueError: unknown ``setting`` for a known ("carbon", acqfunc) pair
        ValueError: unknown ``acqfunc`` for "carbon"

    Returns:
        tuple containing

        - int: seed for the initial selection
        - int: number of initially selected points
        - bool: whether to plot every iteration
        - int: maximum number of iterations
    """
    print(sample, acqfunc, setting)

    if sample != "carbon":
        return 1, 3, True, 50

    # (setting, seed) pairs. The original notes beside the settings read
    # 11 / 20 / 40 for UCB and 45 / 10 / 35 for TS — presumably the number of
    # iterations each run needed; TODO confirm.
    if acqfunc == "UCB":
        seed_by_setting = ((1, 0), (2, 2), (3, 1))
    elif acqfunc == "TS":
        seed_by_setting = ((1, 0), (2, 1), (3, 3))
    else:
        raise ValueError("unknown acqfunc={}".format(acqfunc))

    matching_seeds = [seed for known, seed in seed_by_setting if setting == known]
    if not matching_seeds:
        raise ValueError("unknown setting={} for sample={} and acqfunc={}".format(setting, sample, acqfunc))
    seed_initial_selection = matching_seeds[0]

    # Only UCB setting 3 echoes its seed.
    if acqfunc == "UCB" and setting == 3:
        print("seed_initial_selection",seed_initial_selection)

    return seed_initial_selection, 10, True, 50


# Run parameters for the configured data set / acquisition function (setting 1).
g_seed, g_nselect, g_plotit, g_maxiteration = get_parameter(
    DATA_NAME, ACQ_FUNC, setting=1)

In [None]:
# Bundle the acquisition-function parameters in one dict so that every
# search_candidate_* function can be called with the same signature.
# The values come from the configuration constants (V, XI, RANDOM_STATE)
# instead of repeating their literals, so editing the configuration cell
# actually changes the search.
g_aqsparam = {"v": V, "xi": XI, "seed_simulation": RANDOM_STATE}

g_train = select_initial_traininigset(
    g_X.shape[0], g_nselect, g_seed)


def make_model(optimize):
    """Build the Gaussian process regression model.

    The kernel is always ``RBF(length_scale=0.5)``.

    Args:
        optimize (bool): if true, keep the regressor's default optimizer;
            otherwise pass ``optimizer=None``.

    Returns:
        GaussianProcessRegressor: the (unfitted) regression model
    """
    kernel = RBF(length_scale=0.5)
    if optimize:
        return GaussianProcessRegressor(kernel=kernel)
    return GaussianProcessRegressor(kernel=kernel, optimizer=None)


# Regressor shared by all iterations of the search below.
g_reg = make_model(optimize=True)


def search_all(X, y, reg, train, acqfunc, aqsparam, maxiteration, plotit):
    """Run the search loop for at most ``maxiteration`` iterations.

    Each iteration asks the selected acquisition function for the next action,
    appends it to ``train`` and stops early once evaluate_break_condition()
    reports success. ``acqfunc`` may be "UCB", "TS", "PI" or "EI".

    Args:
        X (np.ndarray): descriptors of all candidates
        y (np.ndarray): target values of all candidates
        reg (GaussianProcessRegressor): regression model
        train (list[int]): indices observed so far (training data)
        acqfunc (str): acquisition function name
        aqsparam (dict): parameters passed to the acquisition function
        maxiteration (int): maximum number of iterations
        plotit (bool): whether to plot every iteration

    Raises:
        ValueError: if ``acqfunc`` is not a known acquisition function name

    Returns:
        np.ndarray: ``train`` with the selected actions appended (``train``
        itself is returned unchanged when no iteration runs)
    """
    search_functions = {
        "UCB": search_candidate_UCB,
        "TS": search_candidate_TS,
        "PI": search_candidate_PI,
        "EI": search_candidate_EI,
    }
    if acqfunc not in search_functions:
        raise ValueError("unknown acqfunc={}".format(acqfunc))
    search_function = search_functions[acqfunc]

    for iteration in range(maxiteration):
        print()
        print("iteration=", iteration+1)
        print("action=", train)

        next_action = search_function(
            iteration, train, X, y, reg, aqsparam, plot=plotit)
        print("next action=", next_action)

        train = np.append(train, next_action)
        if evaluate_break_condition(train):
            # NOTE(review): this prints the 0-based counter, while the
            # "iteration=" line above prints the 1-based one.
            print("found both minima, iteration=", iteration)
            break

    print("\nfinal action", train)
    return train


# Run the search; g_train is rebound to the initial indices plus every selected action.
g_train = search_all(g_X, g_y, g_reg, g_train, ACQ_FUNC,
                     g_aqsparam, g_maxiteration, g_plotit)


In [None]:
# All observed indices, then the number of actions added after the initial selection.
print(g_train)
print(len(g_train)-g_nselect)


In [None]:
from IPython import display

# Detect whether Pillow is available; the GIF helpers below are skipped without it.
try:
    from PIL import Image
except ModuleNotFoundError:
    have_PIL = False
else:
    have_PIL = True

print(have_PIL)

if have_PIL:
    def make_acq_animation(nselect, train, metadata=METADATA):
        """Combine the per-iteration acquisition figures into an animated GIF.

        One PNG per selected action is read from ``metadata["outputdir"]``
        (``<dataname>_BayseOpt_acq_<acq>_<k>.png`` for k = 0, 1, ...).

        Args:
            nselect (int): number of initially selected points; frames exist
                only for the actions after them
            train (list[int]): action list (observed indices)
            metadata (dict): display metadata. Defaults to METADATA.

        Returns:
            str: path of the written GIF file
        """
        outputdir = metadata["outputdir"]
        dataname = metadata["dataname"]
        acq = metadata["acq"]

        n_frames = len(train) - nselect
        imglist = []
        for frame_no in range(n_frames):
            png_name = "{}_BayseOpt_acq_{}_{}.png".format(dataname, acq, frame_no)
            filename = os.path.join(outputdir, png_name)
            print(filename)
            imglist.append(Image.open(filename))

        # The GIF is written through the first frame; the rest are appended.
        filename_fig = os.path.join(outputdir, "{}_BayseOpt_acq.gif".format(dataname))
        first_frame = imglist[0]
        other_frames = imglist[1:]
        first_frame.save(filename_fig,
                         save_all=True,
                         append_images=other_frames, duration=500,
                         interlace=False,
                         loop=1)
        print("saved to", filename_fig)
        return filename_fig


    g_filename_acqgif = make_acq_animation(g_nselect, g_train)


In [None]:
# display.Image(g_filename_acqgif)


In [None]:
from sklearn.decomposition import PCA
from matplotlib.colors import LinearSegmentedColormap


def carbon_vis_parameter(df):
    """Return the plotting parameters for the two highlighted carbon rows.

    Rows 0 and 1 of ``df`` are highlighted; their labels are read from the
    "polytype" column.

    Args:
        df (pd.DataFrame): data table with a "polytype" column

    Returns:
        tuple containing

        - list[int]: row indices of the highlighted rows
        - list[str]: polytype names of those rows
        - list[str]: marker symbols for those rows
    """
    rows = [0, 1]
    labels = [df.loc[row, "polytype"] for row in rows]
    markers = ["s", "^"]
    return rows, labels, markers


def show_energysurface(X, y, df=None, metadata=METADATA):
    """Show the target values as a heat map in a 2D projection of the descriptors.

    The descriptors are projected onto two principal components. For the
    "carbon" data set rows 0 and 1 are highlighted with their polytype labels;
    otherwise row 0 is highlighted as "TOP".

    Args:
        X (np.array): descriptors
        y (np.array): target values (used as point colours)
        df (pd.DataFrame, optional): data table; only read for the "carbon"
            data set, to obtain the polytype labels. Defaults to None.
        metadata (dict): display metadata; ``metadata["dataname"]`` selects
            the highlighting. Defaults to METADATA.
    """
    sample = metadata["dataname"]

    projector = PCA(2)
    projector.fit(X)
    X2d = projector.transform(X)

    fig, ax = plt.subplots()
    cm = plt.get_cmap("rainbow")
    points = ax.scatter(X2d[:, 0], X2d[:, 1], marker=".", c=y, cmap=cm, alpha=1)
    fig.colorbar(points, ax=ax)

    # (row, legend label, marker) for every highlighted point.
    if sample == "carbon":
        highlighted = zip(*carbon_vis_parameter(df))
    else:
        highlighted = [(0, "TOP", "o")]
    for row, label, marker in highlighted:
        ax.scatter(X2d[row, 0], X2d[row, 1], marker=marker, c="black",
                   label=label)

    ax.legend()

    fig.show()


# Target heat map over the 2D projection of all descriptors.
show_energysurface(g_X, g_y, g_df)


In [None]:
def show_2D_actions(X, y, nselect, train, df, sample, metadata=METADATA,
                    show_text = False, 
                    textfontsize=15, tickfontsize=15, legendfontsize=15, titlefontsize=15):
    """Visualise the search path in a 2D projection of the descriptors.

    One figure is drawn per selected action (``train[nselect:]``): all
    candidates coloured by target value, earlier observations as crosses, the
    highlighted reference rows, and an arrow from the previous action to the
    current one. Each figure is saved as
    ``<dataname>_BayseOpt_<acq>_PCA_<idx>.png`` inside ``metadata["outputdir"]``
    (created if it does not exist) and then shown.

    Args:
        X (np.ndarray): descriptors of all candidates
        y (np.ndarray): target values of all candidates
        nselect (int): number of initially selected points; drawing starts at
            this position of ``train``
        train (list[int]): observed indices, in the order they were selected
        df (pd.DataFrame): data table; only read for the "carbon" data set
        sample (str): data set name
        metadata (dict, optional): display metadata; the keys "dataname",
            "acq" and "outputdir" are read. Defaults to METADATA.
        show_text (bool, optional): annotate both arrow ends with their index.
            Defaults to False.
        textfontsize (int, optional): text font size. Defaults to 15.
        tickfontsize (int, optional): ticks font size. Defaults to 15.
        legendfontsize (int, optional): legend font size (currently unused
            because the legend call is disabled). Defaults to 15.
        titlefontsize (int, optional): title font size. Defaults to 15.
    """
    pca = PCA(2)
    pca.fit(X)
    X2d = pca.transform(X)

    # Values that do not change between iterations.
    cm = plt.get_cmap("rainbow")
    dataname = metadata["dataname"]
    acqname = metadata["acq"]
    outputdir = metadata["outputdir"]
    # savefig does not create missing directories.
    os.makedirs(outputdir, exist_ok=True)

    # (row, legend label, marker) for the reference points drawn in black.
    if sample == "carbon":
        highlighted = list(zip(*carbon_vis_parameter(df)))
    else:
        highlighted = [(0, "TOP", "o")]

    for idx in range(nselect, len(train)):
        fig, ax = plt.subplots(figsize=(4, 3))
        ax.set_title("iteration {}".format(idx-nselect+1), fontsize=titlefontsize)
        im = ax.scatter(X2d[:, 0], X2d[:, 1], marker=".",
                        c=y, cmap=cm, alpha=1)
        fig.colorbar(im, ax=ax)

        # Mark the points selected earlier.
        # NOTE(review): train[:idx-1] leaves out train[idx-1], the arrow's
        # start point — confirm that this is intended.
        earlier = train[:idx-1]
        ax.scatter(X2d[earlier, 0], X2d[earlier, 1],
                   marker="x", c=y[earlier],
                   vmin=y.min(), vmax=y.max(), cmap=cm)

        for row, label, marker in highlighted:
            ax.scatter(X2d[row, 0], X2d[row, 1], marker=marker, c="black",
                       label=label)

        i1 = train[idx-1]
        i2 = train[idx]
        if show_text:
            ax.text(X2d[i1, 0], X2d[i1, 1], str(i1), fontsize=textfontsize)
            ax.text(X2d[i2, 0], X2d[i2, 1], str(i2), fontsize=textfontsize)
        # Arrow from the previous action i1 to the current action i2.
        ax.arrow(X2d[i1, 0], X2d[i1, 1],
                 X2d[i2, 0] - X2d[i1, 0], X2d[i2, 1] - X2d[i1, 1],
                 width=0.05, head_width=0.5,
                 length_includes_head=True, color="black", alpha=1)
        # The legend is intentionally not drawn: ax.legend(fontsize=legendfontsize)
        ax.tick_params(axis='both', labelsize=tickfontsize)
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        fig.tight_layout()

        filename = os.path.join(outputdir,
                                "{}_BayseOpt_{}_PCA_{}.png".format(dataname, acqname, idx))
        print(filename)
        fig.savefig(filename)
        fig.show()


# Draw and save one search-path figure per selected action.
show_2D_actions(g_X, g_y, g_nselect, g_train, g_df, DATA_NAME)

In [None]:
from IPython import display

if have_PIL:
    def make_pca_gif(train, metadata=METADATA, nselect=None):
        """Combine the per-action search-path PNG figures into an animated GIF.

        The frames ``<dataname>_BayseOpt_<acq>_PCA_<idx>.png`` for
        ``idx`` in ``range(nselect, len(train))`` are read from
        ``metadata["outputdir"]``.

        Args:
            train (list[int]): action list (observed indices)
            metadata (dict): display metadata; the keys "dataname", "acq" and
                "outputdir" are read. Defaults to METADATA.
            nselect (int, optional): number of initially selected points.
                Defaults to None, which falls back to the notebook-level
                ``g_nselect``.

        Returns:
            str: gif filename
        """
        if nselect is None:
            # Backward-compatible fallback to the value used before the
            # parameter existed.
            nselect = g_nselect
        dataname = metadata["dataname"]
        acqname = metadata["acq"]
        outputdir = metadata["outputdir"]

        imglist = []
        for idx in range(nselect, len(train)):
            filename = os.path.join(outputdir,
                                    "{}_BayseOpt_{}_PCA_{}.png".format(dataname, acqname, idx))
            imglist.append(Image.open(filename))

        # The GIF is written through the first frame; the rest are appended.
        filename_gif = os.path.join(outputdir,
                                    "{}_BayseOpt_{}_PCA.gif".format(dataname, acqname))
        imglist[0].save(filename_gif,
                        save_all=True,
                        append_images=imglist[1:], duration=300,
                        interlace=False,
                        loop=1)
        return filename_gif


    g_filename_pcagif = make_pca_gif(g_train, nselect=g_nselect)

    # An expression nested inside an ``if`` block is not rendered automatically
    # by the notebook, so the image is displayed explicitly.
    display.display(display.Image(g_filename_pcagif))

In [None]:
# display.Image(g_filename_pcagif)