In [3]:
import os
import sys
import pickle
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from skimage.filters import threshold_otsu
import tcripser
import gudhi,gudhi.hera,gudhi.wasserstein,persim

In [4]:
import plotly.graph_objs as go
import plotly.subplots as sp
import plotly.offline as pyo

In [5]:
# Root data directory; every dataset path in this notebook is built from it.
# NOTE(review): absolute, machine-specific path.
inDir="Z:/data/diverse_metals"
# Changes the kernel's working directory, which affects every later cell.
os.chdir(inDir)
# Entry names found directly inside <inDir>/diverseTop.
topTypes=os.listdir(inDir+"/diverseTop")

# Property table; the first CSV column becomes the index, and those index
# labels are reused below as the ordered list of MOF names.
df=pd.read_csv(f"{inDir}/post-combustion-vsa-2-clean.csv",index_col=0)
mofNames=list(df.index)
cols=df.columns #[wc,sel] -- presumably working capacity and selectivity; confirm against the CSV header

In [10]:

class PDhash():
    """Sparse, grid-snapped hash map over a collection of persistence diagrams.

    Every (birth, death) pair is rounded to a multiple of ``res`` and used as
    a dictionary key, so only occupied cells of the diagram plane are stored.
    ``img[dim][pt]`` holds the labels ("indices") of the diagrams that hit
    cell ``pt`` in homology dimension ``dim``:

    * mode ``'freq'``: a dict ``{index: number of hits}``
    * any other mode (e.g. ``'set'`` or ``None``): a set of indices

    ``bounds[dim]`` is ``[smallest birth, largest death]`` over the cells
    added to that dimension, or ``[inf, -inf]`` while it is empty.

    A dict-of-dicts is slower than a dense numpy image but merges naturally,
    which suits map-reduce style parallelisation.
    """

    def __init__(self, res=1, diags=None, maxHdim=2, persistThresh=0, mode='freq'):
        """Create an empty hash.

        res           -- grid resolution the births/deaths are snapped to.
        diags         -- accepted for interface compatibility; not read here.
        maxHdim       -- highest homology dimension that is stored.
        persistThresh -- pairs with death - birth <= this value are ignored.
        mode          -- 'freq' keeps per-index hit counts; anything else
                         keeps a plain set of indices per cell.
        """
        self.res = res
        self.maxD = maxHdim
        self.thresh = persistThresh
        self.bounds = [[np.inf, -np.inf] for _ in range(maxHdim + 1)]
        self.img = {b: dict() for b in range(maxHdim + 1)}
        self.mode = mode

    def addDiagRpp(self, diag, index):
        """Add one diagram to the hash.

        diag  -- indexable by dimension, ``diag[dim]`` being an iterable of
                 (birth, death) pairs, e.g. ``{0: [(b, d), ...], 1: [...]}``.
        index -- label recorded for every cell the diagram hits.  It may be a
                 running number or a numerical value; in set mode equal
                 values collapse into one entry.
        """
        freqMode = self.mode == "freq"
        for i in range(min(self.maxD + 1, len(diag))):
            cells = self.img[i]
            dimBounds = self.bounds[i]
            for k in diag[i]:
                if k[1] - k[0] > self.thresh:
                    # Snap to the grid; this exact expression defines the keys.
                    pt = (round(k[0] / self.res) * self.res, round(k[1] / self.res) * self.res)
                    if pt[0] < dimBounds[0]:
                        dimBounds[0] = pt[0]
                    if pt[1] > dimBounds[1]:
                        dimBounds[1] = pt[1]
                    if freqMode:
                        counts = cells.setdefault(pt, {})
                        counts[index] = counts.get(index, 0) + 1
                    else:
                        # Always add: a new index on an existing cell must be kept.
                        cells.setdefault(pt, set()).add(index)

    def addDiagCubeRips(self, crispy, index):
        """Placeholder for rows shaped [[bi,b,d,bx,by,bz,dx,dy,dz],..]; not implemented, does nothing."""
        pass

    def __getitem__(self, item):
        """``hash[dim]`` returns the cell dict of one dimension; ``hash[(birth, death)]``
        returns ``{dim: payload}`` for every dimension that contains that cell."""
        if isinstance(item, (int, np.integer)) and item <= self.maxD:  # item is a dimension
            return self.img[item]
        pt = tuple(item)
        # maxD + 1 so that the highest dimension is searched as well.
        return {b: self.img[b][pt] for b in range(self.maxD + 1) if pt in self.img[b]}

    def remapIndex(self, index2New: dict):
        """Relabel every stored index in place through ``index2New``.

        Raises KeyError for an index missing from the mapping.  When several
        old indices map to the same new one, their hit counts are summed
        (dict payloads) or they collapse into one member (set payloads).
        """
        for cells in self.img.values():
            for pt, members in cells.items():
                if isinstance(members, dict):
                    remapped = {}
                    for old, count in members.items():
                        new = index2New[old]
                        remapped[new] = remapped.get(new, 0) + count
                else:
                    remapped = {index2New[old] for old in members}
                # Replacing the value of an existing key is safe while iterating.
                cells[pt] = remapped

    def subsetIndex(self, subfunc=lambda x: True):  # not to be confused with subsetBounds
        """Return a new PDhash holding only the indices for which ``subfunc(index)`` is true.

        Cells left without any index are dropped, and the bounds of the
        result are recomputed from the cells that remain.
        """
        subPD = PDhash(res=self.res, diags=None, maxHdim=self.maxD, persistThresh=self.thresh, mode=self.mode)
        for b, cells in self.img.items():
            kept = {}
            for pt, members in cells.items():
                if isinstance(members, dict):
                    sub = {k: v for k, v in members.items() if subfunc(k)}
                else:
                    sub = {k for k in members if subfunc(k)}
                if sub:
                    kept[pt] = sub
            subPD.img[b] = kept
            if kept:
                subPD.bounds[b] = [min(pt[0] for pt in kept), max(pt[1] for pt in kept)]
        return subPD

    def merge(self, pdStack):
        """Fold another PDhash into this one, in place.

        Shared cells have their counts summed (dict payloads) or their index
        sets united (set payloads); cells only present in ``pdStack`` are
        copied over; the bounds are widened to cover both hashes.

        Raises ValueError when the two hashes differ in maximum dimension,
        resolution, or freq/set mode.
        """
        if pdStack.maxD != self.maxD:
            raise ValueError("cannot merge PDhash objects with different maxHdim")
        if pdStack.res != self.res:
            raise ValueError("cannot merge PDhash objects with different res")
        if (self.mode == 'freq') != (pdStack.mode == 'freq'):
            raise ValueError("cannot merge a 'freq' PDhash with a set-mode PDhash")
        for bi in range(self.maxD + 1):
            mine = self.img[bi]
            for pt, theirs in pdStack.img[bi].items():
                if pt not in mine:
                    # Copy so that later edits do not leak back into pdStack.
                    mine[pt] = theirs.copy()
                elif isinstance(mine[pt], dict):
                    counts = mine[pt]
                    for ind, hits in theirs.items():
                        counts[ind] = counts.get(ind, 0) + hits
                else:
                    mine[pt].update(theirs)
            self.bounds[bi][0] = min(self.bounds[bi][0], pdStack.bounds[bi][0])
            self.bounds[bi][1] = max(self.bounds[bi][1], pdStack.bounds[bi][1])

    # def indexImgMap(self,fn=lambda pt: np.sum([v*k for k,v in self.img])): #assuming index is something numerically useful like a property assocated to each PD

    def _dimToNumpy(self, bi, cellValue, dtype):
        """Rasterise one dimension into a square array, or return None (after printing a note) if it is empty."""
        mi, ma = self.bounds[bi]
        if mi == np.inf or ma == -np.inf:
            print(f"no points in {bi}")
            return None
        # round(), not int(): for a res such as 0.1 the quotient can land just
        # below the intended integer, and truncation would shift the cell.
        life = int(round((ma - mi) / self.res)) + 1
        box = np.zeros((life, life), dtype=dtype)
        for k, v in self.img[bi].items():
            # Row 0 is the largest death, column 0 the smallest birth.
            box[int(round((ma - k[1]) / self.res)), int(round((k[0] - mi) / self.res))] = cellValue(v)
        return box

    def _toNumpy(self, bi, cellValue, dtype):
        """Shared driver: one array for a valid integer ``bi``, otherwise a list with one entry per dimension."""
        if isinstance(bi, (int, np.integer)) and bi <= self.maxD:
            return self._dimToNumpy(bi, cellValue, dtype)
        boxes = []
        for dim in range(self.maxD + 1):
            box = self._dimToNumpy(dim, cellValue, dtype)
            boxes.append([] if box is None else box)
        return boxes

    @staticmethod
    def _cellMean(v, roundDig=8):
        """Mean of the indices in one cell, weighted by hit count when the payload is a dict."""
        if isinstance(v, dict):
            return np.round(np.sum([t * p for p, t in v.items()]) / np.sum([t for t in v.values()]), roundDig)
        return np.round(np.mean(list(v)), roundDig)

    @staticmethod
    def _cellSum(v, roundDig=8):
        """Sum of the indices in one cell, weighted by hit count when the payload is a dict."""
        if isinstance(v, dict):
            return np.round(np.sum([t * p for p, t in v.items()]), roundDig)
        return np.round(np.sum(list(v)), roundDig)

    def density_to_numpy(self, bi=None):
        """Number of distinct indices per cell as uint32 image(s).

        Returns one square array for an integer ``bi`` <= maxHdim (None if
        that dimension is empty); otherwise a list with one array per
        dimension, an empty list standing in for an empty dimension.
        """
        return self._toNumpy(bi, len, np.uint32)

    def mean_to_numpy(self, bi=None):
        """Mean index value per cell as float32 image(s); same return shape rules as density_to_numpy."""
        return self._toNumpy(bi, self._cellMean, 'float32')

    def sum_to_numpy(self, bi=None):
        """Summed index value per cell as float32 image(s); same return shape rules as density_to_numpy."""
        return self._toNumpy(bi, self._cellSum, 'float32')

    def boxStatsIndex(self):
        """Return ``{dim: {pt: (mean, variance)}}`` over the distinct indices of every cell.

        Hit counts are not used as weights here: for dict payloads only the
        keys enter the statistics.
        """
        stats = {}
        for b, cells in self.img.items():
            perPoint = {}
            for pt, members in cells.items():
                values = np.array(list(members), dtype=np.float32)
                perPoint[pt] = (np.mean(values), np.var(values))
            stats[b] = perPoint
        return stats


def boxStatsIndex(pdStack):
    """Per-cell statistics of a persistence hash.

    For every dimension key of ``pdStack.img`` and every cell of that
    dimension, the stored members are cast to float32 and summarised as a
    ``(mean, variance)`` tuple.  The result is ``{dim: {pt: (mean, var)}}``.
    Iterating a dict payload yields its keys, so hit counts are not weighted.
    """
    stats = {}
    for dim in pdStack.img.keys():
        cells = pdStack[dim]
        perPoint = {}
        for pt in cells.keys():
            values = np.array(list(cells[pt]), dtype=np.float32)
            perPoint[pt] = (np.mean(values), np.var(values))
        stats[dim] = perPoint
    return stats


def mean_to_numpy(pdStack, bi=None):
    """Rasterise the mean payload value of every cell into a square float32 image.

    pdStack -- object exposing ``res``, ``maxD``, ``bounds`` and ``img``.
    bi      -- homology dimension to render.  With ``None`` (the default) a
               list with one image per dimension 0..maxD is returned.

    Row 0 of an image is the largest death, column 0 the smallest birth.
    Dict payloads are averaged with their values as weights; any other
    payload (e.g. a set) is averaged directly.  A dimension without points
    yields an empty ``(0, 0)`` array.  An integer ``bi`` above ``maxD`` (or
    any other unsupported value) returns None.
    """
    roundDig = 8

    def _meanBox(dim):
        mi, ma = pdStack.bounds[dim]
        if mi == np.inf or ma == -np.inf:
            # Untouched bounds mean no cells; int(+-inf) would raise OverflowError.
            return np.zeros((0, 0), dtype='float32')
        # round(), not int(): for a res such as 0.1 the quotient can land just
        # below the intended integer, and truncation would shift the cell.
        life = int(round((ma - mi) / pdStack.res)) + 1
        meanBox = np.zeros((life, life), dtype='float32')
        for k, v in pdStack.img[dim].items():
            if isinstance(v, dict):
                cellMean = np.sum([t * p for p, t in v.items()]) / np.sum([t for t in v.values()])
            else:
                cellMean = np.mean(list(v))
            row = int(round((ma - k[1]) / pdStack.res))
            col = int(round((k[0] - mi) / pdStack.res))
            meanBox[row, col] = np.round(cellMean, roundDig)
        return meanBox

    if bi is None:
        return [_meanBox(dim) for dim in range(pdStack.maxD + 1)]
    if isinstance(bi, (int, np.integer)) and bi <= pdStack.maxD:
        return _meanBox(bi)
    return None

def boxProjectSet(pdStack,df,regVar,indexMap=None):### set project vs freq project
    """Replace the indices stored in a PDhash by values of one DataFrame column.

    pdStack  -- source hash; its ``img`` payloads are iterated for indices.
    df       -- DataFrame holding the property to project.
    regVar   -- column label looked up on each selected row.
    indexMap -- optional sequence/mapping from stored index to a ``df`` row
                label (``df.loc``).  When missing or empty, the stored
                indices are used as row positions (``df.iloc``), which raises
                if an index lies outside ``df``.

    Returns a new set-mode PDhash whose cells hold the *set* of projected
    values, so equal values from different diagrams collapse into one entry.
    """
    # mode="set": the payloads built below are sets, not {index: count} dicts.
    pdProj=PDhash(res=pdStack.res,diags=None,maxHdim=pdStack.maxD,persistThresh=pdStack.thresh,mode="set")
    # Copy the bounds so the two hashes do not share mutable lists.
    pdProj.bounds=[list(dimBounds) for dimBounds in pdStack.bounds]

    # Explicit None/length test: truth-testing an array-like map would raise.
    if indexMap is not None and len(indexMap) > 0:
        def lookup(i):
            return df.loc[indexMap[i]][regVar]
    else:
        def lookup(i):
            return df.iloc[i][regVar]

    pdProj.img={b:{pt:{lookup(i) for i in members} for pt,members in cells.items()} for b,cells in pdStack.img.items()}
    return pdProj



In [141]:
# NOTE(review): this cell re-defines boxProjectSet, which is also defined in the
# PDhash cell above; the definition executed last is the one in effect.
def boxProjectSet(pdStack,df,regVar,indexMap=None):### set project vs freq project
    """Replace the indices stored in a PDhash by values of one DataFrame column.

    pdStack  -- source hash; its ``img`` payloads are iterated for indices.
    df       -- DataFrame holding the property to project.
    regVar   -- column label looked up on each selected row.
    indexMap -- optional sequence/mapping from stored index to a ``df`` row
                label (``df.loc``).  When missing or empty, the stored
                indices are used as row positions (``df.iloc``), which raises
                if an index lies outside ``df``.

    Returns a new set-mode PDhash whose cells hold the *set* of projected
    values, so equal values from different diagrams collapse into one entry.
    """
    # mode="set": the payloads built below are sets, not {index: count} dicts.
    pdProj=PDhash(res=pdStack.res,diags=None,maxHdim=pdStack.maxD,persistThresh=pdStack.thresh,mode="set")
    # Copy the bounds so the two hashes do not share mutable lists.
    pdProj.bounds=[list(dimBounds) for dimBounds in pdStack.bounds]

    # Explicit None/length test: truth-testing an array-like map would raise.
    if indexMap is not None and len(indexMap) > 0:
        def lookup(i):
            return df.loc[indexMap[i]][regVar]
    else:
        def lookup(i):
            return df.iloc[i][regVar]

    pdProj.img={b:{pt:{lookup(i) for i in members} for pt,members in cells.items()} for b,cells in pdStack.img.items()}
    return pdProj

In [7]:
##xyz PBC


# Load the pickled persistence hash for the XYZ data set; later cells read its
# res / maxD / thresh / bounds / img attributes.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
# Presumably the PDhash class must already be defined when unpickling; confirm how the file was written.
with open(f"{inDir}/diverseTopPDhash/pdStack_rppCords0_freq_QuartRes.pkl","rb") as f:
    pdXYZ=pickle.load(f)

In [6]:

# Project the first two property columns of df onto the XYZ persistence hash:
# every stored index i is replaced by the property value of MOF mofNames[i].
# The payloads are sets of values, hence mode="set"; the bounds are copied so
# the projected hashes do not share mutable lists with pdXYZ.
# .iloc[0] / .iloc[1] select the column by position explicitly (a bare row[0]
# on a label-indexed Series relies on a deprecated positional fallback).
# NOTE(review): needs pdXYZ, which is loaded by the cell above.
pdXYZCap=PDhash(res=pdXYZ.res,diags=None, maxHdim=pdXYZ.maxD,persistThresh=pdXYZ.thresh,mode="set")
pdXYZCap.bounds=[list(dimBounds) for dimBounds in pdXYZ.bounds]
pdXYZCap.img={b:{pt:{df.loc[mofNames[i]].iloc[0] for i in pdXYZ.img[b][pt]} for pt in pdXYZ.img[b].keys()} for b in pdXYZ.img.keys()}

pdXYZSel=PDhash(res=pdXYZ.res,diags=None, maxHdim=pdXYZ.maxD,persistThresh=pdXYZ.thresh,mode="set")
pdXYZSel.bounds=[list(dimBounds) for dimBounds in pdXYZ.bounds]
pdXYZSel.img={b:{pt:{df.loc[mofNames[i]].iloc[1] for i in pdXYZ.img[b][pt]} for pt in pdXYZ.img[b].keys()} for b in pdXYZ.img.keys()}


In [124]:
# Same projection through the helper: the "selectivity" column, with hash
# indices translated to row labels via mofNames.
pdXYZSel2=boxProjectSet(pdXYZ,df,"selectivity",mofNames)

In [126]:
# Dimension-1 cells of the helper-built projection (compare with pdXYZSel[1]).
# NOTE(review): this dumps the whole mapping; a direct equality check would be shorter.
pdXYZSel2[1]

{(3.0, 3.25): {1.49925,
  2.60314,
  3.877015,
  4.0822,
  5.073905,
  6.889025,
  6.66752,
  8.00073,
  8.019015,
  10.7364,
  7.36523,
  11.7833,
  13.236525,
  14.487945,
  15.392115,
  16.97487,
  16.66224,
  18.790055,
  18.277675,
  20.454345,
  20.950835,
  22.086455,
  22.84345,
  22.633055,
  24.93138,
  26.754025,
  27.810585,
  28.781495,
  20.70629,
  30.61682,
  30.1731,
  30.55086,
  33.20161,
  34.04611,
  33.006885,
  36.736545,
  37.723055,
  38.754,
  37.19883,
  34.084565,
  33.298935,
  36.32368,
  43.202095,
  44.9385,
  45.18093,
  46.508,
  47.154615,
  48.134325,
  46.88854,
  50.35134,
  51.00549,
  51.85209,
  48.389475,
  54.07332,
  55.596435,
  47.102325,
  57.27165,
  57.751655,
  57.91519,
  60.365415,
  61.796945,
  57.51983,
  59.803425,
  64.10769,
  65.630725,
  65.154825,
  58.996775,
  68.69197,
  69.60441,
  70.721325,
  70.320915,
  71.655935,
  64.88981,
  74.08221,
  66.323175,
  76.73696,
  73.681985,
  78.23896,
  79.619885,
  79.245965,
  80.

In [125]:
# Dimension-1 cells of the manually built selectivity projection.
pdXYZSel[1]

{(3.0, 3.25): {1.49925,
  2.60314,
  3.877015,
  4.0822,
  5.073905,
  6.889025,
  6.66752,
  8.00073,
  8.019015,
  10.7364,
  7.36523,
  11.7833,
  13.236525,
  14.487945,
  15.392115,
  16.97487,
  16.66224,
  18.790055,
  18.277675,
  20.454345,
  20.950835,
  22.086455,
  22.84345,
  22.633055,
  24.93138,
  26.754025,
  27.810585,
  28.781495,
  20.70629,
  30.61682,
  30.1731,
  30.55086,
  33.20161,
  34.04611,
  33.006885,
  36.736545,
  37.723055,
  38.754,
  37.19883,
  34.084565,
  33.298935,
  36.32368,
  43.202095,
  44.9385,
  45.18093,
  46.508,
  47.154615,
  48.134325,
  46.88854,
  50.35134,
  51.00549,
  51.85209,
  48.389475,
  54.07332,
  55.596435,
  47.102325,
  57.27165,
  57.751655,
  57.91519,
  60.365415,
  61.796945,
  57.51983,
  59.803425,
  64.10769,
  65.630725,
  65.154825,
  58.996775,
  68.69197,
  69.60441,
  70.721325,
  70.320915,
  71.655935,
  64.88981,
  74.08221,
  66.323175,
  76.73696,
  73.681985,
  78.23896,
  79.619885,
  79.245965,
  80.

In [40]:

# Mean working-capacity image of every homology dimension, one subplot per row.
meanBoxes=[mean_to_numpy(pdXYZCap,bi=b) for b in range(pdXYZCap.maxD+1)] # just do it all at once from within class function -> later tho
xlims=[np.linspace(pdXYZCap.bounds[b][0],pdXYZCap.bounds[b][1],len(meanBoxes[b])) for b in range(pdXYZCap.maxD+1)]
ylims=xlims
# Row 0 of each image is the largest death, so flip rows to match ascending y.
traces=[go.Heatmap(x=xlims[b], y=ylims[b], z=meanBoxes[b][::-1], colorscale='Viridis') for b in range(pdXYZCap.maxD+1)]

# No per-axis domain overrides: in a rows x 1 grid make_subplots already
# assigns the domains, and overriding xaxis2/xaxis3 squeezed rows 2 and 3
# into a third of the figure width.
layout = go.Layout(title="Multiple Array Heatmaps",
                    autosize=False,
                   width=1000,
                   height=1080,
                   margin=dict(l=50, r=100, t=100, b=100),)

# One row per trace instead of a hard-coded 3.
fig = sp.make_subplots(rows=len(traces), cols=1,vertical_spacing=0.08,
                       subplot_titles=[f"B{dim}" for dim in range(len(traces))])
for b in range(len(traces)):
    fig.add_trace(traces[b],row=b+1,col=1)

fig.update_layout(layout)
# x follows the birth coordinate of the cells, y the death coordinate.
fig.update_xaxes(title_text="Birth")
fig.update_yaxes(title_text="Death")
fig.show()

In [34]:
# Dimension-0 cells of the working-capacity projection (full dump).
pdXYZCap[0]

{(0.0, 1.0): {0.597645,
  1.907387,
  2.74803,
  2.274991,
  2.348146,
  3.200277,
  1.155811,
  0.467765,
  1.489694,
  2.792873,
  3.495818,
  2.986497,
  3.599525,
  4.76003,
  7.740499,
  2.521749,
  0.260843,
  0.528018,
  3.946883,
  0.371761,
  1.7686,
  0.187361,
  0.215795,
  0.201746,
  0.127516,
  0.09178,
  0.934247,
  0.487329,
  0.515472,
  2.162318,
  2.934337,
  2.059157,
  0.287304,
  0.227904,
  0.116574,
  0.768461,
  0.374729,
  2.649772,
  2.371719,
  2.571737,
  0.454404,
  0.79659,
  0.280965,
  0.499715,
  0.755929,
  0.458157,
  0.2607,
  0.241771,
  0.167935,
  0.303877,
  0.220084,
  0.146642,
  0.256411,
  0.151916,
  0.391368,
  0.787172,
  2.899654,
  0.552783,
  0.849658,
  0.643322,
  2.509091,
  3.349734,
  0.174551,
  0.15522,
  0.202686,
  1.899508,
  2.146311,
  3.04333,
  1.355726,
  0.658801,
  0.966583,
  0.643169,
  2.174516,
  2.333873,
  0.596287,
  0.380644,
  1.03068,
  0.193137,
  0.571382,
  0.909903,
  0.194654,
  0.966932,
  0.649362,
  0

In [47]:
# B0 heatmap of the mean working capacity per persistence cell (XYZ data).
b=0
X=mean_to_numpy(pdXYZCap,bi=b)
x=np.linspace(pdXYZCap.bounds[b][0],pdXYZCap.bounds[b][1],len(X))
y=x
# Row 0 of X is the largest death; flip rows so they follow ascending y.
trace=go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} XYZ_Persistence Map - Colored by workingCap",
                   xaxis=dict(title="Birth"),
                   yaxis=dict(title="Death"))
fig = go.Figure(data=[trace], layout=layout)

saveDir="C:/code/git/topML/frames/MOF"
os.makedirs(saveDir, exist_ok=True)  # writing fails if the folder is missing

# auto_open=False, as in the DFT cells: fig.show() below already displays the figure.
pyo.plot(fig, filename=f"{saveDir}/{b}XYZ_workCap.html",auto_open=False)
fig.write_image(f"{saveDir}/{b}XYZ_workCap.png")


fig.show()

In [48]:
# B1 heatmap of the mean working capacity per persistence cell (XYZ data).
b=1
X=mean_to_numpy(pdXYZCap,bi=b)
x=np.linspace(pdXYZCap.bounds[b][0],pdXYZCap.bounds[b][1],len(X))
y=x
# Row 0 of X is the largest death; flip rows so they follow ascending y.
trace=go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} XYZ_Persistence Map - Colored by workingCap",
                   xaxis=dict(title="Birth"),
                   yaxis=dict(title="Death"))
fig = go.Figure(data=[trace], layout=layout)

saveDir="C:/code/git/topML/frames/MOF"
os.makedirs(saveDir, exist_ok=True)  # writing fails if the folder is missing

# auto_open=False, as in the DFT cells: fig.show() below already displays the figure.
pyo.plot(fig, filename=f"{saveDir}/{b}XYZ_workCap.html",auto_open=False)
fig.write_image(f"{saveDir}/{b}XYZ_workCap.png")



fig.show()

In [49]:
# B2 heatmap of the mean working capacity per persistence cell (XYZ data).
b=2
X=mean_to_numpy(pdXYZCap,bi=b)
x=np.linspace(pdXYZCap.bounds[b][0],pdXYZCap.bounds[b][1],len(X))
y=x
# Row 0 of X is the largest death; flip rows so they follow ascending y.
trace=go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} XYZ_Persistence Map - Colored by workingCap",
                   xaxis=dict(title="Birth"),
                   yaxis=dict(title="Death"))
fig = go.Figure(data=[trace], layout=layout)

saveDir="C:/code/git/topML/frames/MOF"
os.makedirs(saveDir, exist_ok=True)  # writing fails if the folder is missing

# auto_open=False, as in the DFT cells: fig.show() below already displays the figure.
pyo.plot(fig, filename=f"{saveDir}/{b}XYZ_workCap.html",auto_open=False)
fig.write_image(f"{saveDir}/{b}XYZ_workCap.png")


fig.show()

## XYZ Selectivity

In [53]:
# Label used in the titles and file names of the selectivity plots
# (the B1 and B2 selectivity cells below reuse it).
regVar="Selectivity"

# B0 heatmap of the mean selectivity per persistence cell (XYZ data).
b=0
X=mean_to_numpy(pdXYZSel,bi=b)
x=np.linspace(pdXYZSel.bounds[b][0],pdXYZSel.bounds[b][1],len(X))
y=x
# Row 0 of X is the largest death; flip rows so they follow ascending y.
trace=go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} XYZ_Persistence Map - Colored by {regVar}",
                   xaxis=dict(title="Birth"),
                   yaxis=dict(title="Death"))
fig = go.Figure(data=[trace], layout=layout)

saveDir="C:/code/git/topML/frames/MOF"
os.makedirs(saveDir, exist_ok=True)  # writing fails if the folder is missing

# auto_open=False, as in the DFT cells: fig.show() below already displays the figure.
pyo.plot(fig, filename=f"{saveDir}/{b}XYZ_{regVar}.html",auto_open=False)
fig.write_image(f"{saveDir}/{b}XYZ_{regVar}.png")


fig.show()

In [54]:
# B1 heatmap of the mean selectivity per persistence cell (XYZ data).
# Relies on regVar set in the B0 selectivity cell.
b=1
X=mean_to_numpy(pdXYZSel,bi=b)
x=np.linspace(pdXYZSel.bounds[b][0],pdXYZSel.bounds[b][1],len(X))
y=x
# Row 0 of X is the largest death; flip rows so they follow ascending y.
trace=go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} XYZ_Persistence Map - Colored by {regVar}",
                   xaxis=dict(title="Birth"),
                   yaxis=dict(title="Death"))
fig = go.Figure(data=[trace], layout=layout)

saveDir="C:/code/git/topML/frames/MOF"
os.makedirs(saveDir, exist_ok=True)  # writing fails if the folder is missing

# auto_open=False, as in the DFT cells: fig.show() below already displays the figure.
pyo.plot(fig, filename=f"{saveDir}/{b}XYZ_{regVar}.html",auto_open=False)
fig.write_image(f"{saveDir}/{b}XYZ_{regVar}.png")


fig.show()

In [55]:
# B2 heatmap of the mean selectivity per persistence cell (XYZ data).
# Relies on regVar set in the B0 selectivity cell.
b=2
X=mean_to_numpy(pdXYZSel,bi=b)
x=np.linspace(pdXYZSel.bounds[b][0],pdXYZSel.bounds[b][1],len(X))
y=x
# Row 0 of X is the largest death; flip rows so they follow ascending y.
trace=go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} XYZ_Persistence Map - Colored by {regVar}",
                   xaxis=dict(title="Birth"),
                   yaxis=dict(title="Death"))
fig = go.Figure(data=[trace], layout=layout)

saveDir="C:/code/git/topML/frames/MOF"
os.makedirs(saveDir, exist_ok=True)  # writing fails if the folder is missing

# auto_open=False, as in the DFT cells: fig.show() below already displays the figure.
pyo.plot(fig, filename=f"{saveDir}/{b}XYZ_{regVar}.html",auto_open=False)
fig.write_image(f"{saveDir}/{b}XYZ_{regVar}.png")


fig.show()

In [12]:
# [smallest birth, largest death] recorded for dimension 1.
pdXYZCap.bounds[1]

[1.0, 30.5]

In [60]:
# Working-capacity values stored at the dimension-2 cell (birth 3.75, death 7.5).
pdXYZCap[2][(3.75,7.5)]

{0.209859, 0.520651, 0.924744, 3.053161}

In [69]:
# Values stored at one dimension-2 cell, followed by the values pooled over
# every cell whose birth and death both lie within nearThresh of it.
interest=(7.5,11)
print(pdXYZCap[2][interest])

nearThresh=.25

# set().union(...) instead of set.union(...): also works (empty set) when no
# cell falls inside the window, where set.union(*()) raises TypeError.
print(set().union(*(v for k,v in pdXYZCap[2].items() if abs(k[0]-interest[0])<=nearThresh and abs(k[1]-interest[1])<=nearThresh)))

{3.462042}
{0.206619, 1.226469, 0.553833, 3.682601, 1.557332, 0.492569, 0.191393, 1.395171, 0.227166, 0.482199, 3.462042, 0.467698, 2.626929}


In [76]:
# Values stored at one dimension-1 cell, followed by the values pooled over
# the cells with exactly the same birth and a death within nearThresh.
interest=(2.5,13.5)
print(pdXYZCap[1][interest])

nearThresh=2

# The birth tolerance is 0 (exact column match); only the death uses nearThresh.
# set().union(...) instead of set.union(...): also works (empty set) when no
# cell falls inside the window, where set.union(*()) raises TypeError.
print(set().union(*(v for k,v in pdXYZCap[1].items() if abs(k[0]-interest[0])<=0 and abs(k[1]-interest[1])<=nearThresh)))

{0.383575, 1.094792, 2.720183, 3.744637, 4.456847, 2.733194, 1.515153}
{0.621539, 0.441316, 2.279342, 3.656174, 2.673833, 2.102595, 2.212943, 2.062849, 0.838839, 0.906544, 2.791478, 3.368752, 2.107609, 1.094792, 5.005519, 2.482253, 3.165623, 0.899132, 3.215235, 2.811285, 4.456847, 1.515153, 1.489495, 1.756556, 0.806683, 0.990376, 2.184071, 2.502609, 2.540854, 1.263224, 0.77365, 1.268161, 1.144279, 1.385859, 0.62362, 2.733194, 2.254561, 2.609018, 3.744637, 3.376501, 3.424162, 1.471481, 1.593049, 1.609628, 1.753883, 0.11963, 0.383575, 0.543628, 2.720183, 2.101981, 2.898699, 2.843555, 2.825789, 0.843771, 1.921991, 1.193189}


### Using DFT Data

In [85]:
# Load the pickled persistence hash used by the DFT plots below.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open(f"{inDir}/diverseTopPDhash/pdStack_freq_tFunc.pkl","rb") as f:
    pdStack=pickle.load(f)

In [88]:
# Load previously pickled projections under the names pdStackCap / pdStackSel.
# NOTE(review): the next cell assigns the same two names from pdStack, so whichever
# cell ran last decides what the plots use.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open(f"{inDir}/diverseTopPDhash/pdStackCap_freq_tFunc.pkl","rb") as f:
    pdStackCap=pickle.load(f)

with open(f"{inDir}/diverseTopPDhash/pdStackSel_freq_tFunc.pkl","rb") as f:
    pdStackSel=pickle.load(f)

In [86]:
# Project the first two property columns of df onto the DFT persistence hash:
# every stored index i is replaced by the property value of MOF mofNames[i].
# The payloads are sets of values, hence mode="set"; the bounds are copied so
# the projected hashes do not share mutable lists with pdStack.
# .iloc[0] / .iloc[1] select the column by position explicitly (a bare row[0]
# on a label-indexed Series relies on a deprecated positional fallback).
pdStackCap=PDhash(res=pdStack.res,diags=None, maxHdim=pdStack.maxD,persistThresh=pdStack.thresh,mode="set")
pdStackCap.bounds=[list(dimBounds) for dimBounds in pdStack.bounds]
pdStackCap.img={b:{pt:{df.loc[mofNames[i]].iloc[0] for i in pdStack.img[b][pt]} for pt in pdStack.img[b].keys()} for b in pdStack.img.keys()}

pdStackSel=PDhash(res=pdStack.res,diags=None, maxHdim=pdStack.maxD,persistThresh=pdStack.thresh,mode="set")
pdStackSel.bounds=[list(dimBounds) for dimBounds in pdStack.bounds]
pdStackSel.img={b:{pt:{df.loc[mofNames[i]].iloc[1] for i in pdStack.img[b][pt]} for pt in pdStack.img[b].keys()} for b in pdStack.img.keys()}


In [110]:
# B0 heatmap of the mean working capacity per persistence cell (DFT data).
b=0
X=mean_to_numpy(pdStackCap,bi=b)
x=np.linspace(pdStackCap.bounds[b][0],pdStackCap.bounds[b][1],len(X))
y=x
# Row 0 of X is the largest death; flip rows so they follow ascending y.
trace=go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} DFT_Persistence Map - Colored by workingCap",
                   xaxis=dict(title="Birth"),
                   yaxis=dict(title="Death"))
fig = go.Figure(data=[trace], layout=layout)

saveDir="C:/code/git/topML/frames/MOF"
os.makedirs(saveDir, exist_ok=True)  # writing fails if the folder is missing

pyo.plot(fig, filename=f"{saveDir}/{b}DFT_workCap.html",auto_open=False)
fig.write_image(f"{saveDir}/{b}DFT_workCap.png")


fig.show()

In [118]:
# B1 heatmap of the mean working capacity per persistence cell (DFT data).
b=1
X=mean_to_numpy(pdStackCap,bi=b)
x=np.linspace(pdStackCap.bounds[b][0],pdStackCap.bounds[b][1],len(X))
y=x
# Row 0 of X is the largest death; flip rows so they follow ascending y.
trace=go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} DFT_Persistence Map - Colored by workingCap",
                   xaxis=dict(title="Birth"),
                   yaxis=dict(title="Death"))
fig = go.Figure(data=[trace], layout=layout)

saveDir="C:/code/git/topML/frames/MOF"
os.makedirs(saveDir, exist_ok=True)  # writing fails if the folder is missing

pyo.plot(fig, filename=f"{saveDir}/{b}DFT_workCap.html",auto_open=False)
fig.write_image(f"{saveDir}/{b}DFT_workCap.png")


fig.show()

In [117]:
# B2 heatmap of the mean working capacity per persistence cell (DFT data).
b=2
X=mean_to_numpy(pdStackCap,bi=b)
x=np.linspace(pdStackCap.bounds[b][0],pdStackCap.bounds[b][1],len(X))
y=x
# Row 0 of X is the largest death; flip rows so they follow ascending y.
trace=go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} DFT_Persistence Map - Colored by workingCap",
                   xaxis=dict(title="Birth"),
                   yaxis=dict(title="Death"))
fig = go.Figure(data=[trace], layout=layout)

saveDir="C:/code/git/topML/frames/MOF"
os.makedirs(saveDir, exist_ok=True)  # writing fails if the folder is missing

pyo.plot(fig, filename=f"{saveDir}/{b}DFT_workCap.html",auto_open=False)
fig.write_image(f"{saveDir}/{b}DFT_workCap.png")


fig.show()

In [114]:
# B0 heatmap of the mean selectivity per persistence cell (DFT data).
b=0
X=mean_to_numpy(pdStackSel,bi=b)
x=np.linspace(pdStackSel.bounds[b][0],pdStackSel.bounds[b][1],len(X))
y=x
# Row 0 of X is the largest death; flip rows so they follow ascending y.
trace=go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} DFT_Persistence Map - Colored by Selectivity",
                   xaxis=dict(title="Birth"),
                   yaxis=dict(title="Death"))
fig = go.Figure(data=[trace], layout=layout)

saveDir="C:/code/git/topML/frames/MOF"
os.makedirs(saveDir, exist_ok=True)  # writing fails if the folder is missing

pyo.plot(fig, filename=f"{saveDir}/{b}DFT_Selectivity.html",auto_open=False)
fig.write_image(f"{saveDir}/{b}DFT_Selectivity.png")


fig.show()

In [115]:
# B1 heatmap of the mean selectivity per persistence cell (DFT data).
b=1
X=mean_to_numpy(pdStackSel,bi=b)
x=np.linspace(pdStackSel.bounds[b][0],pdStackSel.bounds[b][1],len(X))
y=x
# Row 0 of X is the largest death; flip rows so they follow ascending y.
trace=go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} DFT_Persistence Map - Colored by Selectivity",
                   xaxis=dict(title="Birth"),
                   yaxis=dict(title="Death"))
fig = go.Figure(data=[trace], layout=layout)

saveDir="C:/code/git/topML/frames/MOF"
os.makedirs(saveDir, exist_ok=True)  # writing fails if the folder is missing

pyo.plot(fig, filename=f"{saveDir}/{b}DFT_Selectivity.html",auto_open=False)
fig.write_image(f"{saveDir}/{b}DFT_Selectivity.png")


fig.show()

In [116]:
# B2 heatmap of the mean selectivity per persistence cell (DFT data).
b=2
X=mean_to_numpy(pdStackSel,bi=b)
x=np.linspace(pdStackSel.bounds[b][0],pdStackSel.bounds[b][1],len(X))
y=x
# Row 0 of X is the largest death; flip rows so they follow ascending y.
trace=go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} DFT_Persistence Map - Colored by Selectivity",
                   xaxis=dict(title="Birth"),
                   yaxis=dict(title="Death"))
fig = go.Figure(data=[trace], layout=layout)

saveDir="C:/code/git/topML/frames/MOF"
os.makedirs(saveDir, exist_ok=True)  # writing fails if the folder is missing

pyo.plot(fig, filename=f"{saveDir}/{b}DFT_Selectivity.html",auto_open=False)
fig.write_image(f"{saveDir}/{b}DFT_Selectivity.png")


fig.show()

### CO2 comp

In [84]:
# Load the pickled persistence hash for the CO2 comparison.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open(f"{inDir}/diverseTopPDhash/pdStack_rppCords0_freq_QuartRes_CO2Ott.pkl","rb") as f:
    pdStackCO2=pickle.load(f)

---

# HMOF

In [11]:
##xyz PBC


# Load the hMOF training hash and its index list.
# NOTE(review): this rebinds pdXYZ, which earlier cells use for the diverse-metals hash.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open(f"{inDir}/hMOF_PDhash/train_pdStack_b4_5.pkl","rb") as f:
    pdXYZ=pickle.load(f)

# hIndex[0] is used by a later cell as a file name containing '.cif'.
with open(f"{inDir}/hMOF_PDhash/train_pdStack_index_b4_5.pkl","rb") as f:
    hIndex=pickle.load(f)


In [8]:
# Load the JSON record that belongs to the first structure listed in hIndex.
# json is already imported in the notebook's first cell, so it is not re-imported here.
# partition() keeps the whole name when it carries no ".cif" suffix; the previous
# find()-based slice silently dropped the last character in that case (find() == -1).
mofStem = hIndex[0].partition('.cif')[0]
with open(f"{inDir}/hMOF-1039C2-CO2/{mofStem}.json","rb") as f:
    moftest=json.load(f)

NameError: name 'hIndex' is not defined

In [13]:
# Read the hMOF id/property table: the file has no header row and its first column
# (the ".cif" file name) becomes the index, leaving the property in column 1.
hdf = pd.read_csv("Z:/data/hMOF/id_prop.csv", header=None, index_col=0)
hdf.head()

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
hMOF-2000665.cif,4.44756
hMOF-5063946.cif,2.13172
hMOF-6001157.cif,0.0
hMOF-5000253.cif,4.65208
hMOF-5062700.cif,2.30896


In [138]:
# List the top-level fields of the loaded JSON record, then display its 'heats' entry.
print(moftest.keys())
moftest['heats']

dict_keys(['id', 'cif', 'lcd', 'pld', 'url', 'name', 'pxrd', 'heats', 'mofid', 'mofkey', 'hashkey', 'database', 'elements', 'isotherms', 'adsorbates', 'batch_number', 'mofdb_version', 'void_fraction', 'surface_area_m2g', 'surface_area_m2cm3', 'pore_size_distribution'])


[{'id': 6526603,
  'DOI': '10.1039/C2EE23201D',
  'date': '2021-54-02',
  'simin': 'SimulationType                MonteCarlo\nNumberOfCycles                1000\nNumberOfInitializationCycles  1000\nPrintEvery                    250\nRestartFile                   no\n\nChargeMethod                  Ewald\nCutOff                        12.0\nForcefield                    CrystalGenerator\nEwaldPrecision                1e-6\n\nUseChargesFromMOLFile yes\n\nFramework 0\nFrameworkName IRMOF-1_test\nInputFileType mol\nUnitCells 2 2 2\nHeliumVoidFraction 0.792\nExternalTemperature 298\nExternalPressure 0.01E5 0.05E5 0.1E5 0.5E5 2.5E5\n\nComponent 0 MoleculeName             CO2\n            StartingBead             0\n            MoleculeDefinition       TraPPE\n            IdealGasRosenbluthWeight 1.0\n            TranslationProbability   1.0\n            RotationProbability      1.0\n            ReinsertionProbability   1.0\n            SwapProbability          1.0\n            CreateNumberOf

In [139]:
# Display the 'isotherms' entry of the loaded JSON record.
moftest['isotherms']

[{'id': 5179587,
  'DOI': '10.1039/C2EE23201D',
  'date': '2020-30-17',
  'simin': 'SimulationType                MonteCarlo\nNumberOfCycles                1000\nNumberOfInitializationCycles  1000\nPrintEvery                    250\nRestartFile                   no\n\nChargeMethod                  Ewald\nCutOff                        12.0\nForcefield                    CrystalGenerator\nEwaldPrecision                1e-6\n\nUseChargesFromMOLFile yes\n\nFramework 0\nFrameworkName IRMOF-1_test\nInputFileType mol\nUnitCells 2 2 2\nHeliumVoidFraction 0.792\nExternalTemperature 298\nExternalPressure 0.01E5 0.05E5 0.1E5 0.5E5 2.5E5\n\nComponent 0 MoleculeName             CO2\n            StartingBead             0\n            MoleculeDefinition       TraPPE\n            IdealGasRosenbluthWeight 1.0\n            TranslationProbability   1.0\n            RotationProbability      1.0\n            ReinsertionProbability   1.0\n            SwapProbability          1.0\n            CreateNumberOf

In [129]:
# Show the first entry of the loaded index (a ".cif" file name).
hIndex[0]

'hMOF-5028691.cif'

In [146]:

# Project the property table onto the loaded stack (boxProjectSet is defined earlier in the notebook).
# NOTE(review): the exact meaning of the third argument is set by boxProjectSet — confirm there.
pdXYZCap=boxProjectSet(pdXYZ,hdf,1,hIndex) # column label 1 holds the property because column 0 was consumed as the index


In [148]:
# Dimension-0 persistence map of pdXYZCap (title: colored by workingCap).
b = 0
saveDir = "C:/code/git/topML/frames/hMOF"

# 2-D array of per-bin values for dimension b (mean_to_numpy is defined earlier in the notebook).
X = mean_to_numpy(pdXYZCap, bi=b)

# Both heatmap axes share one evenly spaced grid spanning the bounds stored for this dimension.
bounds_b = pdXYZCap.bounds[b]
x = np.linspace(bounds_b[0], bounds_b[1], len(X))
y = x

# Rows of X are reversed before being handed to the heatmap.
trace = go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} DFT_Persistence Map - Colored by workingCap")
fig = go.Figure(data=[trace], layout=layout)

# Export an interactive HTML copy and a static PNG, then render inline.
pyo.plot(fig, filename=f"{saveDir}/{b}DFT_workCap.html", auto_open=False)
fig.write_image(f"{saveDir}/{b}DFT_workCap.png")

fig.show()

In [154]:
# Inspect the values stored in one dimension-0 bin of the projected map.
interest=(0,6.5)
print(pdXYZCap[0][interest])

# Half-width of the neighbourhood around `interest` on both coordinates; 0 selects the exact bin only.
nearThresh=0

# Union of the value sets of every bin inside the neighbourhood.
# set().union(...) returns an empty set when no bin matches, whereas
# set.union(*<empty>) raises TypeError.
print(set().union(*(v for k,v in pdXYZCap[0].items() if abs(k[0]-interest[0])<=nearThresh and abs(k[1]-interest[1])<=nearThresh)))

{7.84519, 8.51524, 7.61585, 8.01522, 9.00327, 9.86192, 11.2411, 11.1761, 9.89853, 9.06833, 8.34442, 9.23031, 10.2891, 13.4147, 13.4051, 7.51647, 7.36064, 8.06168, 8.70969, 9.01691, 9.64428, 9.18205, 9.87985, 9.36227, 10.9689, 10.7847, 10.6048, 11.3746, 11.1867, 7.73097, 7.49251, 7.26246, 7.84424, 7.30755, 7.58139}
{7.84519, 8.51524, 7.61585, 8.01522, 9.00327, 9.86192, 11.2411, 11.1761, 9.89853, 9.06833, 8.34442, 9.23031, 10.2891, 13.4147, 13.4051, 7.51647, 7.36064, 8.06168, 8.70969, 9.01691, 9.64428, 9.18205, 9.87985, 9.36227, 10.9689, 10.7847, 10.6048, 11.3746, 11.1867, 7.73097, 7.49251, 7.26246, 7.84424, 7.30755, 7.58139}


In [159]:
# Dimension-1 persistence map of pdXYZCap (title: colored by workingCap).
b = 1
saveDir = "C:/code/git/topML/frames/hMOF"

# 2-D array of per-bin values for dimension b (mean_to_numpy is defined earlier in the notebook).
X = mean_to_numpy(pdXYZCap, bi=b)

# Keep a copy of the raw map on disk next to the input data.
np.save(f"{inDir}/b1Map.npy", X)

# Both heatmap axes share one evenly spaced grid spanning the bounds stored for this dimension.
bounds_b = pdXYZCap.bounds[b]
x = np.linspace(bounds_b[0], bounds_b[1], len(X))
y = x

# Rows of X are reversed before being handed to the heatmap.
trace = go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} DFT_Persistence Map - Colored by workingCap")
fig = go.Figure(data=[trace], layout=layout)

# Export an interactive HTML copy and a static PNG, then render inline.
pyo.plot(fig, filename=f"{saveDir}/{b}DFT_workCap.html", auto_open=False)
fig.write_image(f"{saveDir}/{b}DFT_workCap.png")

fig.show()

In [150]:

# Dimension-2 persistence map of pdXYZCap (title: colored by workingCap).
b = 2
saveDir = "C:/code/git/topML/frames/hMOF"

# 2-D array of per-bin values for dimension b (mean_to_numpy is defined earlier in the notebook).
X = mean_to_numpy(pdXYZCap, bi=b)

# Both heatmap axes share one evenly spaced grid spanning the bounds stored for this dimension.
bounds_b = pdXYZCap.bounds[b]
x = np.linspace(bounds_b[0], bounds_b[1], len(X))
y = x

# Rows of X are reversed before being handed to the heatmap.
trace = go.Heatmap(x=x, y=y, z=X[::-1], colorscale='Viridis')
layout = go.Layout(title=f"B{b} DFT_Persistence Map - Colored by workingCap")
fig = go.Figure(data=[trace], layout=layout)

# Export an interactive HTML copy and a static PNG, then render inline.
pyo.plot(fig, filename=f"{saveDir}/{b}DFT_workCap.html", auto_open=False)
fig.write_image(f"{saveDir}/{b}DFT_workCap.png")

fig.show()

In [155]:
# Inspect the values stored in one dimension-2 bin of the projected map.
interest=(7,10.5)
print(pdXYZCap[2][interest])

# Half-width of the neighbourhood around `interest` on both coordinates.
nearThresh=.5

# Union of the value sets of every bin inside the neighbourhood.
# set().union(...) returns an empty set when no bin matches, whereas
# set.union(*<empty>) raises TypeError.
print(set().union(*(v for k,v in pdXYZCap[2].items() if abs(k[0]-interest[0])<=nearThresh and abs(k[1]-interest[1])<=nearThresh)))

{7.29813, 7.50374, 7.91755, 7.73488, 11.7327, 10.8564, 11.2452, 10.3465, 13.6736, 14.6771, 12.7714, 12.1778, 12.475, 12.349, 12.0547, 13.4941, 14.1451, 7.75732, 8.40789, 10.6476, 10.3362, 10.9687, 10.9668, 11.5425, 11.4933, 11.5663, 11.7584, 11.6077, 12.4302, 11.3425, 12.6655, 11.5453, 11.3988, 12.7279}
{10.5736, 7.27961, 8.22355, 8.80108, 10.6476, 11.4001, 10.7561, 11.5138, 13.95, 14.3967, 10.4947, 9.88965, 9.00739, 10.4549, 13.8038, 12.1434, 13.6736, 14.6771, 11.0985, 7.75732, 7.98377, 9.4897, 8.69943, 8.48442, 8.80771, 8.03228, 8.73406, 9.02216, 9.02384, 9.83851, 8.38082, 11.3995, 10.9668, 10.7207, 10.8564, 8.62351, 9.84064, 11.6567, 11.5154, 11.7879, 11.7868, 11.5347, 12.7714, 12.4302, 12.5028, 12.0225, 11.0135, 13.2879, 13.5501, 13.4895, 13.2804, 12.9007, 14.1451, 7.88302, 8.19716, 7.45001, 7.54723, 11.9202, 7.53344, 7.39539, 7.95498, 9.9224, 11.6601, 7.91755, 7.46373, 10.2795, 7.60158, 8.98907, 8.81816, 8.3619, 8.51195, 8.20648, 9.93653, 9.9501, 9.62289, 8.82698, 9.77822, 10.22, 

In [156]:
# Inspect the values stored in another dimension-2 bin of the projected map.
interest=(10.5,12)
print(pdXYZCap[2][interest])

# Half-width of the neighbourhood around `interest` on both coordinates.
nearThresh=.25

# Union of the value sets of every bin inside the neighbourhood.
# set().union(...) returns an empty set when no bin matches, whereas
# set.union(*<empty>) raises TypeError.
print(set().union(*(v for k,v in pdXYZCap[2].items() if abs(k[0]-interest[0])<=nearThresh and abs(k[1]-interest[1])<=nearThresh)))

{8.75724, 9.62467, 10.827, 8.6467, 8.23288, 11.1587, 10.8264}
{7.98377, 7.91526, 9.83851, 10.827, 11.979, 8.69734, 9.15345, 8.43347, 10.8264, 8.53039, 16.4928, 9.75473, 11.1587, 7.79464, 7.52806, 7.69929, 8.18118, 8.23759, 43.817, 8.6467, 8.65761, 8.13762, 9.39135, 8.75724, 9.62467, 8.23288, 9.35945}


In [20]:
## xyz PBC
# Render one persistence heatmap per (bin, homology dimension) pair and export each
# as HTML + PNG so the PNGs can later be stitched into a video.
temp="plotly"          # plotly layout template applied to every figure
colormp="Sunsetdark"   # colorscale shared by all maps
tBins=5                # number of bins; also drives the percentile label in the titles
saveDir = "C:/code/git/topML/frames/hMOF"

# Iterate over tBins rather than a second hard-coded 5 so the loop and the title labels cannot drift apart.
# NOTE(review): the "_5" in the file names presumably equals tBins — confirm before changing tBins.
for binNum in range(tBins):

    # NOTE: pickle.load can execute arbitrary code; only load files produced by a trusted run.
    with open(f"{inDir}/hMOF_PDhash/train_pdStack_b{binNum}_5.pkl", "rb") as f:
        pdXYZ = pickle.load(f)

    with open(f"{inDir}/hMOF_PDhash/train_pdStack_index_b{binNum}_5.pkl", "rb") as f:
        hIndex = pickle.load(f)

    pdXYZCap = boxProjectSet(pdXYZ, hdf, 1, hIndex)  # column label 1 holds the property because column 0 is the index

    # Same plotting recipe for every dimension (previously three pasted copies with drifting options).
    for b in range(3):
        X = mean_to_numpy(pdXYZCap, bi=b)
        X[X==0] = np.nan  # zero bins become NaN — presumably so empty bins are left unpainted
        x = np.linspace(pdXYZCap.bounds[b][0], pdXYZCap.bounds[b][1], len(X))
        y = x

        # Fixed color range and axis range keep the frames comparable across bins.
        trace = go.Heatmap(x=x, y=y, z=X[::-1], colorscale=colormp, autocolorscale=False, zmax=20, zmin=0)
        layout = go.Layout(title=f"B{b} XYZ_Persistence Map - Colored by workingCap ({round(binNum/tBins*100)}% - {round((binNum+1)/tBins*100)}%) percentile",
                           xaxis = dict( range=[0,15],),
                           yaxis= dict( range=[0,15],))

        fig = go.Figure(data=[trace], layout=layout)
        fig.update_layout(template=temp)

        pyo.plot(fig, filename=f"{saveDir}/{binNum}_{b}XYZ_workCap.html", auto_open=False)
        fig.write_image(f"{saveDir}/{binNum}_{b}XYZ_workCap.png")

    # As before, only the last figure of each bin (b == 2) is rendered inline.
    fig.show()


In [None]:
def saveGifNum(saveNameBase,num,res,fps,outName,tryRm=True, delFileCheck = lambda f: f.find(".png")!=-1 or f.find(".jpg")!=-1 or f.find(".json")!=-1):
    """Encode the frames ``{saveNameBase}t<N>.png`` into ``{outName}.mp4`` with ffmpeg.

    Despite the name, the output is an H.264 mp4, not a gif.

    Parameters
    ----------
    saveNameBase : str
        Path prefix of the frame files; frames are read as ``{saveNameBase}t%d.png``.
    num : any
        Unused; kept so existing callers keep working.
    res : sequence
        ``res[0]`` and ``res[1]`` are passed to ffmpeg as ``-s {res[0]}x{res[1]}``.
    fps : number
        Frame rate passed to ffmpeg as ``-r``.
    outName : str
        Output path without the ``.mp4`` extension.
    tryRm : bool
        When true and ffmpeg exits with status 0, delete files from the frame directory.
    delFileCheck : callable
        Called with each file name in the frame directory; the file is deleted when it
        returns true. WARNING: the default matches every name containing ".png", ".jpg"
        or ".json", not only the frames that were just encoded.

    Returns
    -------
    None
    """
    import subprocess  # local import: only this function needs it

    # An argument list (no shell) keeps paths with spaces or shell metacharacters intact.
    cmd = ["ffmpeg", "-r", str(fps), "-f", "image2", "-s", f"{res[0]}x{res[1]}",
           "-i", f"{saveNameBase}t%d.png", "-vcodec", "libx264", "-crf", "18", f"{outName}.mp4"]
    try:
        osout = subprocess.run(cmd).returncode
    except OSError as err:
        # ffmpeg is missing or not executable: report it and keep the frames.
        print(f"ffmpeg could not be started: {err}")
        return

    if osout==0 and tryRm:
        # dirname() also handles a prefix without any separator (-> current directory);
        # the previous reversed-find slice returned a wrong directory in that case.
        rootDir = os.path.dirname(saveNameBase) or "."
        for f in os.listdir(rootDir):
            if delFileCheck(f):
                target = os.path.join(rootDir, f)
                try:
                    os.remove(target)
                except OSError as err:
                    # Stop at the first failure, as before, but say why it failed.
                    print(f"del error! \t {target}\n\n{err}")
                    break

In [None]:

# Stitch the exported "<N>_<base>.png" frames into "<base>.mp4" with ffmpeg.
# NOTE(review): this is a /mnt/c path while the export cells write to "C:/..." — presumably
# this cell is meant to run under WSL; confirm.
inDirP = r"/mnt/c/code/git/topML/frames/hMOF"
os.chdir(inDirP)  # changes the working directory for every later cell as well

saveNameBase = "1XYZ_workCap"
outName = saveNameBase

fps = 1
res = [1980, 1200]

# osout holds the status returned by os.system; it is not checked here.
osout = os.system(f"ffmpeg -r {fps} -f image2 -s {res[0]}x{res[1]} -i %d_{saveNameBase}.png -vcodec libx264 -crf 18 {outName}.mp4")

In [None]:
# Same ffmpeg call, but reading frames named "<base>t<N>.png" instead of "<N>_<base>.png".
# Relies on fps, res, saveNameBase and outName still being set from a previously run cell.
osout=os.system(f"ffmpeg -r {fps} -f image2 -s {res[0]}x{res[1]} -i {saveNameBase}t%d.png -vcodec libx264 -crf 18 {outName}.mp4")

In [22]:
# Size of the most recently loaded index times 5.
# NOTE(review): 5 presumably is the number of bins (tBins) and the bins are assumed equal-sized — confirm.
len(hIndex)*5

96360

In [None]:
## adjust scales and show the minimum
## background color gone


# Inspect the values stored in one dimension-2 bin of the most recently projected map.
interest = (3.5, 8.75)
print(pdXYZCap[2][interest])

# Half-width of the neighbourhood around `interest` on both coordinates.
nearThresh = .5

# Union of the value sets of every bin inside the neighbourhood.
# set().union(...) returns an empty set when no bin matches, whereas
# set.union(*<empty>) raises TypeError.
print(set().union(*(v for k, v in pdXYZCap[2].items() if
                    abs(k[0] - interest[0]) <= nearThresh and abs(k[1] - interest[1]) <= nearThresh)))

In [None]:
# Map each atom weight onto the blue-white-red colormap, normalised over [-1, 1].
# `import matplotlib.pyplot as plt` does not bind the names `matplotlib` or `cm`,
# so they are imported here; move these to the notebook's import cell when tidying up.
import matplotlib.colors
from matplotlib import cm

norm = matplotlib.colors.Normalize(vmin=-1, vmax=1)
# plt.get_cmap replaces cm.get_cmap, which was deprecated in Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('bwr')
plt_colors = cm.ScalarMappable(norm=norm, cmap=cmap)
# NOTE(review): atom_weights is not defined in the visible part of the notebook — confirm it exists before this cell runs.
atom_colors = {
i: plt_colors.to_rgba(atom_weights[i]) for i in range(len(atom_weights))
}