In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules
# before each cell executes, so edits to local packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import re
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from matplotlib import rc
rc('text', usetex=False)

from skimage.transform import resize
from skimage.io import imread
from skimage.metrics import structural_similarity as ssim

from polyphase.parallel import get_distance_matrix

In [4]:
# Load the HTE data table from a pickle file (note: a pickle, not a CSV).
# `df` is read as a notebook-level global by get_batch_of_phasediags.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle('../data/ternary_hte.pkl')

In [7]:
def get_batch_of_phasediags(smstr, polymerstr, data=None, plot_dir='../figures/hteplots/'):
    """Select one small-molecule/polymer system and list its phase-diagram images.

    Parameters
    ----------
    smstr : str
        Value to match in the 'small molecule' column.
    polymerstr : str
        Value to match in the 'polymer' column.
    data : pandas.DataFrame, optional
        Table to search. Defaults to the notebook-level ``df`` so existing
        two-argument calls behave exactly as before; passing it explicitly
        removes the dependency on hidden notebook state.
    plot_dir : str, optional
        Prefix prepended to every file name. It is joined by plain string
        concatenation, so it must already end with a path separator.

    Returns
    -------
    filename_list : list of str
        One ``<plot_dir><c0>_<c1>_<c2>.png`` entry per matching row, where
        ``c0..c2`` are the values of the row's first three columns.
    sys_df : pandas.DataFrame
        The rows of ``data`` that match both ``smstr`` and ``polymerstr``.
    """
    if data is None:
        # Backward-compatible fallback to the notebook global.
        data = df

    is_system = (data['small molecule'] == smstr) & (data['polymer'] == polymerstr)
    sys_df = data[is_system]

    # Only the first three columns feed the file name; they must hold strings,
    # otherwise str.join raises TypeError (same as the original behaviour).
    filename_list = [
        plot_dir + '_'.join(parts) + '.png'
        for parts in sys_df.iloc[:, :3].itertuples(index=False, name=None)
    ]

    return filename_list, sys_df

def get_ssim_distance(file0, file1):
    """Compare two image files with the structural similarity index (SSIM).

    Both files are read, resized to 64x64, and passed to
    ``skimage.metrics.structural_similarity`` with ``multichannel=True``.

    Parameters
    ----------
    file0, file1 : str
        Paths of the two image files to compare.

    Returns
    -------
    The value returned by ``structural_similarity`` for the resized pair.

    NOTE(review): despite the function name, the raw SSIM score is returned
    unchanged (no ``1 - ssim`` style conversion) -- confirm callers expect that.
    """
    # Read both files first, then resize, mirroring the original ordering.
    raw_pair = [imread(path) for path in (file0, file1)]
    small_a, small_b = (resize(image, (64, 64)) for image in raw_pair)

    return ssim(small_a, small_b, multichannel=True)

In [8]:
# Pick the (small molecule, polymer) system to analyse.
smstr = 'DPP(PhTT)2'
polymerstr  = 'MDMO-PPV'
# Image file names for every row of that system, plus the matching rows.
files, sys_df = get_batch_of_phasediags(smstr, polymerstr)
print('Total of {} phase diagrams'.format(len(files)))

# Hand the file list and the pairwise image comparison to polyphase's helper.
# The recorded output shows this call starting Ray workers.
# NOTE(review): M is presumably a len(files) x len(files) matrix -- confirm
# against polyphase.parallel.get_distance_matrix.
M = get_distance_matrix(files, get_ssim_distance)

2020-08-24 11:48:07,902	INFO resource_spec.py:212 -- Starting Ray with 160.94 GiB memory available for workers and up to 72.97 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).


Total of 86 phase diagrams


2020-08-24 11:48:10,334	INFO services.py:1165 -- View the Ray dashboard at [1m[32mlocalhost:8266[39m[22m


[2m[36m(pid=30943)[0m Computing 63 on 10.111.7.27
[2m[36m(pid=30966)[0m Computing 55 on 10.111.7.27
[2m[36m(pid=30951)[0m Computing 59 on 10.111.7.27
[2m[36m(pid=30963)[0m Computing 73 on 10.111.7.27
[2m[36m(pid=30944)[0m Computing 76 on 10.111.7.27
[2m[36m(pid=30947)[0m Computing 67 on 10.111.7.27
[2m[36m(pid=30954)[0m Computing 64 on 10.111.7.27
[2m[36m(pid=30946)[0m Computing 70 on 10.111.7.27
[2m[36m(pid=30955)[0m Computing 56 on 10.111.7.27
[2m[36m(pid=30941)[0m Computing 62 on 10.111.7.27
[2m[36m(pid=30964)[0m Computing 81 on 10.111.7.27
[2m[36m(pid=30959)[0m Computing 71 on 10.111.7.27
[2m[36m(pid=30968)[0m Computing 65 on 10.111.7.27
[2m[36m(pid=30958)[0m Computing 72 on 10.111.7.27
[2m[36m(pid=30956)[0m Computing 69 on 10.111.7.27
[2m[36m(pid=30937)[0m Computing 60 on 10.111.7.27
[2m[36m(pid=30957)[0m Computing 58 on 10.111.7.27
[2m[36m(pid=30965)[0m Computing 75 on 10.111.7.27
[2m[36m(pid=30942)[0m Computing 53 on 10.11

In [12]:
# Disabled experiment: spectral clustering on a Gaussian-kernel affinity built from M.
# NOTE(review): the kernel below treats M as a distance (larger = less alike),
# while M is presumably filled with raw SSIM scores from get_ssim_distance --
# confirm the intended meaning of M before re-enabling this cell.
# from sklearn.cluster import SpectralClustering

# delta = 1/len(files)
# X = np.exp(- M ** 2 / (2. * delta ** 2))

# clustering = SpectralClustering(n_clusters=5,assign_labels="discretize",
#                                 random_state=0, affinity='precomputed').fit(X)


