In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, dendrogram
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs
from scipy.cluster.hierarchy import centroid, fcluster
from sklearn.neighbors import NearestCentroid

import scipy.cluster.hierarchy as sch
from scipy.cluster.hierarchy import ward, median, centroid, weighted, average, complete, single, fcluster
from scipy.spatial.distance import pdist
from scipy.spatial.distance import euclidean
import scipy.spatial.distance as ssd

from sklearn.metrics import silhouette_samples, silhouette_score

import math

import glob

import warnings
warnings.filterwarnings('ignore')

Author: Negar Nahali, 2024. 

In [4]:
# Here, we find the radial and peripheral distributions of the cluster centroids for the Oblate0.33 case at energy level E4.

def import_data(file_name, wall_max_id=630):
    """Load the coordinates of the non-wall (polymer) particles from a dump file.

    The file is read as single-space-separated text after skipping its first
    9 lines (presumably the dump-file header -- TODO confirm), and its columns
    are named id, x, y, z, ix, iy, iz.

    Parameters
    ----------
    file_name : str or path-like
        Path of the dump file to read.
    wall_max_id : int, default 630
        Rows whose ``id`` is strictly greater than this value are returned.
        The default reproduces the previously hard-coded ``id > 630`` filter;
        ``import_wall`` loads the complementary rows.

    Returns
    -------
    pandas.DataFrame
        The 'x', 'y', 'z' columns of the selected rows. The row index assigned
        by ``read_csv`` is kept (it is not reset).
    """
    frame = pd.read_csv(
        file_name,
        skiprows=9,
        names=['id', 'x', 'y', 'z', 'ix', 'iy', 'iz'],
        sep=" ",
        header=None,
    )
    # read_csv already returns a DataFrame, so no extra pd.DataFrame() wrap
    # is needed before filtering.
    return frame.loc[frame['id'] > wall_max_id, ['x', 'y', 'z']]
    
def import_wall(file_name, wall_max_id=630):
    """Load the coordinates of the wall particles from a dump file.

    The file is read as single-space-separated text after skipping its first
    9 lines (presumably the dump-file header -- TODO confirm), and its columns
    are named id, x, y, z, ix, iy, iz.

    Parameters
    ----------
    file_name : str or path-like
        Path of the dump file to read.
    wall_max_id : int, default 630
        Rows whose ``id`` is less than or equal to this value are returned.
        For integer ids the default is equivalent to the previously
        hard-coded ``id < 631`` filter; ``import_data`` loads the
        complementary rows.

    Returns
    -------
    pandas.DataFrame
        The 'x', 'y', 'z' columns of the selected rows. The row index assigned
        by ``read_csv`` is kept (it is not reset).
    """
    frame = pd.read_csv(
        file_name,
        skiprows=9,
        names=['id', 'x', 'y', 'z', 'ix', 'iy', 'iz'],
        sep=" ",
        header=None,
    )
    # read_csv already returns a DataFrame, so no extra pd.DataFrame() wrap
    # is needed before filtering.
    return frame.loc[frame['id'] <= wall_max_id, ['x', 'y', 'z']]

def run_agglomerated_clustering(df, model=None):
    """Fit a clustering model on ``df`` and return its per-row labels.

    Parameters
    ----------
    df : array-like
        Data handed unchanged to the model's ``fit`` method.
    model : estimator, optional
        Object whose ``fit(X)`` returns the fitted estimator, which must then
        expose a ``labels_`` attribute. When omitted, the notebook-level
        ``hc`` estimator is used, exactly as before this parameter existed.

    Returns
    -------
    The ``labels_`` attribute of the fitted estimator.
    """
    if model is None:
        # Backward-compatible default: fall back to the global estimator,
        # which must already be defined when this function is called.
        model = hc
    fitted = model.fit(df)
    return fitted.labels_

def filter_cluster(df, membership, min_size=2):
    """Attach cluster labels and sizes to ``df`` and drop small clusters.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least an 'x' column. It is copied, so the
        caller's frame is left untouched.
    membership : sequence
        One cluster label per row of ``df``, in row order.
    min_size : int, default 2
        Smallest cluster size to keep. The default reproduces the previous
        hard-coded ``size > 1`` filter (single-member clusters are dropped).

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` re-indexed 0..n-1, extended with 'label' and 'size'
        columns, restricted to rows whose cluster size is at least
        ``min_size``. The index is not reset again after filtering.
    """
    labelled = df.copy().reset_index(drop=True)
    labelled["label"] = membership
    # Cluster size = number of non-null 'x' values sharing a label, broadcast
    # back onto every row of that cluster in one vectorised step.
    labelled["size"] = labelled.groupby("label")["x"].transform("count")
    return labelled[labelled["size"] >= min_size]


def myfunction(x):
    """Return the Euclidean length of the first three components of ``x``.

    Any components beyond index 2 are ignored.
    """
    squares = (component ** 2 for component in (x[0], x[1], x[2]))
    return math.sqrt(sum(squares))

def min_dis_wall(x):
    """Return ``x`` unchanged.

    Identity placeholder: nothing in the visible cells calls it.
    """
    unchanged = x
    return unchanged
    
    
# ---- Configuration --------------------------------------------------------
folder_path = '../data/'
j = 2000000000   # suffix appended to "qua.dump." to select the file analysed below
imax = 2*10**9   # not used in this cell; kept because other cells may read it
np.set_printoptions(floatmode='unique')

# Accumulators. Only Centroids_dis, min_peri2 and all_centroids are filled in
# this cell; the remaining names are kept because other cells may read them.
individual_clusters_by_size = []
num_of_clusters_per_file = []
Centroids_dis = []
Centroids_periphery = []
min_peri2 = []
all_centroids = []
iterator = 0

# defining the clustering model
hc = AgglomerativeClustering(n_clusters=None, metric='euclidean', linkage='single', distance_threshold=0.970)
file_list = glob.glob(folder_path + "qua.*")  # not used in this cell


# ---- Cluster the selected file and locate the cluster centroids ------------
file_name = folder_path + "qua.dump." + str(j)
df = import_data(file_name)
membership = run_agglomerated_clustering(df)
df_filtered = filter_cluster(df, membership)

clf = NearestCentroid(metric='euclidean')
# Pass the labels as a 1-D Series. A one-column DataFrame is a column vector,
# which scikit-learn only accepts with a DataConversionWarning (hidden in this
# notebook by the global warnings filter).
clf.fit(df_filtered[["x", "y", "z"]], df_filtered["label"])

# Radial distribution: distance of every centroid from the origin.
rr = np.linalg.norm(clf.centroids_, axis=1)
Centroids_dis.extend(rr)
all_centroids.extend(clf.centroids_)

############## PERIPHERY
np_wall = import_wall(file_name).to_numpy()
if np_wall.shape[0] == 0:
    raise ValueError("no wall particles found in " + file_name)

# Distance from every centroid to every wall particle via broadcasting:
# offsets has shape (n_centroids, n_wall, 3). This uses however many wall rows
# the file actually contains instead of assuming exactly 630 of them.
offsets = clf.centroids_[:, np.newaxis, :] - np_wall[np.newaxis, :, :]
wall_distances = np.sqrt((offsets ** 2).sum(axis=2))

# NOTE(review): the 1 subtracted from each minimum wall distance is presumably
# a particle-size / contact-distance correction -- confirm its meaning.
periphery_offset = 1
# Same arithmetic as before: round the minimum to 3 decimals, then subtract.
min_peri2 = (np.round(wall_distances.min(axis=1), 3) - periphery_offset).tolist()


# ---- Collect results --------------------------------------------------------
all_C = np.array(all_centroids)
centroid_loc = pd.DataFrame(all_C, columns=['x', 'y', 'z'])

Centroids_dis = np.array(Centroids_dis)

# r: centroid distance from the origin; p: shifted minimum distance to the wall.
df_rp_Oblate13_E4 = pd.DataFrame({'r': Centroids_dis, 'p': min_peri2})


In [5]:
# Display the per-centroid table: 'r' is the centroid's distance from the
# origin, 'p' its minimum distance to the wall particles minus 1.
df_rp_Oblate13_E4

Unnamed: 0,r,p
0,3.117623,2.855
1,2.468956,2.055
2,2.852907,1.489
3,3.968785,2.206
4,5.989772,2.142
...,...,...
60,3.338817,2.098
61,3.522811,2.541
62,1.480919,1.916
63,7.286262,0.693
