In [1]:
import os
import os.path
import shutil
import logging
import matplotlib
matplotlib.use('Agg')  # Set the backend to Agg
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import cryodrgn
from cryodrgn import analysis, utils, config
from cryodrgn.starfile import Starfile
import pandas as pd
from cryodrgn.source import ImageSource
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist

Load the integrated poses

In [2]:
import pickle

# Pickled pose container; later cells read element 0 (rotations) and element 1 (translations).
# NOTE: pickle.load can execute arbitrary code -- only point this at files you trust.
filepath = '/scratch/gpfs/ZHONGE/mj7341/research/00_moml/antibody/dataset/conformational/integrated_poses_chimera.pkl'
with open(filepath, mode='rb') as pose_fh:
    poses = pickle.load(pose_fh)

In [3]:
# Split the pose container: translations are used as stored, while rotations are
# flattened so that every entry of poses[0] becomes a single row.
trans = poses[1]
rots = poses[0].reshape(len(poses[0]), -1)

In [4]:
# Root output directory: later cells write the UMAP embeddings to `embeds/umap/`
# and the whole-dataset pose plot to `pose_plots/` underneath it.
outdir = '/scratch/gpfs/ZHONGE/mj7341/MoML/cryosim/umap_pca'

Load starfile

In [5]:
# Load the STAR file and keep both the Starfile object and its `.df` attribute.
# NOTE(review): neither `ori_s` nor `ori_df` is referenced by any later cell visible here.
star_path = '/scratch/gpfs/ZHONGE/mj7341/research/00_moml/antibody/dataset/conformational/add_noise/128_chimera_resample/snr01/snr01_star.star'
ori_s = Starfile.load(star_path)
ori_df = ori_s.df

# UMAP

In [7]:
# Embed rotations and translations independently with cryoDRGN's UMAP helper
# (rotations first, then translations -- same call order as before).
umap_emb_rot, umap_emb_trans = (
    analysis.run_umap(pose_component) for pose_component in (rots, trans)
)

In [8]:
# Cache the UMAP embeddings on disk so they can be reloaded without recomputing.
# np.save does not create missing parent directories, so make sure the target
# folder exists first (a fresh output tree would otherwise raise FileNotFoundError).
umap_dir = f"{outdir}/embeds/umap"
os.makedirs(umap_dir, exist_ok=True)
np.save(f"{umap_dir}/umap_emb_rot.npy", umap_emb_rot)
np.save(f"{umap_dir}/umap_emb_trans.npy", umap_emb_trans)

In [7]:
# umap_emb_rot = np.load(f"{outdir}/embeds/umap/umap_emb_rot.npy")
# umap_emb_trans = np.load(f"{outdir}/embeds/umap/umap_emb_trans.npy")

UMAP of the whole dataset

In [9]:
# figure: side-by-side UMAP scatter of every pose (rotations left, translations right)
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
panels = (('Rots', umap_emb_rot), ('Trans', umap_emb_trans))
for ax, (title, emb) in zip(axs.flat, panels):
    ax.scatter(emb[:, 0], emb[:, 1], alpha=0.1, s=1, rasterized=True)
    ax.set(title=title, xlabel='UMAP1', ylabel='UMAP2')

plt.tight_layout()
# savefig does not create missing directories; ensure the plot folder exists first.
plot_dir = f"{outdir}/pose_plots"
os.makedirs(plot_dir, exist_ok=True)
plt.savefig(f"{plot_dir}/poses_whole_umap.png")

### 3D Classification

Run `get_indices_for_csfile.py` first: it generates the per-class index pickles that are loaded below.

In [17]:
# Base directory for the 3D-classification index pickles (read from its `3dcls/`
# subfolder) and for the figures that the plotting cells save next to them.
ind_dir = '/scratch/gpfs/ZHONGE/mj7341/research/00_moml/antibody/dataset/conformational/cryosparc'

In [14]:
# original code: 20231210_fsc_auc
# Build `cls_5`: one flat index array per 3D class, read from 3dcls_cs<k>.pkl.
num_classes = 5
cls_5 = []
for cls_id in range(num_classes):
    pkl_path = ind_dir + '/3dcls/3dcls_cs' + str(cls_id) + '.pkl'
    with open(pkl_path, 'rb') as pkl_fh:
        cs_idx = pickle.load(pkl_fh)
    # Each pickle is a dict; concatenate the values stored under every key
    # into a single 1-D array for this class.
    cls_5.append(np.hstack([np.array(entries) for entries in cs_idx.values()]))

In [None]:
# idx_lst = [item for sublist in particle_inds_dict.values() for item in sublist]
# idx_array = np.array(idx_lst)

In [16]:
# figure: UMAP embeddings colored by 3D-classification class
fig, axs = plt.subplots(1, 2, figsize=(10, 5))

# Iterate over however many classes were loaded instead of a hard-coded 5.
for cls_id, cls_idx in enumerate(cls_5):
    # Select each class's rows once rather than re-indexing for every coordinate.
    rot_pts = umap_emb_rot[cls_idx]
    trans_pts = umap_emb_trans[cls_idx]
    axs[0].scatter(rot_pts[:, 0], rot_pts[:, 1], alpha=0.5, s=1, rasterized=True, label=str(cls_id))
    axs[1].scatter(trans_pts[:, 0], trans_pts[:, 1], alpha=0.5, s=1, rasterized=True, label=str(cls_id))

axs[0].set_title('Rots')
axs[1].set_title('Trans')
for ax in axs.flat:
    ax.set(xlabel='UMAP1', ylabel='UMAP2')
    # The scatter labels are only visible once a legend is drawn; enlarge the
    # markers because the s=1 points are otherwise unreadable in the legend.
    ax.legend(title='Class', markerscale=10, loc='upper right')

plt.tight_layout()
plt.savefig(f"{ind_dir}/3dcls/poses_w_cls1_to_5_umap.png")

# PCA

In [19]:
from sklearn.decomposition import PCA
def run_pca(z: np.ndarray):
    """Fit a PCA that keeps as many components as `z` has columns.

    Args:
        z: 2-D array; `z.shape[1]` is used as the number of components.

    Returns:
        A `(pc, pca)` tuple: `z` transformed by the fitted model, and the
        fitted `PCA` object itself.
    """
    n_columns = z.shape[1]
    model = PCA(n_columns)
    model.fit(z)
    projected = model.transform(z)
    return projected, model

In [20]:
# Flatten the rotations again (same expression as in the pose-loading section),
# then run PCA on rotations first and translations second.
rots = poses[0].reshape(len(poses[0]), -1)
(pc_rot, pca_rot), (pc_trans, pca_trans) = run_pca(rots), run_pca(poses[1])

Run `get_indices_for_csfile.py` first: it generates the per-class index pickles that are loaded below.

In [21]:
# Same path as the `ind_dir` assigned in the UMAP section; presumably repeated so
# the PCA section can be run without the UMAP cells.
ind_dir = '/scratch/gpfs/ZHONGE/mj7341/research/00_moml/antibody/dataset/conformational/cryosparc'

### 3D Classification (num class=5)

In [22]:
# original code: 20231210_fsc_auc
# Collect, for each of the 5 classes, every index stored in its pickle as one flat array.
num_classes = 5
cls_5 = []
for class_no in range(num_classes):
    with open(ind_dir + '/3dcls/3dcls_cs' + str(class_no) + '.pkl', 'rb') as idx_fh:
        cs_idx = pickle.load(idx_fh)
    per_key_arrays = [np.array(members) for members in cs_idx.values()]
    cls_5.append(np.hstack(per_key_arrays))

In [23]:
# figure: first two principal components of the poses, colored by 3D-classification class
fig, axs = plt.subplots(1, 2, figsize=(10, 5))

# Iterate over however many classes were loaded instead of a hard-coded 5.
for cls_id, cls_idx in enumerate(cls_5):
    # Select each class's rows once rather than re-indexing for every coordinate.
    rot_pts = pc_rot[cls_idx]
    trans_pts = pc_trans[cls_idx]
    axs[0].scatter(rot_pts[:, 0], rot_pts[:, 1], alpha=0.5, s=1, rasterized=True, label=str(cls_id))
    axs[1].scatter(trans_pts[:, 0], trans_pts[:, 1], alpha=0.5, s=1, rasterized=True, label=str(cls_id))

axs[0].set_title('Rots')
axs[1].set_title('Trans')
for ax in axs.flat:
    # These are PCA projections, so label the axes as principal components
    # (they were previously mislabelled 'UMAP1'/'UMAP2').
    ax.set(xlabel='PC1', ylabel='PC2')
    # The scatter labels are only visible once a legend is drawn; enlarge the
    # markers because the s=1 points are otherwise unreadable in the legend.
    ax.legend(title='Class', markerscale=10, loc='upper right')

plt.tight_layout()
plt.savefig(f"{ind_dir}/3dcls/poses_w_cls1_to_5_PCs.png")

### 87 models 

In [24]:
# figure: first two principal components of the poses, one color per model
n_models = 87        # number of models plotted
n_per_model = 1000   # poses are indexed as consecutive blocks of this size, one block per model

# `hexcodes` is not defined in any cell of this notebook, so a fresh kernel would
# raise NameError. Reuse it if an earlier session defined it; otherwise fall back
# to an evenly spaced seaborn palette with one hex color per model.
try:
    model_colors = hexcodes
except NameError:
    model_colors = sns.color_palette("husl", n_models).as_hex()

fig, axs = plt.subplots(1, 2, figsize=(10, 5))
for i in range(n_models):
    idx = np.arange(n_per_model * i, n_per_model * (i + 1))
    axs[0].scatter(pc_rot[idx, 0], pc_rot[idx, 1], alpha=0.5, s=1, rasterized=True, c=model_colors[i])
    axs[1].scatter(pc_trans[idx, 0], pc_trans[idx, 1], alpha=0.5, s=1, rasterized=True, c=model_colors[i])

axs[0].set_title('Rots')
axs[1].set_title('Trans')
for ax in axs.flat:
    # These are PCA projections, so label the axes as principal components
    # (they were previously mislabelled 'UMAP1'/'UMAP2').
    ax.set(xlabel='PC1', ylabel='PC2')

plt.tight_layout()
plt.savefig(f"{ind_dir}/poses_w_87models_PCs.png")