# RECOVAR visualization and analysis (copied from cryoDRGN's notebook template)

This Jupyter notebook provides a template for analyzing results, including:
* plotting of the mean, mask, and eigenvectors
* resolution of the mean and decay of the eigenvalues
* latent space visualization with PCA/UMAP
* clustering
* trajectories
<!-- * interactive visualization of the latent space, imaging, and pose parameters -->
<!-- * interactive selection of particle images from the latent space -->
<!-- * interactive generation of volumes from the latent space -->

Note that this is a simple template for data analysis, and not a polished UI. Experience with Python/Pandas is recommended.

In [None]:
# Importing the submodule also binds the top-level `recovar` name, which later
# cells rely on when they call `recovar.utils.pickle_load`.
import recovar.config
from importlib import reload
import numpy as np
from recovar import plot_utils
from recovar import output, dataset
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objs as go
import plotly.offline as py
from recovar import simulator, regularization
# Re-import `simulator` so that edits made to the module on disk are picked up
# without restarting the kernel.
reload(simulator)
import jax
import warnings
from recovar.fourier_transform_utils import fourier_transform_utils
import jax.numpy as jnp
# Fourier-transform helper built with jax.numpy as its argument
# (presumably the array backend it should use -- confirm in fourier_transform_utils).
ftu = fourier_transform_utils(jnp)


2025-02-20 15:30:47.641132: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Load result from pipeline.py

In [None]:
# Load the pipeline results and the dataset.
# Point `recovar_result_dir` at your own pipeline output directory. Keep the
# trailing slash: other cells build paths from it by plain string concatenation.
recovar_result_dir = '/scratch/gpfs/mg6942/cryodrgn_empiar/empiar10180/inputs/paper_run/'
pipeline_output = output.PipelineOutput(recovar_result_dir)
# NOTE(review): 'lazy_dataset' presumably defers reading the particle images until
# they are needed -- confirm against PipelineOutput.get.
cryos = pipeline_output.get('lazy_dataset')

In [None]:
# Summary figure from plot_utils.plot_summary_t, followed by the leading
# eigenvalues on a log scale and a histogram of the estimated contrasts.
reload(plot_utils)  # pick up local edits to plot_utils without restarting the kernel

# Number of leading eigen-components shown; shared by the summary plot and the
# eigenvalue plot so the two cannot drift apart.
n_eigs = 20
# Key into the 'contrasts' entry of the pipeline output.
# NOTE(review): presumably the latent dimension the contrasts were estimated
# with -- confirm, and change it if your run used a different one.
contrast_zdim = 10

plot_utils.plot_summary_t(pipeline_output, cryos, n_eigs=n_eigs)

plt.figure()
plt.semilogy(pipeline_output.get('s')[:n_eigs])
plt.title('eigenvalues')

# Fetch the contrasts once and reuse them for both the histogram and the
# printed standard deviation.
contrasts = pipeline_output.get('contrasts')[contrast_zdim]
plt.figure()
plt.hist(contrasts, bins=30)
plt.title('estimated contrast')
print(np.std(contrasts))

In [None]:
import packaging
from packaging.version import parse as parse_version

# Sanity check of version ordering: compare two parsed version strings and let
# the notebook display the boolean result of the comparison.
newer, older = parse_version('0.3.1'), parse_version('0.3')
newer > older

In [None]:
from recovar import deconvolve_density
reload(deconvolve_density)

# Candidate regularization strengths: 10 log-spaced values between 1e-6 and
# 1e2, ordered from largest to smallest.
alphas = np.logspace(-6, 2, 10)[::-1]
percentile_reject = 10
zdim = '4_noreg'

# Run the deconvolution for every alpha. Note that `alphas` is rebound to the
# value returned by the call.
(
    lbfgsb_sols,
    alphas,
    cost,
    reg_cost,
    density,
    total_covar,
    grids,
    bounds,
) = deconvolve_density.get_deconvolved_density(
    pipeline_output,
    zdim=zdim,
    pca_dim_max=4,
    percentile_reject=percentile_reject,
    kernel_option='sampling',
    num_points=50,
    alphas=alphas,
    percentile_bound=1,
)

# Residual as a function of 1/alpha on log-log axes; used to pick alpha.
plt.figure()
plt.loglog(1/alphas, cost)
plt.xlabel('1/(regularization parameter)')
plt.ylabel('residual')
plt.title('Residual vs regularization- \n The elbow in the curve usually gives a good value of the the regularization parameter alpha')

# Visualize the deconvolved densities obtained under the different regularizations.
deconvolve_density.plot_density(lbfgsb_sols, density, alphas)

In [None]:
## Choose an index into the regularization sweep and keep the corresponding deconvolved density.
## You can then use it to generate trajectories with
## python generate_trajectories.py ... --density [recovar_result_dir + 'deconv_density.pkl']
## See the documentation.
from recovar import utils
idx = 4
deconv_density =  lbfgsb_sols[idx]
# Uncomment the next line to save the chosen density, its latent-space bounds and its alpha to disk:
# utils.pickle_dump({ 'density' : lbfgsb_sols[idx], 'latent_space_bounds' : bounds, 'alpha': alphas[idx] }, recovar_result_dir + 'deconv_density.pkl')

In [None]:
# Show the chosen deconvolved density as a slice through the middle of the distribution.
output.plot_over_density(
    np.array(deconv_density),
    None,
    bounds,
    cmap='inferno',
    projection_function='slice',
)


In [None]:
from recovar import latent_density

deconv_density = lbfgsb_sols[idx]

# Read the cluster centers stored under the 'centers' key of centers.pkl.
output_dir = recovar_result_dir + 'output/analysis_4_noreg/'
centers = recovar.utils.pickle_load(f'{output_dir}centers.pkl')['centers']

# Convert the centers from latent coordinates to grid coordinates of the density.
grid_to_z, z_to_grid = latent_density.get_grid_z_mappings(bounds, density.shape[0])
centers_grid = z_to_grid(centers)

# Overlay the annotated centers on the deconvolved density.
output.plot_over_density(
    np.array(deconv_density),
    None,
    bounds,
    cmap=None,
    points=centers_grid,
    projection_function=None,
    annotate=True,
)


In [None]:
# Visualize the trajectory between two of the cluster centers, using the
# deconvolved density chosen earlier in the notebook.
from recovar import output as o
from recovar import trajectory

# Start and end points of the path: two entries of `centers`.
z_st, z_end = centers[3], centers[23]

full_path, subsampled_path = o.make_trajectory_plots_from_results(
    pipeline_output,
    4,  # NOTE(review): presumably the latent dimension to use -- confirm
    './path',
    cryos=cryos,
    z_st=z_st,
    z_end=z_end,
    gt_volumes=None,
    n_vols_along_path=40,
    plot_llh=False,
    input_density=deconv_density,
    latent_space_bounds=bounds,
)


### Load result from analyze

In [None]:
# Locate the analysis output for the chosen latent dimension and load its centers.
zdim = '4_noreg'
analysis_dir = f'{recovar_result_dir}output/analysis_{zdim}'
# centers.pkl holds a dict (its 'centers' and 'labels' keys are read elsewhere
# in this notebook). Take the 'centers' entry so that `centers` stays an
# indexable collection of points, as the trajectory cell expects, instead of
# being rebound to the whole dict.
centers = recovar.utils.pickle_load(f'{analysis_dir}/centers.pkl')['centers']

In [None]:
# If you changed the output dir, specify it here
# analysis_dir = f'{recovar_result_dir}output/analysis_{z_dim_used}'

# Load the UMAP embedding stored by the analysis step ...
umap = recovar.utils.pickle_load(f'{analysis_dir}/umap/embedding.pkl')
# ... or run UMAP yourself:
# z = pipeline_output.get('zs')[20]
# umap = analysis.run_umap(z)

# The two embedding coordinates, shared by both plots below.
umap_x, umap_y = umap[:, 0], umap[:, 1]
axis_labels = ('UMAP1', 'UMAP2')

# Scatter view of the embedding.
g = sns.jointplot(x=umap_x, y=umap_y, alpha=.1, s=1)
g.set_axis_labels(*axis_labels)
# plt.savefig('test' + 'centers_'+str(axes[0]) + str(axes[1])+'.png' )

# Hex-binned view of the same embedding.
g = sns.jointplot(x=umap_x, y=umap_y, kind='hex')
g.set_axis_labels(*axis_labels)

In [None]:
# Load the k-means result (centers and per-point labels) written by the analysis step.
# K is only referenced by the commented-out alternative below.
K = 40
# kmeans_labels = utils.load_pkl(f'{WORKDIR}/analyze.{EPOCH}/kmeans{K}/labels.pkl')
kmeans_results = recovar.utils.pickle_load(f'{analysis_dir}/centers.pkl')
kmeans_centers, kmeans_labels = kmeans_results['centers'], kmeans_results['labels']

# Alternatively, re-run kmeans with the desired number of classes:
# kmeans_labels, kmeans_centers = analysis.cluster_kmeans(z, 20)

# # Get index for on-data cluster center
# kmeans_centers, centers_ind = analysis.get_nearest_point(z, kmeans_centers)

### PCA

In [None]:
# Pairwise scatter plots of the leading latent coordinates.
# `z` is not defined by any executable line of this notebook (it only appears
# in a commented-out example), so load it here before using it.
# NOTE(review): the key 20 is taken from that commented-out example; use the
# key that matches a latent dimension your run actually computed.
z = pipeline_output.get('zs')[20]
pc = z
# Plot at most the first 6 coordinates, and never index past the columns that exist.
n_pcs = min(6, pc.shape[1])
for k1 in range(n_pcs):
    for k2 in range(k1 + 1, n_pcs):
        g = sns.jointplot(x=pc[:, k1], y=pc[:, k2], alpha=.1, s=1)
        g.set_axis_labels(f'PC{k1}', f'PC{k2}')