# PCA Analysis 

This notebook provides a demonstration of PCA on condensate images.

Allard Mosk, a.p.mosk@uu.nl, May 2025

In [2]:
#Imports
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.colors import hsv_to_rgb
import scipy
import matplotlib.cm as cm
import matplotlib.mathtext as mathtext
import scipy.signal
from scipy import signal
import mpmath
#from astropy.io import fits
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from skimage.restoration import unwrap_phase

import time

from io import BytesIO

from matplotlib.figure import Figure
import matplotlib.pyplot as plt
from matplotlib.transforms import IdentityTransform
import matplotlib.cm as cm

from PIL import Image
import requests

rng=np.random.default_rng() #random number generator

from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

%matplotlib notebook
# plt.style.use('fivethirtyeight')  # pretty cool looking 
import warnings
warnings.filterwarnings('ignore')
from decimal import Decimal

import astropy.io.fits as pyfits
import numpy as np
import matplotlib.pyplot as plt
import os
from matplotlib.widgets import Slider
from matplotlib import colors
from matplotlib.gridspec import GridSpec
from IPython.display import clear_output
import scipy.odr as odr
import imageio
import gc
from matplotlib.widgets import EllipseSelector, RectangleSelector
from matplotlib.pyplot import cm
from scipy.special import eval_legendre
from scipy.optimize import curve_fit
import scipy.fftpack as fft
from matplotlib.colors import LogNorm
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib.patches import Arc
import csv
from scipy.ndimage import center_of_mass

from OAH_refocus import *
import time
import sys
import argparse
import glob
import csv
import matplotlib
import math
from scipy.optimize import curve_fit
from fitfunctions import gaussmod, tfmod, bimodalmod, tfaxialmod, gaussmod_OAH, tfmod_OAH, bimodalmod_OAH
from scipy.ndimage.interpolation import rotate
from scipy.special import zeta
from scipy.signal import savgol_filter
import os
import cv2
from PIL import Image

# Import own fit functions
import OAH_functions as f1
# import OAHDEV_functions as f2
from OAHDEV_functions import *
# from ..data_analysis import fits_analysis as fa

# Constants. Units are not stated anywhere in this notebook; the notes below are
# inferred from the magnitudes of the values and should be confirmed.
kB = 1.38064852E-23  # matches the Boltzmann constant in J/K
m = 3.81923979E-26  # presumably the atomic mass in kg (close to 23 u) — TODO confirm
hb = 1.0545718E-34  # matches the reduced Planck constant in J s
asc = 2.802642E-9  # presumably the s-wave scattering length in m — TODO confirm
mu0 = 1E-50  # NOTE(review): not the vacuum permeability (~1.2566E-6); meaning not visible here — confirm
e0 = 8.854187E-12  # matches the vacuum permittivity in F/m
pix_size = 6.5E-6 / 2.63  # presumably camera pixel pitch (m) divided by the imaging magnification — TODO confirm
# Light field properties
lamb0 = 589.1E-9  # Wavelength (presumably in m)
k0 = 2 * np.pi / lamb0  # wavenumber, 2*pi / wavelength

# NOTE(review): absolute, machine-specific path; it is not referenced by any other visible cell.
save_folder = '/home/bec_lab/Desktop/imgs/SOAH/SpinAnalysisApril2025/PCA/'
# Use the STIX fonts for both math text and regular text in all figures.
matplotlib.rcParams['mathtext.fontset'] = 'stix'
matplotlib.rcParams['font.family'] = 'STIXGeneral'

In [5]:
# Cropping parameters for the loaded images. They are used as slice bounds
# [cropTop:cropBottom, cropLeft:cropRight] on the last two image axes, so the
# negative values count from the end of the axis.
cropBottom=-9
cropTop=10
cropLeft=100
cropRight=-100

# Chebyshev polynomial orders for the background gradient correction.
# In RemovePhaseGradient, gradientremovalorder_X is the order of the fit along
# image axis 0 and gradientremovalorder_Y the order of the fit along image axis 1.
gradientremovalorder_X=7
gradientremovalorder_Y=3

number_of_images=50 # use only a subset to speed up and reduce memory usage

In [6]:
# Load the complex-field image stacks (flat/blank shots, shots with atoms, and
# the previously normalized result) from local disk. Only local loading is
# implemented in this cell.
# NOTE(review): the paths below are absolute and machine-specific.

file_blank='/home/bec_lab/Desktop/full_complex_field_20250507_66_flat.npy'
fileAtoms='/home/bec_lab/Desktop/full_complex_field_20250507_66_atoms.npy'
file_OldNormalized='/home/bec_lab/Desktop/full_complex_field_20250507_66.npy'


# Load from local disk
# NOTE(review): allow_pickle=True lets np.load execute pickled objects; only use it
# on trusted files. The atoms file is loaded without it, so it may not be needed
# for the other two either — confirm.
oldNormalized=np.load(file_OldNormalized, allow_pickle=True)
full_blank_images=np.load(file_blank, allow_pickle=True)
full_complex_images_Atoms=np.load(fileAtoms)

# Crop the images to the region of interest and keep at most number_of_images shots.
# The stacks are indexed as [shot, sub-image, axis-2, axis-3].
blank_images=full_blank_images[:number_of_images,:,cropTop:cropBottom,cropLeft:cropRight]
complex_images_Atoms=full_complex_images_Atoms[:number_of_images,:,cropTop:cropBottom,cropLeft:cropRight]
# nimages can be smaller than number_of_images if the file holds fewer shots;
# dummy is the length of axis 1 (later cells reshape assuming it is 2);
# xdim and ydim are the cropped sizes of the last two axes.
(nimages,dummy,xdim,ydim)=blank_images.shape

In [7]:
# Render a complex image as RGB: amplitude sets the brightness, phase sets the hue.
def Complex2HSV(z, rmin, rmax, hue_start=90,dphi=360):
    """Convert a complex array into an RGB image via the HSV colour model.

    Parameters
    ----------
    z : array of complex values to display.
    rmin, rmax : amplitude limits; |z| is clamped to [rmin, rmax] and then
        mapped linearly onto the HSV value channel (rmin -> 0, rmax -> 1).
    hue_start : hue, in degrees, assigned to zero phase.
    dphi : phase span, in degrees, that corresponds to one full turn of the hue wheel.

    Returns
    -------
    RGB array produced by ``hsv_to_rgb`` from the stacked (hue, saturation, value)
    channels; saturation is fixed at 0.85.
    """
    # Amplitude, limited to the requested display range.
    magnitude = np.clip(np.abs(z), rmin, rmax)
    # Phase in degrees, folded into a hue in [0, 1).
    phase_deg = np.angle(z, deg=1)
    hue = (phase_deg / dphi + hue_start/360)%1
    saturation = np.full_like(hue, 0.85)
    value = (magnitude - rmin) / (rmax - rmin)
    return hsv_to_rgb(np.dstack((hue, saturation, value)))

In [8]:
def RemovePhaseGradient(image):
    """Remove a smooth background phase from a complex 2-D image.

    The phase of ``image`` is unwrapped and averaged along each axis. Each of
    the two mean profiles is fitted with a Chebyshev series on [-1, 1], using
    the module-level orders ``gradientremovalorder_X`` (profile along axis 0)
    and ``gradientremovalorder_Y`` (profile along axis 1). The sum of the two
    1-D fits is then subtracted from the phase of ``image``; the amplitude is
    not modified.

    Parameters
    ----------
    image : 2-D complex array.

    Returns
    -------
    Complex array with the same shape as ``image``, equal to
    ``image * exp(-1j * fitted_phase)``.
    """
    (xdim_,ydim_)=image.shape
    image_unwrapped = unwrap_phase(np.angle(image))

    # Normalised coordinates for the Chebyshev fits. Only the two 1-D axes are
    # needed; no full 2-D coordinate grid is built.
    X_axis = np.linspace(-1,1,xdim_)
    Y_axis = np.linspace(-1,1,ydim_)

    # Mean phase profile along axis 1 (averaged over axis 0) and along axis 0
    # (averaged over axis 1).
    profile_along_axis1 = np.mean(image_unwrapped,axis=0)
    profile_along_axis0 = np.mean(image_unwrapped,axis=1)
    fit1 = np.polynomial.chebyshev.chebfit(Y_axis, profile_along_axis1, gradientremovalorder_Y)
    fit2 = np.polynomial.chebyshev.chebfit(X_axis, profile_along_axis0, gradientremovalorder_X)
    # Drop the constant term of the second fit so the overall offset is only
    # contributed by fit1.
    fit2[0]=0

    # Separable background: fiteval[r, c] = fit1(Y_axis[c]) + fit2(X_axis[r]).
    background_axis1 = np.polynomial.chebyshev.chebval(Y_axis, fit1)
    background_axis0 = np.polynomial.chebyshev.chebval(X_axis, fit2)
    fiteval = background_axis1[np.newaxis, :] + background_axis0[:, np.newaxis]

    return np.exp(-1.0j*fiteval)*image



In [37]:
# Remove phase gradients from all images and keep only the phase (in radians).
show_images=False
gradient_removeds_blank=np.zeros_like(blank_images, dtype=float)
gradient_removeds_Atoms=np.zeros_like(complex_images_Atoms,dtype=float)
# How many shots to process, and how many sub-images (axis 1) each shot holds.
num_images_to_plot = number_of_images
n_cols = 2              # sub-images per shot; also the number of preview columns
n_rows = num_images_to_plot   # not used below; kept so the name stays defined


def _process_shot(stack, phases_out, shot, title_prefix, show=False):
    """Gradient-correct every sub-image of one shot and store its phase.

    For each of the ``n_cols`` sub-images of ``stack[shot]`` the background
    phase is removed with ``RemovePhaseGradient`` and the remaining phase
    (``np.angle``) is written to ``phases_out[shot, j]``. When ``show`` is
    true, columns 1000:2000 of each corrected sub-image are also drawn with
    ``Complex2HSV`` in a one-row figure titled ``"<title_prefix> <shot+1>, <j>"``.
    """
    if show:
        fig, axes = plt.subplots(1, n_cols, figsize=(16 * n_cols, 3))
    for j in range(n_cols):
        image_data = RemovePhaseGradient(stack[shot, j, :, :])
        phases_out[shot, j, :, :] = np.angle(image_data)
        if show:
            # Preview only the central column window of the corrected image.
            cropped_image = image_data[:, 1000:2000]
            axes[j].imshow(Complex2HSV(z=cropped_image, rmin=0, rmax=1))
            axes[j].set_title(f"{title_prefix} {shot + 1}, {j}")
    # Only call show() when a figure was actually created for this shot.
    if show:
        plt.show()


# Process the blank shot and the matching atom shot one after the other, so the
# preview figures (if enabled) appear in blank/atoms pairs.
for i in range(num_images_to_plot):
    _process_shot(blank_images, gradient_removeds_blank, i, "Image", show=show_images)
    _process_shot(complex_images_Atoms, gradient_removeds_Atoms, i, "Atoms Image", show=show_images)



In [10]:
# Two independent copies of the gradient-removed blank phases: one stays intact
# for the reconstruction, the other gets the possible atom region blanked out.
noatom_reconstruct = gradient_removeds_blank.copy()
noatom_data = gradient_removeds_blank.copy()

# Zero the window where the atoms could be (20:-20 on the second-to-last axis,
# 1000:2000 on the last axis), for every shot and sub-image.
# This window could possibly be made a bit tighter.
noatom_data[..., 20:-20, 1000:2000] = 0


In [11]:
# Flatten the two spatial axes of every image into a single pixel axis,
# giving arrays indexed as (shot, sub-image, pixel).
flatstack = noatom_data.reshape(number_of_images, 2, -1)
flatstack_reconstruct = noatom_reconstruct.reshape(number_of_images, 2, -1)
flatstack_Atoms = gradient_removeds_Atoms.reshape(number_of_images, 2, -1)

In [24]:
# Pixel matrices (rows: shots, columns: pixels) for the individual sub-images.
stack0 = flatstack[:, 0]                 # masked blank phases, sub-image 0
stack0A = flatstack_Atoms[:, 0]          # atom phases, sub-image 0
stack0r = flatstack_reconstruct[:, 0]    # unmasked blank phases, sub-image 0
stack1 = flatstack[:, 1]                 # masked blank phases, sub-image 1 (not decomposed here)
# Reduced SVD of the masked blank stack of sub-image 0.
U, S, Vh = mysvd0 = np.linalg.svd(stack0, full_matrices=False)

In [25]:
# VHY is a modified version of the Vh matrix, used to reconstruct the background.
# It is built by projecting the unmasked blank stack (stack0r) onto U and scaling
# each row by 1/S, so its rows also carry values in the region that was zeroed
# in stack0 (and is therefore missing from Vh).
VHY = (S**-1)[:, np.newaxis] * (U.T.conj() @ stack0r)

In [26]:
# Estimate how much of the variance is captured by the leading singular values.
# Index 0 is left out of both sums: the result is the sum of the squares of
# S[1], ..., S[V-1] divided by the sum of the squares of S[1], S[2], ...
V=12
f_explained = np.sum(S[1:V]**2) / np.sum(S[1:]**2)
print(f"Fraction of variance explained by the first {V} singular values: {f_explained:.2f}")
print(f"Estimated noise suppression factor: {1/(1-f_explained):.2f}")

Fraction of variance explained by the first 12 singular values: 0.78
Estimated noise suppression factor: 4.57


In [40]:
# Plot the contribution of one of the singular vectors across the image series.
i=0 #which singular vector to plot
fig, ax = plt.subplots(2, 1)
# Top panel: column i of U, one value per image.
ax[0].plot(np.real(U[:,i]), label='Real Part')
ax[0].plot(np.imag(U[:,i]), label='Imaginary Part')
ax[0].set_ylabel(f"U[:, {i}]")
ax[0].legend()
# Bottom panel: projection of every (masked) blank image onto row i of Vh.
ax[1].plot(np.real(Vh[i].conjugate()@stack0.T), label='Real Part')
ax[1].set_xlabel("image index")
ax[1].set_ylabel(f"projection on Vh[{i}]")
ax[1].legend()  # without this the label on this panel is never displayed
plt.show()


<IPython.core.display.Javascript object>

In [42]:
# Clean an image with atoms: predict its background phase from the singular
# vectors of the blank images and subtract the prediction.
t=3 # index of the atom image to clean
V=6 # number of singular values to use
phaserange=1.0 # colour scale limit for the phase images, in radians
# Coefficients of the atom image on the rows of Vh (built from the masked blank stack).
coeff = Vh.conj() @ stack0A[t]
# Background prediction from the first V components, using the filled-in vectors VHY.
sim = (coeff[:V] @ VHY[:V,:]).reshape(xdim,ydim)
residual = gradient_removeds_Atoms[t,0] - sim

fig, ax = plt.subplots(3, 1)
ax[0].imshow(sim, vmin=-phaserange, vmax=phaserange)
ax[0].set_title("Predicted background phase")
ax[1].imshow(gradient_removeds_Atoms[t,0], vmin=-phaserange, vmax=phaserange)
ax[1].set_title("Atom image phase (gradient removed)")
ax[2].imshow(residual, vmin=-phaserange, vmax=phaserange)
ax[2].set_title("Atom image minus predicted background")
plt.show()

# Compare one column of the cleaned image with the same column index of the
# previously normalized data (rotated by pi and shifted by 0.1 for display).
# NOTE(review): the cleaned image is cropped (cropTop/cropLeft) while oldNormalized
# is indexed directly with rows 5: and column 1500 — confirm the two line up.
fig, ax = plt.subplots(1, 1)
ax.plot(residual[:,1500], label="PCA-cleaned")
# Slice first, then take the angle, so only the plotted column is processed.
ax.plot(np.angle(oldNormalized[t,0,5:,1500]*np.exp(np.pi*1.0j))+0.1, label="old normalization (+0.1)")
ax.set_xlabel("row index")
ax.set_ylabel("phase (rad)")
ax.legend()
plt.show()


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [32]:
# Singular-value spectrum on a log10 scale, skipping index 0 and stopping at index 19.
spectrum = S[1:20]
fig, ax = plt.subplots()
# Plot against the true singular-value index (which starts at 1 here), not the
# position inside the slice.
ax.plot(np.arange(1, 1 + len(spectrum)), np.log10(spectrum))
ax.set_xlabel("singular value index")
ax.set_ylabel("log10(S)")
plt.show()

<IPython.core.display.Javascript object>