<a href="https://colab.research.google.com/github/mtsizh/galaxy-morphology-manifold-learning/blob/main/decals_3d_embedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

You can either upload your own `curated_imgs.zip` or use the following code to download it from our repository.

In [None]:
!wget -q https://raw.githubusercontent.com/mtsizh/galaxy-morphology-manifold-learning/main/curated_dataset/curated_imgs_multipart.zip && echo "HEAD dowloaded" || "ERROR downloading HEAD"

for i in range(1,8):
  !wget -q https://raw.githubusercontent.com/mtsizh/galaxy-morphology-manifold-learning/main/curated_dataset/curated_imgs_multipart.z0{i}  && echo "PART {i} of 7 OK" || "ERROR downloading PART {i}"

print('MERGING PARTS')
!zip -FF curated_imgs_multipart.zip --out curated_imgs.zip > /dev/null && rm curated_imgs_multipart.z* && echo "COMPLETE" || "FAILED"

!unzip -q -o curated_imgs.zip && echo "UNZIPPED" || "FAIL"

HEAD dowloaded
PART 1 of 7 OK
PART 2 of 7 OK
PART 3 of 7 OK
PART 4 of 7 OK
PART 5 of 7 OK
PART 6 of 7 OK
PART 7 of 7 OK
MERGING PARTS
COMPLETE
UNZIPPED


A few libraries (RAPIDS cuML) are not installed by default, so the next cell installs them.

In [None]:
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
!python rapidsai-csp-utils/colab/pip-install.py

try:
  from cuml.manifold import TSNE
  from cuml.manifold import UMAP
  from cuml.decomposition import PCA
  from google.colab import output
  output.clear()
except:
  print('ERROR')
finally:
  print('COMPLETE')

COMPLETE


In [None]:
import pandas as pd
import numpy as np
from sklearn.manifold import LocallyLinearEmbedding, Isomap, TSNE
from cuml.manifold import UMAP
from cuml.decomposition import PCA
from tqdm.auto import tqdm
from PIL import Image
import pickle


# use different class maps to get different plots
class_map = {1: 'round', 2: 'inbetween', 3: 'cigar'}
#class_map = {4: 'edge on', 5: 'edge off'}
#class_map = {6: 'smooth', 7: 'featured'}
n_bootstrap_samples = 6000
# seed for the random subsample, so that a fresh "Run All" picks the same images
random_seed = 42


# NOTE(review): 'curated_dataset.parquet' is not downloaded by any cell visible
# here — presumably it is extracted from curated_imgs.zip; confirm
df = pd.read_parquet('curated_dataset.parquet')
# keep the rows whose 'class' text contains any of the selected class names
regex_filter = '|'.join(class_map.values())
filtered_df = df[df['class'].str.contains(regex_filter, regex=True)]
# sample() draws without replacement and raises when asked for more rows than
# exist, so cap the request at the number of matching rows
n_samples = min(n_bootstrap_samples, len(filtered_df))
if n_samples < n_bootstrap_samples:
  print(f'WARNING: only {n_samples} matching rows, requested {n_bootstrap_samples}')
bootstrapped_df = filtered_df.sample(n=n_samples, random_state=random_seed)
# NOTE(review): images are assumed to be single-channel 120x120 — confirm
X = np.zeros((len(bootstrapped_df), 120, 120))
y = np.zeros(len(bootstrapped_df))


# numeric label = key of the class whose name occurs in the row's 'class' text
for key, val in class_map.items():
  y[bootstrapped_df['class'].str.contains(val, regex=True).to_numpy()] = key

print('LOAD IMAGES')
# the parquet stores paths under 'dr5'; the unzipped images live under 'curated_imgs'
paths = bootstrapped_df['png_loc'].str.replace('dr5', 'curated_imgs')
for idx, file_path in enumerate(tqdm(paths)):
  with Image.open(file_path) as img:
    X[idx,:,:] = np.array(img)
# one row per image: (n_images, 120*120)
X_flattened = X.reshape(X.shape[0], -1)


reduction_methods = {
    'TSNE': TSNE(n_components=3, perplexity=50),
    'UMAP': UMAP(n_components=3, n_neighbors=30, min_dist=0.1, metric='euclidean'),
    'ISO': Isomap(n_neighbors=30, n_components=3),
    'PCA': PCA(n_components=3),
    'LLE': LocallyLinearEmbedding(n_neighbors=30, n_components=3)
}

# file-name prefix shared by every output, e.g. 'round_inbetween_cigar'
cname = '_'.join(class_map.values())
for name, method in reduction_methods.items():
  print(f'CALCULATE {name}')
  X_embedded = method.fit_transform(X_flattened)
  out_file = f'{cname}_{name}_3D.pkl'.replace(' ', '_')
  print('SAVING to: ', out_file)
  # store the embedding together with its labels and the label -> name mapping
  with open(out_file, 'wb') as f:
    pickle.dump({'X_embedded': X_embedded, 'y': y, 'class_map': class_map}, f)
print('COMPLETE')

LOAD IMAGES


  0%|          | 0/6000 [00:00<?, ?it/s]

CALCULATE TSNE
SAVING to:  round_inbetween_cigar_TSNE_3D.pkl
CALCULATE UMAP
SAVING to:  round_inbetween_cigar_UMAP_3D.pkl
CALCULATE ISO
SAVING to:  round_inbetween_cigar_ISO_3D.pkl
CALCULATE PCA
SAVING to:  round_inbetween_cigar_PCA_3D.pkl
CALCULATE LLE
SAVING to:  round_inbetween_cigar_LLE_3D.pkl
COMPLETE


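Alternatively, you can download precomputed embeddings instead of computing them yourself. To plot one of them, set `load_from` in the last cell to the name of the downloaded file.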




In [None]:
!wget https://raw.githubusercontent.com/mtsizh/galaxy-morphology-manifold-learning/main/embeddings/edge_on_edge_off_LLE_138_neighbours_10.pkl
!wget https://raw.githubusercontent.com/mtsizh/galaxy-morphology-manifold-learning/main/embeddings/round_inbetween_cigar_LLE_138_neighbours_10.pkl
!wget https://raw.githubusercontent.com/mtsizh/galaxy-morphology-manifold-learning/main/embeddings/smooth_featured_LLE_138_neighbours_10.pkl


Draw the final result

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import seaborn as sns
import plotly.express as px

# set correct filename to load embedding data
# (relative path: the embedding files are written to / downloaded into the
# current working directory, which is /content by default on Colab)
load_from = 'round_inbetween_cigar_ISO_3D.pkl'

# NOTE: pickle.load can execute arbitrary code — only open files you created
# yourself or downloaded from a source you trust
with open(load_from, 'rb') as f:
  data = pickle.load(f)
X_embedded = data['X_embedded']
y = data['y']
class_map = data['class_map']


# Convert data to DataFrame
df = pd.DataFrame(X_embedded, columns=['X', 'Y', 'Z'])
# class_map is keyed by ints while the labels may be stored as floats,
# so convert explicitly before looking up the class name
df['Class'] = [class_map[int(label)] for label in y]

# Define custom colors for each class
# (only the round/inbetween/cigar names are listed; add entries here when
# plotting another class map to control its colors as well)
custom_color_map = {
    "round": "red",
    "inbetween": "blue",
    "cigar": "green"
}

# Create 3D scatter plot with labeled classes
fig = px.scatter_3d(df, x='X', y='Y', z='Z',
                     color='Class',  # Use class names instead of numbers
                     color_discrete_map=custom_color_map,  # Apply custom colors
                     title="3D Point Cloud",
                     labels={'Class': 'Category'},
                     opacity=0.8)  # Adjust opacity if needed

# Change marker size
fig.update_traces(marker=dict(size=2))  # Set smaller point size

fig.show()


(6000, 3)
(6000,)
