# 1. MSI image segmentation

### 1a. This step enables you to segment your MSI image across all m/z values. Prior to doing this, Gaussian spectral smoothing and baseline subtraction will be performed.

*Lines to update:*

*4.) Filename: Update with the path to your .imzML file*

*5.) Output directory: Update with the path to your output directory*

*6.) Sample name: Update with your sample name*

In [None]:
# User inputs that the segmentation cells below pass on to MSIght.
filename = r"example_data\prostate_example.imzML"  # path to the .imzML file
output_directory = r"E:\MSIght_output"  # path to the output directory
sample_name = "R0008"  # sample name

#### Option 1 (Quickest, least accurate): Manual setting. Here you can set the t-SNE parameters manually with no score or quality control

*All lines below can optionally be updated based on dataset*

In [None]:
### Advanced parameters ###
# Pre-processing settings (presumably the Gaussian smoothing width and the
# baseline-subtraction window -- confirm against cluster_msi).
sigma = 2
structuring_element_size = 10

# PCA and t-SNE settings.
pca_components = 100
tsne_components = 2
tsne_verbose = 0  # NOTE(review): not passed to the cluster_msi call that follows
tsne_perplexity = 41
tsne_interations = 1349  # spelling (sic) is relied on by the cluster_msi call
tsne_learning_rate = 462

# Number of k-means clusters.
k_means_cluster_number = 8

In [None]:
from MSIght_Jupyter.refactor_segment import cluster_msi

# Segment the MSI data with the manually chosen parameters; the returned
# values are reused by the cluster-removal and composite-image cells.
(
    unfiltered_cluster_results,
    width,
    height,
    cluster_colors,
    cluster_image_full,
    cmap,
    legend_handles_full,
    tsne_result,
) = cluster_msi(
    filename,
    output_directory,
    sample_name,
    sigma,
    structuring_element_size,
    pca_components,
    tsne_components,
    tsne_perplexity,
    tsne_learning_rate,
    tsne_interations,
    k_means_cluster_number,
)

#### Option 2 (most accurate, slowest): Exhaustively determine the t-SNE parameters that result in the highest Silhouette score.

In [None]:
### Advanced parameters ###
# Pre-processing settings (presumably the Gaussian smoothing width and the
# baseline-subtraction window -- confirm against cluster_msi_scored_w_csv).
sigma = 2
structuring_element_size = 10

# PCA and t-SNE settings.
pca_components = 50
tsne_components = 2
tsne_verbose = 0
# NOTE(review): the two values below are not passed to the
# cluster_msi_scored_w_csv call that follows.
tsne_perplexity = 5
tsne_interations = 300

# Number of k-means clusters.
k_means_cluster_number = 8

In [None]:
from MSIght_Jupyter.refactor_segment import cluster_msi_scored_w_csv

# Segment the MSI data with the scored variant; the returned values are
# reused by the cluster-removal and composite-image cells.
(
    unfiltered_cluster_results,
    width,
    height,
    cluster_colors,
    cluster_image_full,
    cmap,
    legend_handles_full,
    tsne_result,
) = cluster_msi_scored_w_csv(
    filename,
    output_directory,
    sample_name,
    sigma,
    structuring_element_size,
    pca_components,
    tsne_components,
    tsne_verbose,
    k_means_cluster_number,
)

### 1b. Based on results from the above step, select clusters to remove that do not correspond to the tissue area.

#### Confirm that the new image looks ideal. If not, adjust the clusters removed

*Lines to update:*

*3.) List of clusters to remove*

In [None]:
from MSIght_Jupyter.refactor_segment import cluster_removal

# Cluster labels that do not correspond to the tissue area.
clusters_to_remove = [1, 5, 7, 2]

filtered_cluster_results = cluster_removal(
    unfiltered_cluster_results,
    width,
    height,
    cluster_colors,
    cluster_image_full,
    cmap,
    legend_handles_full,
    clusters_to_remove,
    output_directory,
    sample_name,
)

### 1c. Using segmented coordinates, generate composite image of all m/z

#### Adjust threshold value as necessary

*Lines to update:*

*4.) Threshold*

In [None]:
from MSIght_Jupyter.refactor_segment import make_composite_image

# Threshold used when building the composite image; adjust as necessary.
threshold = 0.3

full_composite_image = make_composite_image(
    filtered_cluster_results,
    threshold,
    output_directory,
    sample_name,
)

### 1d. Segment composite image

In [None]:
from MSIght_Jupyter.refactor_segment import composite_wo_selected_clusters

# Segment the composite image using the unfiltered clusters and the list of
# clusters selected for removal.
segmented_image = composite_wo_selected_clusters(
    unfiltered_cluster_results,
    clusters_to_remove,
    full_composite_image,
    output_directory,
    sample_name,
)

*Lines to update:*

*3.) Median filter size*

In [None]:
from MSIght_Jupyter.refactor_segment import remove_residual_noise

# Median filter size handed to remove_residual_noise.
median_filter_size = 1

final_MSI_image = remove_residual_noise(
    segmented_image,
    median_filter_size,
    output_directory,
    sample_name,
)

# 2. H&E Image Processing
### Adjust threshold value as needed

*Lines to update:*

*4.) Path to H&E image in .TIF format*

*6.) Sample name*

*7.) Path to output directory*

In [None]:
from MSIght_Jupyter.refactor_histology_preprocess import preprocess_he

# Raw string: the Windows path contains a backslash, and "\p" in a normal
# string literal is an invalid escape sequence that newer Python versions
# warn about. The resulting path value is unchanged.
image_path = r'example_data\prostate_he_example.tif'
threshold_value = 128
sample_name = 'R0008_norm_centroid'
output_directory = r"E:\MSIght_output"

# Pre-process the H&E image; the result is compared with the MSI image below.
final_he_image = preprocess_he(image_path, threshold_value, sample_name, output_directory)

# 3. Rotate MSI image to match the H&E orientation
### Here is a comparison of both images:

In [None]:
import matplotlib.pyplot as plt

# Side-by-side greyscale preview: MSI in panel 1, H&E in panel 2 of a 2x3 grid.
plt.figure(figsize=(12, 10))

plt.subplot(2, 3, 1)
plt.title("MSI image")
plt.imshow(final_MSI_image, cmap="gray")

plt.subplot(2, 3, 2)
plt.title("H&E image")
plt.imshow(final_he_image, cmap="gray")

### 3a. Run this cell enough times to get in the proper orientation (rotates 90 degrees each time):

In [None]:
import numpy as np

# Each run rotates the MSI image by a further 90 degrees (the result is
# stored back into final_MSI_image).
final_MSI_image = np.rot90(final_MSI_image)

# Redraw both images so the orientations can be compared.
plt.figure(figsize=(12, 10))

plt.subplot(2, 3, 1)
plt.title("MSI image")
plt.imshow(final_MSI_image, cmap="gray")

plt.subplot(2, 3, 2)
plt.title("H&E image")
plt.imshow(final_he_image, cmap="gray")

### 3b. Run this cell to flip the image vertically (up/down):

In [None]:
import numpy as np

# Flip the MSI image in the up/down direction (result is stored back into
# final_MSI_image, so running the cell twice restores the original).
final_MSI_image = np.flipud(final_MSI_image)

# Redraw both images so the orientations can be compared.
plt.figure(figsize=(12, 10))

plt.subplot(2, 3, 1)
plt.title("MSI image")
plt.imshow(final_MSI_image, cmap="gray")

plt.subplot(2, 3, 2)
plt.title("H&E image")
plt.imshow(final_he_image, cmap="gray")

### 3c. Run this cell to flip the image horizontally (left/right):

In [None]:
import numpy as np

# Flip the MSI image in the left/right direction (result is stored back into
# final_MSI_image, so running the cell twice restores the original).
final_MSI_image = np.fliplr(final_MSI_image)

# Redraw both images so the orientations can be compared.
plt.figure(figsize=(12, 10))

plt.subplot(2, 3, 1)
plt.title("MSI image")
plt.imshow(final_MSI_image, cmap="gray")

plt.subplot(2, 3, 2)
plt.title("H&E image")
plt.imshow(final_he_image, cmap="gray")

# 4. Interpolate MSI image

### Option 1: Linear interpolation (default)

*Lines to update:*

*4.) Path to H&E image in .TIF format*

*5.) Path to MSI file in .imzML format*

*6.) Sample name*

*7.) Path to output directory*

In [None]:
from MSIght_Jupyter.refactor_interpolation import interpolate_MSI

# Raw strings: both Windows paths contain "\p", which is an invalid escape
# sequence in a normal string literal and triggers a warning on newer Python
# versions. The resulting path values are unchanged.
image_path = r'example_data\prostate_he_example.tif'
filename = r'example_data\prostate_example.imzML'
sample_name = 'R0008_norm_centroid'
output_directory = r"E:\MSIght_output"

# Returns the H&E image and the interpolated MSI image used by the
# registration cells that follow.
sized_he_image, interpolated_MSI_image = interpolate_MSI(
    filename,
    image_path,
    final_MSI_image,
    final_he_image,
    output_directory,
    sample_name,
)

# 5. Affine transform images

### Option 1: Perform automated affine transformation

*Lines to update:*

*4.) MSI threshold value*

*5.) H&E threshold value*

In [None]:
from MSIght_Jupyter.refactor_affine_transform import register_he_msi

# Threshold values for the MSI and H&E images; adjust per dataset.
msi_threshold = 18
he_threshold = 0.3

# Automated affine registration of the interpolated MSI image to the H&E image.
affine_matrix, transformed_ms_image = register_he_msi(
    sized_he_image,
    interpolated_MSI_image,
    msi_threshold,
    he_threshold,
    output_directory,
    sample_name,
)

### Option 2: Perform manual, landmark-based affine transformation

In [None]:
import plotly.express as px

def show_msi_he_coords(final_MSI_image,final_he_image):
    """Show the MSI image and then the H&E image as Plotly figures.

    Args:
        final_MSI_image: Image displayed first, titled 'MSI Image'.
        final_he_image: Image displayed second, titled 'H&E Image'.
    """
    panels = (
        ('MSI Image', final_MSI_image),
        ('H&E Image', final_he_image),
    )
    for panel_title, panel_image in panels:
        px.imshow(panel_image, title=panel_title).show()

#### Here are the MSI and H&E images. They can be zoomed in and hovered over to identify corresponding data points

In [None]:
# NOTE(review): this import rebinds show_msi_he_coords, replacing the function
# of the same name defined in the previous cell.
from MSIght_Jupyter.refactor_manual_affine import show_msi_he_coords

# Display both images so corresponding landmark coordinates can be read off.
show_msi_he_coords(interpolated_MSI_image, sized_he_image)

#### Input your data points below for H&E (pts_he) and MSI (pts_ms)

*Lines to update:*

*2.) H&E points to register*

*3.) Corresponding MSI points to register*

In [None]:
# Corresponding landmark pairs: row i of pts_he matches row i of pts_ms.
pts_he = np.array(
    [[492, 1193], [1501, 483], [843, 81], [73, 1124]],
    dtype=np.float32,
)
pts_ms = np.array(
    [[588, 1345], [1655, 543], [988, 56], [39, 1082]],
    dtype=np.float32,
)

In [None]:
from MSIght_Jupyter.refactor_manual_affine_transform import manual_register_he_msi

# Landmark-based affine registration using the point pairs entered above.
affine_matrix, transformed_ms_image = manual_register_he_msi(
    pts_ms,
    pts_he,
    interpolated_MSI_image,
    sized_he_image,
    output_directory,
    sample_name,
)

# 6. Refine transformation with non-linear transformation (B-spline refinement)

### Optional: Downsize images to determine optimal B-spline refinement parameters

In [None]:
import cv2
import numpy as np
import plotly.express as px

def downsample_image_numpy(image, factor):
    """Return `image` resized to 1/`factor` of its width and height.

    Args:
        image: Array with rows on axis 0 and columns on axis 1.
        factor: Divisor applied to both dimensions; results are truncated
            to integers.

    Returns:
        The image resized by cv2.resize with linear interpolation.
    """
    rows, cols = image.shape[0], image.shape[1]
    # cv2.resize expects the target size as (width, height).
    target_size = (int(cols / factor), int(rows / factor))
    return cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)

# Downsample both registered images by a factor of 30.
downsampled_he_image = downsample_image_numpy(sized_he_image, factor=30)
downsampled_ms_image = downsample_image_numpy(transformed_ms_image, factor=30)

# Interactive previews of the downsampled images.
fig = px.imshow(downsampled_he_image, title='HE Image')
fig.show()

fig = px.imshow(downsampled_ms_image, title='MS Image')
fig.show()

*Lines to update:*

*4.) Number of histograms*

*5.) Gradient tolerance*

*6.) Optimizer iterations*

*7.) Coarseness*

In [None]:
from MSIght_Jupyter.refactor_bspline import perform_bspline

# B-spline settings; the trailing values are the alternatives the author
# left next to each setting.
number_histograms = 75  # alt: 50
gradient_tolerance = 1e-5  # alt: 1e-5
optimizer_iterations = 20  # alt: 100
courseness = 10  # alt: 50

# Trial run on the downsampled images.
b_spline_apply = perform_bspline(
    downsampled_he_image,
    downsampled_ms_image,
    number_histograms,
    gradient_tolerance,
    optimizer_iterations,
    courseness,
)

### Apply B-spline

*Lines to update:*

*4.) Number of histograms*

*5.) Gradient tolerance*

*6.) Optimizer iterations*

*7.) Coarseness*

In [None]:
from MSIght_Jupyter.refactor_bspline import perform_bspline

# B-spline settings; the trailing values are the alternatives the author
# left next to each setting.
number_histograms = 75  # alt: 50
gradient_tolerance = 1e-5  # alt: 1e-5
optimizer_iterations = 20  # alt: 100
courseness = 10  # alt: 50

# Run on the full-size images; the result is used by the utility cells.
b_spline_apply = perform_bspline(
    sized_he_image,
    transformed_ms_image,
    number_histograms,
    gradient_tolerance,
    optimizer_iterations,
    courseness,
)

# Process LC-MS/MS Data with MSFragger

*Note: You will need to install MSFragger (housed within FragPipe) for this:* [https://fragpipe.nesvilab.org/](https://fragpipe.nesvilab.org/)

***
This workflow is intended for routine analysis. You can provide a .workflow file from FragPipe that can be run routinely with only a small amount of input from MSIght. For optimization of FragPipe parameters and more sophisticated tasks, please refer to FragPipe directly. Once that is done, you can provide a workflow path (below) for routine use.
***

*On Windows, make sure every file path in the fields below uses two backslashes (`\\`) instead of one.*

In [None]:
from MSIght_Jupyter.refactor_msfragger import run_fragpipe

# All Windows paths use doubled backslashes so no character is read as an
# escape sequence.
fragpipe_path = 'downloads\\FragPipe-22.0\\fragpipe\\bin\\fragpipe.bat'
working_directory = 'example_data\\MSFragger_output'
# Fixed: a single backslash here made "\f" a form-feed character, corrupting
# the workflow path.
generic_workflow_file = 'example_data\\fragpipe.workflow'
fasta_db = 'example_data\\mouse_proteome_decoys_reviewed.fas'

raw_files = [
    'example_data\\prostate_example_TR1.raw',
    'example_data\\prostate_example_TR2.raw',
    'example_data\\prostate_example_TR3.raw',
]

run_fragpipe(working_directory, generic_workflow_file, raw_files, fasta_db, fragpipe_path)

# Utility #1: Combine LC-MS/MS, MSI, and H&E data- Protein-wise

### This example is for if a specific protein or set of proteins are the target (based on UniProt ID)

*Lines to update:*

*1.) Path to MSFragger output file directory*

*3.) Proteins of interest*

*4.) MSI ppm error*

In [None]:
# Raw string: "\M" in a normal string literal is an invalid escape sequence
# that newer Python versions warn about. The path value is unchanged.
# NOTE(review): fragger_output_path is not used below; psm_path is built from
# output_path instead -- confirm which directory actually holds psm.tsv.
fragger_output_path = r'example_data\MSFragger_output'
output_path = r"E:\MSIght_output"
psm_path = output_path + '\\psm.tsv'
protein_oi_list = ['P28481','P08121']  # proteins of interest (UniProt IDs)
ppm_error = 50  # MSI ppm error

from MSIght_Jupyter.refactor_fragger_process import process_fragger
fragger_results_formatted = process_fragger(protein_oi_list, ppm_error, psm_path, sized_he_image, output_path)

from MSIght_Jupyter.refactor_he_lcmsms_msi_merge import merge_all_results
merge_all_results(output_directory, sample_name, fragger_results_formatted, filename, sized_he_image, b_spline_apply)

# Utility #2: Extract a m/z image and overlay with H&E image

*Lines to update:*

*1.) m/z of interest*

*3.) m/z tolerance for MSI, in Da*

*4.) z-value. Should be set to 1 in most instances for MALDI*

*4.) Path to the imaging .imzML file*

In [None]:
# Target ion and extraction window.
mz = 773.583  # m/z of interest
mz_tolerance = 0.1  # m/z tolerance, in Da
z_value = 1  # z-value
filename = r'example_data\prostate_example.imzML'  # imaging .imzML file

from MSIght_Jupyter.mz_image_extract import extract_mz_image_transform
from MSIght_Jupyter.mz_image_extract import overlay_msi_he

# Extract the m/z image, then overlay it with the H&E image.
mz_image_transformed = extract_mz_image_transform(
    filename,
    mz,
    mz_tolerance,
    z_value,
    b_spline_apply,
    sized_he_image,
)
overlay_msi_he(mz_image_transformed, sized_he_image, mz)

# Utility #3: Combine LC-MS/MS, MSI, and H&E data- Gene-wise

### This example is for when a specific gene or set of genes is the target (based on gene ID)

*Lines to update:*

*1.) Path to MSFragger output file directory*

*3.) Genes of interest*

*4.) MSI ppm error*

In [None]:
# Raw string: "\M" in a normal string literal is an invalid escape sequence
# that newer Python versions warn about. The path value is unchanged.
# NOTE(review): fragger_output_path is not used below; psm_path is built from
# output_path instead -- confirm which directory actually holds psm.tsv.
fragger_output_path = r'example_data\MSFragger_output'
output_path = r"E:\MSIght_output"
psm_path = output_path + '\\psm.tsv'
# NOTE(review): these entries are the same identifiers used as UniProt IDs in
# the protein-wise utility -- confirm they are the intended gene IDs.
gene_oi_list = ['P28481','P08121']
ppm_error = 50  # MSI ppm error

# Fixed: the gene-wise functions were imported but the protein-wise
# process_fragger / merge_all_results were called instead.
# NOTE(review): the gene-wise functions are assumed to take the same
# arguments as their protein-wise counterparts -- verify the signatures.
from MSIght_Jupyter.fragger_process import process_fragger_gene
fragger_results_formatted = process_fragger_gene(gene_oi_list, ppm_error, psm_path, sized_he_image, output_path)

from MSIght_Jupyter.he_lcmsms_msi_merge import merge_all_results_gene_wise
merge_all_results_gene_wise(output_directory, sample_name, fragger_results_formatted, filename, sized_he_image, b_spline_apply)

# Utility #4: Combine LC-MS/MS, MSI, and H&E data untargeted

In [None]:
# NOTE(review): global_proteomics_search was called without being imported;
# it is presumed to live beside process_fragger_gene -- confirm the module.
from MSIght_Jupyter.fragger_process import global_proteomics_search, process_fragger_gene

# NOTE(review): fragger_results_path and min_prot_instances are not assigned
# anywhere in the visible notebook and must be set before running this cell.
# `threshold` is whatever was last assigned (0.3 in the composite-image step).
# Fixed: the closing parenthesis of this call was missing (SyntaxError).
fragger_results_formatted = global_proteomics_search(
    fragger_results_path,
    threshold,
    min_prot_instances,
    ppm_error,
    output_path,
)

from MSIght_Jupyter.he_lcmsms_msi_merge import merge_all_results
merge_all_results(output_directory, sample_name, fragger_results_formatted, filename, sized_he_image, b_spline_apply)