## Script: Convert Visium Data to TissuePlot Format

In [None]:
# Required imports
import pandas as pd
import scipy.io

In [None]:
def create_spot_cluster_membership(barcode_file, weight_file, output_file="SpotClusterMemebership.csv"):
    """
    Merge a barcode file and a weights file side by side into a single CSV file.

    Row i of the barcode file is paired with row i of the weights file; the
    two files are NOT matched by barcode value, so they must list the spots
    in the same order.

    Parameters:
    barcode_file (str): Path to the barcode .tsv file (no header row; only
                        the first column is named, as "barcode")
    weight_file (str): Path to the tab-separated weights file (its first
                       line is read as the header)
    output_file (str): Name of the final output CSV file. Note that the
                       default name is spelled "SpotClusterMemebership.csv".

    Raises:
    ValueError: If the two files do not contain the same number of rows.

    """

    # Read barcodes (headerless, one barcode per line)
    barcodes = pd.read_csv(barcode_file, header=None, names=["barcode"], sep="\t")

    # Read weights file (tab-separated, first line is the header)
    weights = pd.read_csv(weight_file, sep="\t")

    # A positional merge is only meaningful when both files describe the same
    # number of spots; otherwise concat would silently pad rows with NaN.
    if len(barcodes) != len(weights):
        raise ValueError(
            f"Row count mismatch: {barcode_file} has {len(barcodes)} barcodes "
            f"but {weight_file} has {len(weights)} weight rows"
        )

    # concat(axis=1) aligns on index labels, so give both frames a fresh
    # 0..n-1 index to guarantee a purely positional pairing.
    merged = pd.concat(
        [barcodes.reset_index(drop=True), weights.reset_index(drop=True)],
        axis=1,
    )

    # Save to CSV
    merged.to_csv(output_file, index=False)
    print(f"Spot cluster memebership file saved as {output_file}")

In [None]:
def prepare_expressed_genes(matrix_file, features_file, barcode_file, spot_file, output_file="TopExpressedGenes.csv"):
    """
    Create a filtered spot x gene expression matrix and save it as CSV.

    The output has one row per row of the spot file, in the same order, and
    one column per gene name. No barcode column is written, so the row
    position is the only link back to the spot file. Spots that have no
    stored counts (or are missing from the barcode file) are written as
    all-zero rows so that this positional correspondence is preserved.

    Parameters:
        matrix_file (str): Path to the Matrix Market file (.mtx) containing UMI counts
                           (rows are genes, columns are barcodes).
        features_file (str): Path to the features file (.tsv) with gene information
                             (gene names are taken from the second column).
        barcode_file (str): Path to the barcodes file (.tsv).
        spot_file (str): Path to the SpotClusterMembership CSV file; its first
                         column is used as the list of barcodes to keep.
        output_file (str): Name of the output CSV file containing the TopExpressedGenes.

    Notes:
        Only genes with at least one stored matrix entry for a kept spot
        appear as columns. Counts of features sharing the same gene name are
        summed. Columns are in the sorted order produced by pivot_table.

    """

    # Load gene names from the features file (second column contains gene names)
    features_df = pd.read_csv(features_file, sep='\t', header=None)
    gene_names = features_df[1].to_numpy()

    # Load barcodes from the barcode file
    barcode_df = pd.read_csv(barcode_file, sep='\t', header=None)
    barcodes = barcode_df[0].to_numpy()

    # Load the sparse matrix in coordinate form (row = gene, col = barcode)
    matrix = scipy.io.mmread(matrix_file).tocoo()

    # Load SpotClusterMembership file; its first column lists the wanted barcodes
    spot_df = pd.read_csv(spot_file)
    spot_barcodes = spot_df.iloc[:, 0].tolist()

    # Keep only the matrix entries whose barcode is listed in the spot file
    barcode_is_wanted = barcode_df[0].isin(spot_barcodes).to_numpy()
    keep = barcode_is_wanted[matrix.col]

    # Translate the kept indices to names with array indexing instead of a
    # per-entry Python lookup
    df = pd.DataFrame({
        'Barcode': barcodes[matrix.col[keep]],
        'Gene_Name': gene_names[matrix.row[keep]],
        'UMI_Count': matrix.data[keep],
    })

    # Pivot to create barcode x gene matrix
    df_pivot = df.pivot_table(index='Barcode', columns='Gene_Name', values='UMI_Count', aggfunc='sum', fill_value=0)

    # pivot_table sorts the barcodes and omits spots without any entry;
    # restore the spot-file order (zero rows for missing spots) before the
    # barcodes are discarded, otherwise rows cannot be matched to spots.
    df_pivot = df_pivot.reindex(spot_barcodes, fill_value=0)

    # Drop the index (barcodes) entirely
    df_pivot = df_pivot.reset_index(drop=True)

    # Save without barcodes
    df_pivot.to_csv(output_file, index=False)

    print(f"Expressed genes file saved as: {output_file}")


In [None]:
def create_spot_position_file(input_file, output_file="SpotPosition.csv", radius=71.19291127):
    """
    Convert a tissue_positions_list.csv file into a SpotPosition.csv file.

    The output has the columns barcode, x, y and radius, with one row per
    row of the input file (no rows are filtered out).

    Parameters:
        input_file (str): Path to the tissue_positions_list.csv file
                          (read without a header row).
        output_file (str): Path where the SpotPosition.csv will be saved
                           (default is "SpotPosition.csv").
        radius (float): Constant value written to the "radius" column of
                        every row (default is 71.19291127, the value that
                        was previously hard-coded).

    """

    # Read the tissue_positions_list.csv file (no header in original file)
    df = pd.read_csv(input_file, header=None)

    # Extract required columns: column 0 -> barcode, column 4 -> x, column 5 -> y
    # NOTE(review): in the 10x layout columns 4/5 are presumably the row/column
    # pixel coordinates; confirm that this x/y orientation is what the
    # consumer expects.
    df_new = df[[0, 4, 5]].copy()
    df_new.columns = ["barcode", "x", "y"]

    # Add constant radius column
    df_new["radius"] = radius

    # Save the processed DataFrame to CSV
    df_new.to_csv(output_file, index=False)

    print(f"Spot position file saved as {output_file}")
