In [1]:
import pyreadr, pickle, csv, re, os, tqdm
import numpy as np
import pandas as pd
import tifffile

import torch
import torch.nn as nn

from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle

import matplotlib.pyplot as plt

import pyarrow.parquet as pq

In [2]:
# Data Dependencies
path = '/home/sam/scRNAseq/Xenium/Network_genes_NoiseInjection.RData'
model_path = "/home/sam/scRNAseq/Xenium/AlonNN/NoiseInj/model_state_epochs_150_earlyStop_50_l1_0.0001_depth_5_withSkips_seed_18.pt"

# Model Loading And Configuration

In [3]:
# Load scRNAseq Dataset 
rdata = pyreadr.read_r(path)
# Load data
df = rdata['Retina_expMatrix_candidateGenes']
df['Cluster'] = df['Cluster'].apply(lambda x: x if len(x.split('_')[0]) == 2 else '0' + x) # Standardize cluster names


# Load the list of indices for each network to use
class_net_genes = rdata['Class_indices'].to_numpy().ravel()
rgc_net_genes = rdata['RGC_indices'].to_numpy().ravel()
ac_net_genes = rdata['AC_indices'].to_numpy().ravel()
bc_net_genes = rdata['BC_indices'].to_numpy().ravel()
hc_net_genes = rdata['HC_indices'].to_numpy().ravel()
nn_net_genes = rdata['NonNeural_indices'].to_numpy().ravel()

# Encode and format scRNAseq dat
df['cluster'] = df['Cluster']

def encode_class(arr):
    '''This function will encode subtypes' cell classesbased on expert rules and is not intended for decoding'''
    custom_array = []

    for value in arr:
        if re.match(r'^\d{2}_', value):
            custom_array.append(0)
        elif value.startswith('AC_'):
            custom_array.append(1)
        elif value.endswith('Photoreceptors'):
            custom_array.append(2)
        elif value == '0MG (Mueller Glia)':
            custom_array.append(3)
        elif value.startswith('0BC'):
            custom_array.append(4)
        elif value.startswith('0RBC'):
            # Note this duplication is for simplicity of handling the 2 BC naming conventions
            custom_array.append(4)
        elif value == '0Horizontal Cell':
            custom_array.append(5)
        elif value == '0Pericyte':
            custom_array.append(6)
        elif value == '0Endothelial':
            custom_array.append(6)
        elif value == '0Microglia':
            custom_array.append(6)   
        else:
            custom_array.append(7)
    return custom_array

# Function to generate a sort key for each subclass name
def sort_key(name):
    if name.startswith('0BC'):
        return 4
    elif name.startswith('0RBC'):
        return 4
    elif re.match(r'^\d{2}_', name):
        return 0
    elif name.startswith('AC_'):
        return 1
    elif name.endswith('Photoreceptors'):
        return 2
    elif name == '0MG (Mueller Glia)':
        return 3
    elif name == '0Horizontal Cell':
        return 5
    elif name == '0Pericyte':
        return 6
    elif name == '0Endothelial':
        return 6
    elif name == '0Microglia':
        return 6
    else:
        return 7

# Apply the sort_key function to each subclass name and sort the DataFrame
df['sort_key'] = df['Cluster'].apply(sort_key)
df.sort_values(by='sort_key', inplace=True)
df.drop(columns='sort_key', inplace=True)  # Optionally remove the sort key

class_arr = encode_class(df['Cluster'])

# Encode the categoric response 
le = LabelEncoder()
df['Cluster'] = le.fit_transform(df['Cluster'])

cluster_col = df.pop('Cluster')
dataset_col = df.pop('Dataset')
df.insert(len(df.columns), 'Cluster', cluster_col)
df.insert(len(df.columns), 'Class', class_arr)

display(df)

def create_mapping(df):
    # Extract the unique pairs of encoded cluster values and their corresponding class encodings
    unique_pairs = df[['Cluster', 'Class']].drop_duplicates()
    
    # Create a dictionary mapping from Cluster to Class
    mapping = dict(zip(unique_pairs['Cluster'], unique_pairs['Class']))
    
    return mapping

# Usage
mapping = create_mapping(df)

print(mapping)


# Utilize the mapping to count the number of subclasses for each class
subclass_counts = {i: 0 for i in range(8)}  # Initialize counts for 6 classes
for _, class_id in mapping.items():
    subclass_counts[class_id] += 1

# Construct the class_info dictionary
class_info = {
    'Genes': class_net_genes,  # Genes used for class classification
    'num_classes': 8,  # Total number of classes
    0: {  # Information for class 0 (RGCs)
        'Genes': rgc_net_genes,
        'num_subclasses': subclass_counts[0]
    },
    1: {  # Information for class 1 (ACs)
        'Genes': ac_net_genes,
        'num_subclasses': subclass_counts[1]
    },
    2: {  # Information for class 2
        'Genes': bc_net_genes,
        'num_subclasses': subclass_counts[2]
    },
    3: {  # Information for class 3
        'Genes': bc_net_genes,
        'num_subclasses': subclass_counts[3]
    },
    4: {  # Information for class 4BC
        'Genes': bc_net_genes,
        'num_subclasses': subclass_counts[4]
    },
    5: {  # Information for class 5 HCs
        'Genes': hc_net_genes,
        'num_subclasses': subclass_counts[5]  
    },
    6: {  # Information for class 6 NonNeural
        'Genes': nn_net_genes,
        'num_subclasses': subclass_counts[6]  
    },
    7: {  # Information for class 5 (Catch-all class)
        'Genes': class_net_genes,
        'num_subclasses': 1  # Only 1 subclass as it's a catch-all class
    }
}

# Add 'num_hidden' with default zero to each tentacle
for c in range(class_info['num_classes']):
    class_info[c]['num_hidden'] = 0
    class_info[c]['skip'] = False
    

l_arm = 5
skip = True
class_info[0]['num_hidden'] = l_arm # RGC Arm
class_info[1]['num_hidden'] = l_arm # AC Arm
class_info[0]['skip'] = skip # RGC Arm
class_info[1]['skip'] = skip # AC Arm

Unnamed: 0_level_0,Kcnip4,Isl2,Glra1,Zic1,Syndig1l,Isl1,Pou3f1,Mmp9,Grm5,Cpne4,...,Glrb,Rbpms,Vamp1,Cspg4,Kcnq1ot1,Cdh5,Foxp1,cluster,Cluster,Class
rownames,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
aRGC4_AGCGTATGTAGCCTAT.1,0.250,0.000,0.000,0.021,0.100,0.045,0.0,0.000,0.000,0.000,...,0.032,0.108,0.114,0.05,0.167,0.000,0.000,25_Novel,45,0
aRGC3_GACTACAAGTGCGTGA.1,0.250,0.000,0.048,0.000,0.025,0.000,0.0,0.099,0.077,0.048,...,0.000,0.162,0.000,0.00,0.083,0.167,0.019,01_W3D1.1,0,0
aRGC4_GTACGTATCGCGTAGC.1,0.125,0.000,0.000,0.021,0.075,0.091,0.0,0.155,0.154,0.048,...,0.159,0.135,0.025,0.00,0.167,0.000,0.000,10_Novel,30,0
aRGC4_GTACGTATCTGTCTCG.1,0.062,0.111,0.048,0.000,0.000,0.091,0.0,0.000,0.000,0.000,...,0.111,0.000,0.051,0.00,0.000,0.000,0.056,32_F_Novel,52,0
aRGC4_GTACTCCAGATTACCC.1,0.312,0.000,0.000,0.021,0.000,0.136,0.0,0.014,0.000,0.143,...,0.127,0.270,0.228,0.00,0.000,0.000,0.259,04_FminiOFF,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4951,0.000,0.000,0.000,0.021,0.000,0.000,0.0,0.000,0.000,0.000,...,0.032,0.000,0.000,0.05,0.000,0.167,0.000,0Endothelial,23,6
4941,0.062,0.000,0.000,0.021,0.000,0.045,0.0,0.000,0.077,0.000,...,0.000,0.000,0.013,0.00,0.000,0.333,0.000,0Endothelial,23,6
4931,0.000,0.056,0.000,0.000,0.000,0.000,0.0,0.014,0.077,0.000,...,0.048,0.000,0.038,0.05,0.000,0.333,0.000,0Endothelial,23,6
2102,0.000,0.000,0.000,0.000,0.000,0.045,0.0,0.000,0.000,0.000,...,0.048,0.000,0.000,0.45,0.000,0.000,0.000,0Pericyte,27,6


{45: 0, 0: 0, 30: 0, 52: 0, 3: 0, 4: 0, 2: 0, 33: 0, 36: 0, 5: 0, 61: 0, 6: 0, 46: 0, 48: 0, 8: 0, 38: 0, 35: 0, 51: 0, 37: 0, 47: 0, 34: 0, 49: 0, 31: 0, 1: 0, 32: 0, 40: 0, 39: 0, 55: 0, 7: 0, 43: 0, 50: 0, 41: 0, 62: 0, 54: 0, 57: 0, 56: 0, 42: 0, 44: 0, 58: 0, 60: 0, 53: 0, 59: 0, 65: 0, 63: 0, 64: 0, 126: 1, 100: 1, 66: 1, 67: 1, 88: 1, 77: 1, 127: 1, 76: 1, 70: 1, 72: 1, 99: 1, 89: 1, 121: 1, 74: 1, 102: 1, 110: 1, 94: 1, 93: 1, 96: 1, 83: 1, 95: 1, 92: 1, 68: 1, 86: 1, 69: 1, 82: 1, 75: 1, 80: 1, 91: 1, 78: 1, 113: 1, 87: 1, 105: 1, 71: 1, 108: 1, 128: 1, 79: 1, 103: 1, 90: 1, 85: 1, 81: 1, 84: 1, 101: 1, 124: 1, 97: 1, 116: 1, 120: 1, 104: 1, 118: 1, 111: 1, 98: 1, 115: 1, 73: 1, 109: 1, 106: 1, 122: 1, 114: 1, 112: 1, 107: 1, 119: 1, 117: 1, 123: 1, 125: 1, 22: 2, 29: 2, 25: 3, 18: 4, 15: 4, 28: 4, 19: 4, 13: 4, 9: 4, 20: 4, 12: 4, 10: 4, 11: 4, 17: 4, 21: 4, 14: 4, 16: 4, 24: 5, 26: 6, 27: 6, 23: 6}


In [4]:
class TentacleNet(nn.Module):
    def __init__(self, input_size, num_subclasses, num_hidden, skip = False):
        super(TentacleNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 2*num_subclasses)
        self.num_hidden = num_hidden
        self.skip = skip+0
        if self.num_hidden > 0:
            self.hidden = nn.ModuleList([nn.Linear(2*num_subclasses, 2*num_subclasses) for _ in range(num_hidden)])
        self.fc2 = nn.Linear(2*num_subclasses, num_subclasses)

    def forward(self, x):
        x = nn.functional.relu(self.fc1(x))
        if self.num_hidden > 0:
            x_skip = x*self.skip  # Save output of fc1 for skip connection
            for hidden_layer in self.hidden:
                x = nn.functional.relu(hidden_layer(x))
            x = x + x_skip  # Add skip connection before final activation
        x = self.fc2(x)
        return nn.functional.log_softmax(x, dim=1)


class CuttleNet(nn.Module):
    def __init__(self, class_info, mapping):
        super(CuttleNet, self).__init__()
        
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.n = len(mapping) # Number of subclasses
        self.class_info = class_info

        # Class Classifier
        self.class_fc1 = nn.Linear(len(class_info['Genes']), 2*class_info['num_classes'])
        self.class_fc2 = nn.Linear(2*class_info['num_classes'], class_info['num_classes'])

        # Subclass Classifiers
        self.subclass_nets = nn.ModuleDict({
            str(class_id): TentacleNet(input_size=len(subclass_info['Genes']) + class_info['num_classes'], 
                                       num_subclasses=subclass_info['num_subclasses'],
                                      num_hidden = subclass_info['num_hidden'],
                                      skip = subclass_info['skip'])
            for class_id, subclass_info in class_info.items()
            if isinstance(class_id, int)
        })
        
        # Calculate the number of subclasses for each class
        self.num_subclasses_per_class = self.calculate_subclasses_per_class(mapping)
        
    def get_subclass_range_for_class(self, class_id):
        start_index = sum(self.num_subclasses_per_class[cid] for cid in range(class_id))
        end_index = start_index + self.num_subclasses_per_class[class_id]
        return slice(start_index, end_index)
    
    def calculate_subclasses_per_class(self, mapping):
        """
        Calculate the number of subclasses for each class using the mapping.
        """
        num_subclasses_per_class = {class_id: 0 for class_id in range(self.class_info['num_classes'])}
        for subclass_id in mapping.keys():
            class_id = mapping[subclass_id]
            num_subclasses_per_class[class_id] += 1
        return num_subclasses_per_class

    def forward(self, x):
        # Class classification
        class_genes = x[:, self.class_info['Genes']]
        class_x = nn.functional.relu(self.class_fc1(class_genes))
        class_output = nn.functional.log_softmax(self.class_fc2(class_x), dim=1)

        # Initialize an output tensor for all subclasses
        all_subclass_output = torch.zeros(x.size(0), 130, device=self.device)  # Assuming 130 total subclasses

        # Populate the output tensor
        for class_id, subclass_info in self.class_info.items():
            if isinstance(class_id, int):
                subclass_genes = x[:, subclass_info['Genes']]
                subclass_input = torch.cat((subclass_genes, class_output), dim=1)

                # Convert class_id to string
                class_id_str = str(class_id)
                subclass_output = self.subclass_nets[class_id_str](subclass_input)

                # Get the range for this class's subclasses
                subclass_range = self.get_subclass_range_for_class(class_id)

                # Multiply subclass predictions by the class prediction probability
                all_subclass_output[:, subclass_range] = subclass_output * class_output[:, class_id].unsqueeze(1)

        return all_subclass_output

    
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = CuttleNet(class_info=class_info, mapping=mapping)

# Load the model state
model.load_state_dict(torch.load(model_path, map_location=device))

# Move the model to the appropriate device and set it to evaluation mode
model.to(device)
model.eval()

CuttleNet(
  (class_fc1): Linear(in_features=238, out_features=16, bias=True)
  (class_fc2): Linear(in_features=16, out_features=8, bias=True)
  (subclass_nets): ModuleDict(
    (0): TentacleNet(
      (fc1): Linear(in_features=278, out_features=90, bias=True)
      (hidden): ModuleList(
        (0-4): 5 x Linear(in_features=90, out_features=90, bias=True)
      )
      (fc2): Linear(in_features=90, out_features=45, bias=True)
    )
    (1): TentacleNet(
      (fc1): Linear(in_features=284, out_features=126, bias=True)
      (hidden): ModuleList(
        (0-4): 5 x Linear(in_features=126, out_features=126, bias=True)
      )
      (fc2): Linear(in_features=126, out_features=63, bias=True)
    )
    (2): TentacleNet(
      (fc1): Linear(in_features=233, out_features=4, bias=True)
      (fc2): Linear(in_features=4, out_features=2, bias=True)
    )
    (3): TentacleNet(
      (fc1): Linear(in_features=233, out_features=2, bias=True)
      (fc2): Linear(in_features=2, out_features=1, bias=

# Inference

In [5]:
# Step 1: Select the slice
df_slice = df.iloc[:, -3:-1]

# Step 2: Drop the index
df_slice_reset = df_slice.reset_index(drop=True)

# Step 3: Remove duplicate rows
df_unique = df_slice_reset.drop_duplicates()

# Step 4: Sort by the last column (you can reference it by its position since it's a slice)
df_sorted = df_unique.sort_values(by=df_unique.columns[-1])
clust_ids = list(df_sorted[['cluster']].values.ravel())
clust_ids.append('other')

# Extract Correct gene order
correct_order = np.array(df.iloc[:,0:-3].columns)
# Correct any mislabeled genes
gene_rename_map = {
'Gm11744': 'Prcd',
'Fam19a3': 'Tafa3',
#     'A730046J19Rik': 'Sertm2',
'Fam19a1': 'Tafa1',
'Cyr61': 'Ccn1'
}
correct_order = np.array([gene_rename_map.get(gene, gene) for gene in correct_order])

In [21]:
def create_count_matrix(df, correct_order):
    # Pivot table to count occurrences of feature_name for each cell_id
    count_matrix = pd.pivot_table(df, index='cell_id', columns='feature_name', aggfunc='size', fill_value=0)
    count_matrix = count_matrix.loc[:,correct_order] # extract genes only
    
    return count_matrix

def CuttleNet_Inference(data_root, clust_ids, correct_order,
                        chunk_size = 10000):


    print(f'Loading experiment data from {data_root}')
    # load transcript data
    data_path = os.path.join(data_root,"transcripts.parquet")

    # Create an empty list to hold chunks
    data_chunks = []
    n_cells = 0
    xen_data = pd.read_parquet(data_path,columns=['cell_id', 'feature_name'])

    data = create_count_matrix(xen_data, correct_order)

    # Remove the row with the index 'UNASSIGNED'
    data = data.drop('UNASSIGNED')
    # Normalize each column by its maximum value, multiply by 1000, round, and divide by 1000
    data = data.apply(lambda x: round(1000 * x / x.max()) / 1000)
    # store cell ids
    cell_ids = list(data.index)

    # Convert to torch tensor and store on GPU
    expMatrix = data.to_numpy()
    expMatrix = torch.tensor(expMatrix, dtype=torch.float32)

    print(f'Data loaded containing {len(cell_ids)} cells')

    # Calculate the number of chunks needed
    n_chunks = int(np.ceil(expMatrix.size(0) / chunk_size))

    # Placeholder to collect the output
    results = []

    print('Performing Inference')
    # Process each chunk
    for i in tqdm.tqdm(range(n_chunks)):
        # Calculate the start and end indices of the current chunk
        start_idx = i * chunk_size
        end_idx = min((i + 1) * chunk_size, expMatrix.size(0))

        # Extract the chunk
        chunk = expMatrix[start_idx:end_idx]

        # Move the chunk to GPU
        chunk = chunk.to('cuda')

        # Perform inference
        with torch.no_grad():  # Ensure gradients are not computed to save memory
            chunk_output = model(chunk)

        # Move the results back to CPU and store them
        chunk_output = chunk_output.cpu()
        results.append(chunk_output)

    # Concatenate the results into a single tensor
    final_results = torch.cat(results, dim=0)

    final_df = pd.DataFrame(final_results.numpy(), columns= clust_ids)
    # Add Prediction column
    final_df['Prediction'] = final_df.idxmax(axis=1)
    # Add cell_ids column
    final_df['cell_id'] = cell_ids

    
    # Load and store cell shape information    
    print('Loading centroid information')
#     cell_shape_path = os.path.join(data_root,"cell_boundaries.csv.gz")
#     nuc_shape_path = os.path.join(data_root,"nucleus_boundaries.csv.gz")
    cent_shape_path = os.path.join(data_root,"cells.parquet")

#     cell_shape_data = pd.read_csv(cell_shape_path, compression='gzip') # load data
#     cell_shape_data = cell_shape_data.add_suffix('_cell') # add specific suffix to columns
#     cell_shape_data = cell_shape_data.rename(index=str, columns={'cell_id_cell':'cell_id'}) # Rename cell_id
#     nuc_shape_data = pd.read_csv(nuc_shape_path, compression='gzip') # load data
#     nuc_shape_data = nuc_shape_data.add_suffix('_nucleus') # add specific suffix to columns
#     nuc_shape_data = nuc_shape_data.rename(index=str, columns={'cell_id_nucleus':'cell_id'}) # Rename cell_id
#     bounds_shape_data = cell_shape_data.merge(nuc_shape_data, on='cell_id') # merge data
    cent_shape_data = pd.read_parquet(cent_shape_path, columns=['cell_id', 'x_centroid', 'y_centroid']) # load data
    final_df = final_df.merge(cent_shape_data, on='cell_id') # merge data
#     bounds_shape_data = bounds_shape_data.merge(cent_shape_data, on='cell_id') # merge data
    
#     final_df = final_df.merge(bounds_shape_data, on='cell_id') # merge data
    
    print('Inference and dataframe merging complete.')

    return final_df

In [22]:
experiments = {0 : {'slide' : '0018429',
                   'path' : '/media/sam/New Volume/Xenium_Data/output-XETG00230__0018429__Region_1__20240105__233208'},
               1 : {'slide' : '0018432',
                   'path' : '/media/sam/New Volume/Xenium_Data/output-XETG00230__0018432__Region_2__20240105__233208'},
               2 : {'slide' : '0018336',
                   'path' : '/media/sam/New Volume/Xenium_Data/BudoffRun2_Slide 3_4/BudoffRun2_Slide 3_4/output-XETG00230__0018336__Region_1__20240124__002923'},
               3 : {'slide' : '0018521',
                   'path' : '/media/sam/New Volume/Xenium_Data/BudoffRun2_Slide 3_4/BudoffRun2_Slide 3_4/output-XETG00230__0018521__Region_1__20240124__002923'},
               4 : {'slide' : '0018624',
                   'path' : '/media/sam/New Volume/Xenium_Data/BudoffRun3_Slide 5_6/BudoffRun3_Slide 5_6/output-XETG00230__0018624__Region_1__20240127__000149'},
               5 : {'slide' : '0022826',
                   'path' : '/media/sam/New Volume/Xenium_Data/BudoffRun3_Slide 5_6/BudoffRun3_Slide 5_6/output-XETG00230__0022826__Region_1__20240127__000149'},
               6 : {'slide' : '0018300',
                   'path' : '/media/sam/New Volume/Xenium_Data/BudoffRun4_Slide 7_8/BudoffRun4_Slide 7_8/output-XETG00230__0018300__Region_1__20240206__235339'},
               7 : {'slide' : '0022825',
                   'path' : '/media/sam/New Volume/Xenium_Data/BudoffRun4_Slide 7_8/BudoffRun4_Slide 7_8/output-XETG00230__0022825__Region_1__20240206__235339'}}

save_path = '/home/sam/scRNAseq/Xenium/Full_Inference_All_Experiments.csv'

for i in range(len(experiments)):
    data_root = experiments[i]['path']
    inf_df = CuttleNet_Inference(data_root, clust_ids, correct_order)
    inf_df['slide'] = experiments[i]['slide']
    if i == 0:
        full_df = inf_df.copy()
    else:
        full_df = pd.concat((full_df, inf_df))
    
    print('Saving full dataframe')
    full_df.to_csv(save_path)

Loading experiment data from /media/sam/New Volume/Xenium_Data/output-XETG00230__0018429__Region_1__20240105__233208
Data loaded containing 791635 cells
Performing Inference


100%|████████████████████████████████████████| 80/80 [00:00<00:00, 171.94it/s]


Loading centroid information
Inference and dataframe merging complete.
Saving full dataframe
Loading experiment data from /media/sam/New Volume/Xenium_Data/output-XETG00230__0018432__Region_2__20240105__233208
Data loaded containing 101846 cells
Performing Inference


100%|████████████████████████████████████████| 11/11 [00:00<00:00, 159.66it/s]

Loading centroid information
Inference and dataframe merging complete.





Saving full dataframe
Loading experiment data from /media/sam/New Volume/Xenium_Data/BudoffRun2_Slide 3_4/BudoffRun2_Slide 3_4/output-XETG00230__0018336__Region_1__20240124__002923
Data loaded containing 336896 cells
Performing Inference


100%|████████████████████████████████████████| 34/34 [00:00<00:00, 166.40it/s]


Loading centroid information
Inference and dataframe merging complete.
Saving full dataframe
Loading experiment data from /media/sam/New Volume/Xenium_Data/BudoffRun2_Slide 3_4/BudoffRun2_Slide 3_4/output-XETG00230__0018521__Region_1__20240124__002923
Data loaded containing 471721 cells
Performing Inference


100%|████████████████████████████████████████| 48/48 [00:00<00:00, 177.21it/s]


Loading centroid information
Inference and dataframe merging complete.
Saving full dataframe
Loading experiment data from /media/sam/New Volume/Xenium_Data/BudoffRun3_Slide 5_6/BudoffRun3_Slide 5_6/output-XETG00230__0018624__Region_1__20240127__000149
Data loaded containing 281791 cells
Performing Inference


100%|████████████████████████████████████████| 29/29 [00:00<00:00, 154.53it/s]


Loading centroid information
Inference and dataframe merging complete.
Saving full dataframe
Loading experiment data from /media/sam/New Volume/Xenium_Data/BudoffRun3_Slide 5_6/BudoffRun3_Slide 5_6/output-XETG00230__0022826__Region_1__20240127__000149
Data loaded containing 491503 cells
Performing Inference


100%|████████████████████████████████████████| 50/50 [00:00<00:00, 175.81it/s]


Loading centroid information
Inference and dataframe merging complete.
Saving full dataframe
Loading experiment data from /media/sam/New Volume/Xenium_Data/BudoffRun4_Slide 7_8/BudoffRun4_Slide 7_8/output-XETG00230__0018300__Region_1__20240206__235339
Data loaded containing 247413 cells
Performing Inference


100%|████████████████████████████████████████| 25/25 [00:00<00:00, 130.67it/s]


Loading centroid information
Inference and dataframe merging complete.
Saving full dataframe
Loading experiment data from /media/sam/New Volume/Xenium_Data/BudoffRun4_Slide 7_8/BudoffRun4_Slide 7_8/output-XETG00230__0022825__Region_1__20240206__235339
Data loaded containing 395189 cells
Performing Inference


100%|████████████████████████████████████████| 40/40 [00:00<00:00, 140.93it/s]


Loading centroid information
Inference and dataframe merging complete.
Saving full dataframe


In [23]:
test = pd.read_csv(save_path)
display(test.head())

Unnamed: 0.1,Unnamed: 0,01_W3D1.1,02_W3D1.2,03_FminiON,04_FminiOFF,05_J-RGC,06_W3B,07_Novel,08_Novel,09_Tbr1_Novel,...,AC_63,AC_7,AC_8,AC_9,other,Prediction,cell_id,x_centroid,y_centroid,slide
0,0,13.499385,13.473839,11.815074,11.647055,12.683795,12.163663,11.894971,11.693455,12.5088,...,-0.0,0.377131,13.855881,10.719368,0.0,AC_57,aaaaaefg-1,1504.220093,3392.329834,18429
1,1,20.84016,19.547691,16.48375,14.670475,20.701984,17.94405,18.40051,17.254879,17.839075,...,-0.0,0.006681,26.022734,32.609207,0.0,AC_13,aaaaagcc-1,1505.09436,3399.590088,18429
2,2,4.139815,3.592308,3.998503,2.680553,3.384927,3.572677,3.027277,2.990842,3.229471,...,-0.0,6.31353,8.020874,0.846739,0.0,AC_41,aaaaefcg-1,1505.547241,3376.981689,18429
3,3,5.000996,4.225929,3.77324,2.983864,4.478297,3.99088,3.84372,3.945924,4.336253,...,-0.0,18.441433,18.661541,0.078183,0.0,AC_1,aaaaejab-1,1505.079346,3385.811768,18429
4,4,8.722825,7.136056,7.496596,5.754329,8.066427,8.52516,6.790527,7.223733,8.75557,...,-0.0,6.728914,8.212195,0.579094,0.0,AC_27,aaaaenmn-1,1510.195923,3383.330566,18429


In [24]:
print(len(test))

3117994


In [25]:
395189+247413+491503+281791+471721+336896+101846+791635

3117994

In [27]:
for c in test.columns:
    print(c)

Unnamed: 0
01_W3D1.1
02_W3D1.2
03_FminiON
04_FminiOFF
05_J-RGC
06_W3B
07_Novel
08_Novel
09_Tbr1_Novel
0BC1A
0BC1B
0BC2
0BC3A
0BC3B
0BC4
0BC5A (Cone Bipolar cell 5A)
0BC5B
0BC5C
0BC5D
0BC6
0BC7 (Cone Bipolar cell 7)
0BC8/9 (mixture of BC8 and BC9)
0Cone Photoreceptors
0Endothelial
0Horizontal Cell
0MG (Mueller Glia)
0Microglia
0Pericyte
0RBC (Rod Bipolar cell)
0Rod Photoreceptors
10_Novel
11_Novel
12_ooDS_NT
13_Novel
14_ooDS_Cck
15_Novel
16_ooDS_DV
17_Tbr1_S1
18_Novel
19_Novel
20_Novel
21_Tbr1_S2
22_M5
23_W3D2
24_Novel
25_Novel
26_Novel
27_Novel
28_FmidiOFF
29_Novel
30_Novel
31_M2
32_F_Novel
33_M1
34_Novel
35_Novel
36_Novel
37_Novel
38_FmidiON
39_Novel
40_M1dup
41_AlphaONT
42_AlphaOFFS
43_AlphaONS
44_Novel
45_AlphaOFFT
AC_1
AC_10
AC_11
AC_12
AC_13
AC_14
AC_15
AC_16
AC_17
AC_18
AC_19
AC_2
AC_20
AC_21
AC_22
AC_23
AC_24
AC_25
AC_26
AC_27
AC_28
AC_29
AC_3
AC_30
AC_31
AC_32
AC_33
AC_34
AC_35
AC_36
AC_37
AC_38
AC_39
AC_4
AC_40
AC_41
AC_42
AC_43
AC_44
AC_45
AC_46
AC_47
AC_48
AC_49
AC_5
AC_50
A