In [1]:
import pennylane as qml
from matplotlib import pyplot as plt
from pennylane import numpy as np
import scipy
import networkx as nx
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import copy
import os
import random
import torch
import itertools

In [None]:

# ---------------------------------------------------------------------------
# Load the expression table, min-max scale every row except one held-out row,
# quantise the scaled values into four integer levels (0-3), then append the
# held-out row back, unscaled, as the last row of `df_final`.
# ---------------------------------------------------------------------------
csv_path = "/nfs/turbo/umms-ukarvind/rsudhars/quantum/lognorm_expression_by_pseudotime.csv"
df = pd.read_csv(csv_path, index_col=0)

# Positional index of the row that is kept out of scaling (the 20th row).
HELD_OUT_POS = 19
held_out_label = df.index[HELD_OUT_POS]

row_20 = df.iloc[[HELD_OUT_POS]]         # list indexer -> stays a one-row DataFrame
df_rest = df.drop(index=held_out_label)  # label-based drop of that same row

# NOTE(review): MinMaxScaler rescales each *column* independently. If columns
# are cells and rows are genes, every cell is scaled across genes rather than
# every gene across cells -- confirm that this is the intended direction.
scaler = MinMaxScaler()
scaled_frame = pd.DataFrame(
    scaler.fit_transform(df_rest),
    columns=df.columns,
    index=df_rest.index,
)
arr = scaled_frame.to_numpy()

# Cut points separating the four levels (origin of the constants is not shown here).
LOW_CUT, MID_CUT, HIGH_CUT = 0.1585, 0.5, 0.8415

conditions = [
    arr < LOW_CUT,
    (LOW_CUT <= arr) & (arr < MID_CUT),
    (MID_CUT <= arr) & (arr < HIGH_CUT),
    HIGH_CUT <= arr,
]
choices = list(range(4))

# Entries matching none of the conditions (e.g. NaN) fall through to level 0.
levels = np.select(conditions, choices, default=0).astype(np.int32)
df_scaled_rest = pd.DataFrame(
    levels,
    index=scaled_frame.index,
    columns=scaled_frame.columns,
)

# Held-out row goes last, so it is always `df_final.iloc[-1]`.
df_final = pd.concat([df_scaled_rest, row_20])

df_final


In [None]:
import pandas as pd
import numpy as np

# Pack the discretised expression levels of the leading genes into a single
# base-4 integer per cell, grouped into pseudotime quantile bins.
#
# Input is `df_final` from the discretisation cell: its gene rows hold integer
# levels 0-3 (stored as floats after the concat) and its last row is the
# unscaled held-out row, used here as pseudotime. The raw `df` must not be
# used: base-4 packing is only collision-free when every digit is in {0,1,2,3},
# and in `df` the pseudotime row is not at position 14.
#
# NOTE(review): the `import numpy as np` at the top of this cell rebinds the
# PennyLane-wrapped `np` imported in the first cell -- confirm that no later
# cell relies on PennyLane's `requires_grad` arrays via `np`.
N_BINS = 25          # number of pseudotime quantile bins requested
N_PACKED_GENES = 6   # leading gene rows packed into each base-4 code

genes_df = df_final.iloc[:-1, :]
pseudotime = df_final.iloc[-1, :].astype(float)

# Step 1: assign each cell to a pseudotime quantile bin.
# labels=False yields integer bin codes; duplicates="drop" merges coinciding
# quantile edges instead of raising when pseudotime has ties, so fewer than
# N_BINS bins may come back.
bins = pd.qcut(pseudotime, q=N_BINS, labels=False, duplicates="drop")
bin_labels = sorted(int(code) for code in bins.dropna().unique())

# Step 2: base-4 encode every cell once (loop-invariant, so done up front).
digits = genes_df.iloc[:N_PACKED_GENES, :].to_numpy()
assert np.isin(digits, (0, 1, 2, 3)).all(), (
    "base-4 packing expects discretised levels 0-3 in the gene rows"
)
base4_weights = np.array([4**i for i in reversed(range(N_PACKED_GENES))])  # [1024, 256, 64, 16, 4, 1]
packed = digits.astype(int).T @ base4_weights  # shape: (num_cells,)

# Step 3: collect the codes and the median pseudotime of each bin.
# Positional boolean masks are used so duplicate column labels cannot
# duplicate or misalign cells.
bin_codes = bins.to_numpy()
compressed_data = []
pseudotime_medians = []
for bin_id in bin_labels:
    in_bin = bin_codes == bin_id
    compressed_data.append(packed[in_bin].tolist())
    pseudotime_medians.append(pseudotime[in_bin].median())

# Step 4: one row per bin, indexed by median pseudotime and sorted by it.
# Bins may differ in size; shorter rows are padded with NaN.
output_df = pd.DataFrame(compressed_data, index=pseudotime_medians).sort_index()
output_df