In [3]:
import scanpy as sc
import numpy as np
import pandas as pd

In [None]:
# Load the dataset from an .h5ad file; the relative path is resolved against the
# current working directory. Later cells read per-cell annotations from adata.obs.
adata = sc.read("../data/melanoma_scRNA_processed.h5ad")

In [None]:
# Based on the cell types we got, aggregate the per-cell labels into one feature row per patient
def compute_patient_composition(
    adata,
    patient_col="PatientID",
    celltype_col="pred_celltype",
    response_col="Response"
):
    """Summarise cell-type composition and CD8 state per patient.

    Only ``adata.obs`` is read; it is grouped by ``patient_col`` and one
    output row is produced for every patient that has at least one row.

    Parameters
    ----------
    adata
        Object exposing a pandas DataFrame as ``adata.obs`` (one row per cell).
    patient_col : str
        Column of ``adata.obs`` used to group rows into patients.
    celltype_col : str
        Column holding the cell-type label; each distinct label ``ct``
        becomes an output column ``frac_<ct>``.
    response_col : str
        Column holding the clinical label. The first value within each
        patient group is used.
        NOTE(review): assumes the label is constant within a patient - confirm.

    Returns
    -------
    pandas.DataFrame
        Columns: ``PatientID`` (the group key, regardless of ``patient_col``),
        the ``frac_<ct>`` columns, ``CD8_memory_frac``, ``CD8_exhaust_frac``,
        ``CD8_mem_exh_ratio`` and ``Response``. Cell types absent from a
        patient get a fraction of 0.0. A missing ``Response`` is left missing
        instead of being overwritten with 0.0. If ``adata.obs`` has no rows,
        an empty DataFrame is returned.
    """
    obs = adata.obs

    # The column set is identical for every patient subset, so check once.
    has_cd8_state = "CD8_state" in obs.columns

    rows = []

    # observed=True iterates only over patients that actually have rows. For a
    # categorical patient column this avoids the pandas FutureWarning about the
    # changing `observed` default; unused categories would produce empty groups
    # that carry no information anyway.
    for pid, sub in obs.groupby(patient_col, observed=True):
        row = {"PatientID": pid}

        # Cell type fractions (relative frequencies within this patient)
        type_counts = sub[celltype_col].value_counts(normalize=True)
        for ct, frac in type_counts.items():
            row[f"frac_{ct}"] = frac

        # CD8 memory/exhaust fraction.
        # NOTE(review): the denominator is every row of the patient, not only
        # rows with a CD8 state - confirm this is the intended definition.
        if has_cd8_state:
            row["CD8_memory_frac"] = np.mean(sub["CD8_state"] == "Memory_like")
            row["CD8_exhaust_frac"] = np.mean(sub["CD8_state"] == "Exhausted_like")
            # The small constant keeps the ratio finite when no row is exhausted-like.
            row["CD8_mem_exh_ratio"] = (
                row["CD8_memory_frac"] / (row["CD8_exhaust_frac"] + 1e-6)
            )
        else:
            row["CD8_memory_frac"] = 0
            row["CD8_exhaust_frac"] = 0
            row["CD8_mem_exh_ratio"] = 0

        # Clinical label (Responder / Non)
        row["Response"] = sub[response_col].iloc[0]

        rows.append(row)

    df = pd.DataFrame(rows)

    # Fill gaps only in the numeric feature columns: a cell type that a patient
    # lacks is a genuine 0.0 fraction, whereas a missing identifier or clinical
    # label must not be turned into the number 0.0.
    feature_cols = [c for c in df.columns if c not in ("PatientID", "Response")]
    if feature_cols:
        df[feature_cols] = df[feature_cols].fillna(0.0)
    return df


In [None]:
# Basic set of features: primarily cell-type composition and CD8 state.
# Build the per-patient table, save it as CSV without the index column, and
# leave the table as the last expression so it is displayed as the cell output.
patient_features = compute_patient_composition(adata)
patient_features.to_csv("../data/patient_celltype_composition.csv", index=False)
patient_features

  for pid, sub in adata.obs.groupby(patient_col):


Unnamed: 0,PatientID,frac_CD8_T,frac_NK,frac_T_cells,frac_CD4_T,frac_B_cells,frac_Dendritic,frac_Plasma,frac_Myeloid,CD8_memory_frac,CD8_exhaust_frac,CD8_mem_exh_ratio,Response
0,P1,0.286369,0.271478,0.202749,0.123711,0.059565,0.028637,0.017182,0.010309,0.392898,0.607102,0.647169,Responder
1,P2,0.364017,0.16318,0.242678,0.112971,0.02371,0.009763,0.027894,0.055788,0.193863,0.806137,0.240484,Non-responder
2,P3,0.22973,0.161123,0.339917,0.16632,0.032225,0.022869,0.006237,0.04158,0.527027,0.472973,1.114283,Non-responder
3,P4,0.091278,0.089249,0.312373,0.092292,0.263692,0.01927,0.037525,0.09432,0.51927,0.48073,1.080167,Non-responder
4,P5,0.417197,0.097134,0.253185,0.122611,0.009554,0.020701,0.017516,0.062102,0.240446,0.759554,0.316561,Non-responder
5,P6,0.06686,0.196221,0.18314,0.161337,0.034884,0.02907,0.203488,0.125,0.219477,0.780523,0.281191,Non-responder
6,P7,0.173623,0.195326,0.307179,0.180301,0.096828,0.010017,0.003339,0.033389,0.462437,0.537563,0.860247,Responder
7,P8,0.186391,0.193787,0.220414,0.071006,0.233728,0.02071,0.016272,0.057692,0.494083,0.505917,0.976606,Responder
8,P10,0.171717,0.159091,0.323232,0.174242,0.015152,0.010101,0.111111,0.035354,0.388889,0.611111,0.636363,Non-responder
9,P11,0.169329,0.191693,0.412141,0.092652,0.025559,0.028754,0.003195,0.076677,0.271565,0.728435,0.372807,Non-responder
