In [1]:
import pandas as pd
import scanpy as sc

In [3]:
# table with clinical responses
meta = pd.read_csv('data/GSE120575_patient_ID_single_cells.txt.gz', 
                   sep="\t", encoding="latin", 
                   skiprows=19).iloc[:, :7]
# remove additional information after sample table
meta = meta.iloc[:16291]

# rename columns by removing the `characteristics: ` prefix to make it more concise
meta.columns = [x.replace("characteristics: ", "") for x in meta.columns]

# rename the long `patient ID (...)` column to simple sample_id
meta.rename(
    columns={"patinet ID (Pre=baseline; Post= on treatment)": "sample_id"},
    inplace=True
)

# add columns that seperate sample_id for later analysis
meta["patient_id"] = [x.split("_")[1] for x in meta.sample_id]
meta["time_point"] = [x.split("_")[0] for x in meta.sample_id]

meta

Unnamed: 0,Sample name,title,source name,organism,sample_id,response,therapy,patient_id,time_point
0,Sample 1,A10_P3_M11,Melanoma single cell,Homo sapiens,Pre_P1,Responder,anti-CTLA4,P1,Pre
1,Sample 2,A11_P1_M11,Melanoma single cell,Homo sapiens,Pre_P1,Responder,anti-CTLA4,P1,Pre
2,Sample 3,A11_P3_M11,Melanoma single cell,Homo sapiens,Pre_P1,Responder,anti-CTLA4,P1,Pre
3,Sample 4,A11_P4_M11,Melanoma single cell,Homo sapiens,Pre_P1,Responder,anti-CTLA4,P1,Pre
4,Sample 5,A12_P3_M11,Melanoma single cell,Homo sapiens,Pre_P1,Responder,anti-CTLA4,P1,Pre
...,...,...,...,...,...,...,...,...,...
16286,Sample 16287,H5_P5_M67_L001_T_enriched,Melanoma single cell,Homo sapiens,Post_P6,Non-responder,anti-PD1,P6,Post
16287,Sample 16288,H6_P5_M67_L001_T_enriched,Melanoma single cell,Homo sapiens,Post_P6,Non-responder,anti-PD1,P6,Post
16288,Sample 16289,H7_P5_M67_L001_T_enriched,Melanoma single cell,Homo sapiens,Post_P6,Non-responder,anti-PD1,P6,Post
16289,Sample 16290,H8_P5_M67_L001_T_enriched,Melanoma single cell,Homo sapiens,Post_P6,Non-responder,anti-PD1,P6,Post


In [6]:
# table with sc gene expression
ge = pd.read_csv('data/GSE120575_melanoma_scrna_tpm.txt.gz, 
                 sep="\t", skiprows=3)
ge

Unnamed: 0,TNMD,0.00,0.00.1,0.00.2,0.00.3,0.00.4,0.00.5,0.00.6,0.00.7,0.00.8,...,0.00.16276,0.00.16277,0.00.16278,0.00.16279,0.00.16280,0.00.16281,0.00.16282,0.00.16283,0.00.16284,Unnamed: 16292
0,DPM1,0.0,9.24,0.0,7.99,0.0,0.0,0.0,0.0,8.44,...,6.60,0.00,0.0,0.00,7.34,0.0,0.0,0.0,0.0,
1,SCYL3,0.0,0.00,0.0,0.00,0.0,0.0,0.0,0.0,0.00,...,5.15,2.54,0.0,0.00,0.00,0.0,0.0,0.0,0.0,
2,C1orf112,0.0,0.00,0.0,0.00,0.0,0.0,0.0,0.0,0.00,...,8.03,0.00,0.0,0.61,7.65,0.0,0.0,0.0,0.0,
3,FGR,0.0,0.00,0.0,0.00,0.0,0.0,0.0,0.0,8.24,...,0.00,0.00,0.0,6.72,0.00,0.0,0.0,0.0,0.0,
4,CFH,0.0,0.00,0.0,0.00,0.0,0.0,0.0,0.0,0.00,...,0.00,0.00,0.0,0.00,0.00,0.0,0.0,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55730,RP4-621B10.8,0.0,0.00,0.0,0.00,0.0,0.0,0.0,0.0,0.00,...,0.00,0.00,0.0,0.00,0.00,0.0,0.0,0.0,0.0,
55731,RP11-114I8.4,0.0,0.00,0.0,0.00,0.0,0.0,0.0,0.0,0.00,...,0.00,0.00,0.0,0.00,0.00,0.0,0.0,0.0,0.0,
55732,RP11-180C16.1,0.0,0.00,0.0,0.00,0.0,1.1,0.0,0.0,0.00,...,0.00,0.00,0.0,0.00,0.00,0.0,0.0,0.0,0.0,
55733,AP000230.1,0.0,0.00,0.0,0.00,0.0,0.0,0.0,0.0,0.00,...,0.00,0.00,0.0,0.00,0.00,0.0,0.0,0.0,0.0,


In [None]:
# set scanpy plotting parameter to dpi = 150
sc.settings.set_figure_params(dpi=150)

ge_file = "data/melanoma_gse120575_scrna_tpm.txt.gz"

# transpose expression matrix
ge = sc.read_text(ge_file).transpose()

ge.shape