# Basic Analysis of the GBM Sample

In [None]:
import gc
import os
# Work from the notebooks directory so that relative paths used later in this
# notebook (e.g. the "./gbm_5a.zarr" store) resolve there.
os.chdir("/data1/lareauc/users/varelaa/giftwrap/notebooks")
import sys
# Extend the import path before the star import below.
# NOTE(review): presumably this is where the local ``figures`` module lives and
# where the ``plot_*`` helpers used later come from — confirm.
sys.path.append("/data1/lareauc/users/varelaa/giftwrap/notebooks/")
from figures import *

import anndata as ad
import giftwrap as gw
import scanpy as sc
import squidpy as sq
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import spatialdata as sd
import spatialdata_io as sio
import matplotlib as mpl
from scipy.stats import gaussian_kde
#!pip install adjustText
import adjustText
# Default DPI for every figure created in this notebook.
mpl.rcParams['figure.dpi'] = 300
# Bin size used throughout; it is formatted as a zero-padded 3-digit number to
# build table keys such as 'gf_square_016um'.
RESOLUTION=16  # in um

# Load our datasets

In [None]:
# First, our GIFT-seq dataset
# Only the SB-4218 GIFTWRAP outputs are loaded. The earlier 20250815_* paths
# were assigned to the same names and immediately overwritten, so they never
# took effect and have been dropped to avoid confusion about the data source.
gf_5a = "/data1/lareauc/projects/gapfill/analysis/20250816_GBM_visiumHD/SB-4218_GBM_5a_GapFill_GIFTWRAP"
gf_5b = "/data1/lareauc/projects/gapfill/analysis/20250816_GBM_visiumHD/SB-4218_GBM_5b_GapFill_GIFTWRAP"
# Each run directory holds its counts in "counts.1.h5".
adata_5a = gw.read_h5_file(gf_5a + "/counts.1.h5")
adata_5b = gw.read_h5_file(gf_5b + "/counts.1.h5")

In [None]:
def _visium_hd_via_zarr(outs_dir, zarr_path):
    """Read a Visium HD ``outs`` directory, persist it as Zarr, and reload it.

    The object returned by ``sio.visium_hd`` is written to ``zarr_path``
    (overwriting any existing store), dropped, and the store is re-opened
    with ``sd.read_zarr``. The re-read object is what gets returned.
    """
    parsed = sio.visium_hd(outs_dir, dataset_id='')
    # Convert to Zarr
    parsed.write(zarr_path, overwrite=True)
    # Drop the freshly parsed copy before re-reading from the store
    del parsed
    gc.collect()
    return sd.read_zarr(zarr_path)


wta_5a = _visium_hd_via_zarr(
    "/data1/lareauc/projects/gapfill/analysis/20250816_GBM_visiumHD/GBM_5a_WTA/outs",
    "./gbm_5a.zarr",
)

wta_5b = _visium_hd_via_zarr(
    "/data1/lareauc/projects/gapfill/analysis/20250816_GBM_visiumHD/GBM_5b_WTA/outs",
    "./gbm_5b.zarr",
)

# Pre-processing

In [None]:
# Apply the same gap-fill filter (min_cells=10) to both samples, 5a first.
adata_5a, adata_5b = (
    gw.pp.filter_gapfills(sample, min_cells=10)
    for sample in (adata_5a, adata_5b)
)

In [None]:
# Call genotypes on both samples with default settings, 5a first.
adata_5a, adata_5b = (
    gw.tl.call_genotypes(sample)
    for sample in (adata_5a, adata_5b)
)

In [None]:
# Join the 5a gap-fill data with its WTA Visium HD data.
# Note that ``adata_5a`` is rebound to the joined result; later cells read it
# through ``adata_5a.tables['gf_square_<RESOLUTION>um']``.
adata_5a = gw.sp.join_with_wta(wta_5a, adata_5a)
# Bare expression so the notebook displays a summary of the joined object.
adata_5a

In [None]:
# Join the 5b gap-fill data with its WTA Visium HD data.
# Note that ``adata_5b`` is rebound to the joined result; later cells read it
# through ``adata_5b.tables['gf_square_<RESOLUTION>um']``.
adata_5b = gw.sp.join_with_wta(wta_5b, adata_5b)
# Bare expression so the notebook displays a summary of the joined object.
adata_5b

# Look at Library Sizes

In [None]:
# Per-feature total counts for sample 5a, then the 20 largest per-probe totals.
gf_table_5a = adata_5a.tables[f'gf_square_{RESOLUTION:03d}um']
# Summing a SciPy sparse matrix over axis 0 gives a 2-D (1, n_vars) result,
# which is not a valid column; np.asarray(...).ravel() flattens it to one value
# per feature. The same expression also works if X is dense, and it avoids
# materialising the whole matrix the way ``.toarray()`` would.
gf_table_5a.var['size'] = np.asarray(gf_table_5a.X.sum(axis=0)).ravel()
df = gf_table_5a.var.sort_values('size', ascending=False)
# Last expression of the cell: displayed by the notebook.
df.groupby('probe')['size'].sum().reset_index().sort_values('size', ascending=False).head(20)

In [None]:
# Per-feature total counts for sample 5b, then the 20 largest per-probe totals.
gf_table_5b = adata_5b.tables[f'gf_square_{RESOLUTION:03d}um']
# Sum directly on X instead of calling ``.toarray()`` first: densifying the
# whole bins-by-features matrix just to take column sums wastes memory.
# np.asarray(...).ravel() flattens the (1, n_vars) result of a sparse axis sum
# to one value per feature, and is a no-op reshape if X is already dense.
gf_table_5b.var['size'] = np.asarray(gf_table_5b.X.sum(axis=0)).ravel()
df = gf_table_5b.var.sort_values('size', ascending=False)
# Last expression of the cell: displayed by the notebook.
df.groupby('probe')['size'].sum().reset_index().sort_values('size', ascending=False).head(20)

# Figures - Library Size

In [None]:
# plot_library_size(adata_5a, table='', resolution=RESOLUTION)

In [None]:
# plot_library_size(adata_5b, table='', resolution=RESOLUTION)

In [None]:
# plot_library_size(adata_5a, table='gf', resolution=RESOLUTION)

In [None]:
# plot_library_size(adata_5b, table='gf', resolution=RESOLUTION)

# Figures - Efficiency

In [None]:
# Relative-efficiency figure for sample 5a at the shared bin RESOLUTION.
# NOTE(review): min_0bp_count / min_gf_count look like minimum-count cutoffs
# for the 0bp and gap-fill probes respectively — confirm against the
# plot_relative_efficiency helper.
plot_relative_efficiency(adata_5a, resolution=RESOLUTION, min_0bp_count=1_200, min_gf_count=750)

In [None]:
# Relative-efficiency figure for sample 5b, using the same thresholds as 5a.
# NOTE(review): min_0bp_count / min_gf_count look like minimum-count cutoffs
# for the 0bp and gap-fill probes respectively — confirm against the
# plot_relative_efficiency helper.
plot_relative_efficiency(adata_5b, resolution=RESOLUTION, min_0bp_count=1_200, min_gf_count=750)

# Plot the genotypes spatially

In [None]:
# Sort probes by capture rate and plot each on the spatial coords
# After the WTA join, ``adata_5a`` is accessed through ``.tables[...]`` (see
# the library-size cells), so the probe annotations and counts are read from
# the gap-fill table at the working resolution rather than from ``adata_5a``.
gf_table_5a = adata_5a.tables[f'gf_square_{RESOLUTION:03d}um']

# One row per feature: its probe name and its total count over all bins.
# Totals are computed once here instead of re-slicing the matrix inside a
# sort key for every entry.
feature_totals_5a = pd.DataFrame({
    'probe': gf_table_5a.var['probe'].astype(str).to_numpy(),
    'size': np.asarray(gf_table_5a.X.sum(axis=0)).ravel(),
})
# Collapse to one total per probe, so a probe with several features is
# plotted once rather than once per feature.
probe_totals_5a = feature_totals_5a.groupby('probe', sort=False)['size'].sum()
# Skip the "0bp" probes, as before.
probe_totals_5a = probe_totals_5a[~probe_totals_5a.index.str.contains("0bp")]
probes = probe_totals_5a.sort_values(ascending=False, kind='stable').index.tolist()

for probe in probes:
    print(f"Plotting {probe}")
    plot_genotypes(adata_5a, probe, resolution=RESOLUTION)
    plt.show()
    plt.clf()

In [None]:
# Sort probes by capture rate and plot each on the spatial coords
# After the WTA join, ``adata_5b`` is accessed through ``.tables[...]`` (see
# the library-size cells), so the probe annotations and counts are read from
# the gap-fill table at the working resolution rather than from ``adata_5b``.
gf_table_5b = adata_5b.tables[f'gf_square_{RESOLUTION:03d}um']

# One row per feature: its probe name and its total count over all bins.
# Totals are computed once here instead of re-slicing the matrix inside a
# sort key for every entry.
feature_totals_5b = pd.DataFrame({
    'probe': gf_table_5b.var['probe'].astype(str).to_numpy(),
    'size': np.asarray(gf_table_5b.X.sum(axis=0)).ravel(),
})
# Collapse to one total per probe, so a probe with several features is
# plotted once rather than once per feature.
probe_totals_5b = feature_totals_5b.groupby('probe', sort=False)['size'].sum()
# Skip the "0bp" probes, as before.
probe_totals_5b = probe_totals_5b[~probe_totals_5b.index.str.contains("0bp")]
probes = probe_totals_5b.sort_values(ascending=False, kind='stable').index.tolist()

for probe in probes:
    print(f"Plotting {probe}")
    plot_genotypes(adata_5b, probe, resolution=RESOLUTION)
    plt.show()
    plt.clf()