In [1]:
import scanpy as sc
import pandas as pd 
import numpy as np
import seaborn as sns

from scipy.stats import pearsonr

from sklearn.linear_model import ElasticNetCV, RidgeCV, LassoCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate, KFold
from sklearn.metrics import make_scorer, r2_score

In [2]:
import sys
import os
from importlib import reload

# Put the local GlossPath checkout at the front of the import search path so
# that it is found before any other installed copy.
package_path = os.path.abspath("/Genomics/pritykinlab/tamjeed/github_packages/GlossPath/")
sys.path[:0] = [package_path]

In [3]:
import matplotlib.pyplot as plt

In [4]:
import pickle

In [5]:
# Load the source AnnData object from disk. `og_adata` is treated as the
# unmodified original: the perturbation cells below only ever work on
# `og_adata.copy()` and read `og_adata.obs['biotin_raw']`.
og_adata = sc.read_h5ad('../joint_notebooks/datasets/lipstic_tumor_data_aug_11_2024.h5ad')

In [6]:
from scipy.sparse import csr_matrix

In [13]:
# Load the precomputed gene-set mapping used by the first perturbation cell,
# which looks up the 'hb_macrophage_cd40' entry.
# NOTE(review): pickle.load can execute arbitrary code during unpickling; only
# load this file if it was produced by a trusted pipeline.
with open('../pathway_computations/humanbase_cd40neighbors.pickle', 'rb') as handle:
    cd40neighbors = pickle.load(handle)

In [17]:
# Load the precomputed gene-set mapping used by the DC perturbation cell,
# which looks up the 'dc_0.4' entry.
# NOTE(review): pickle.load can execute arbitrary code during unpickling; only
# load this file if it was produced by a trusted pipeline.
with open('../pathway_computations/humanbase_dc_sweep.pickle', 'rb') as handle:
    dc_neighbors = pickle.load(handle)

## Creating 10 different multiplicative perturbations

In [11]:
# Write 10 perturbed replicates of the dataset. In each replicate every row
# gets one random multiplicative factor; that factor scales both the selected
# gene columns and the row's 'biotin_raw' value, so the two stay coupled.
y = og_adata.obs['biotin_raw']
for i in range(10):
    print(i)
    adata = og_adata.copy()
    adata_df = adata.to_df().copy()
    # Keep only the genes of the 'hb_macrophage_cd40' set that exist in this dataset.
    cd40_df = adata_df[adata_df.columns.intersection(cd40neighbors['hb_macrophage_cd40'])]

    # Seed with the replicate index so each output file is reproducible.
    np.random.seed(i)
    # Shape (n_rows, 1): one factor per row, broadcast across the selected columns.
    random_factors = np.random.uniform(0.5, 1.5, size=(cd40_df.shape[0], 1))

    # Scale, round to whole numbers, and store back as float.
    result_df = np.round(cd40_df * random_factors).astype(int).astype(float)

    adata_df[result_df.columns] = result_df

    # Apply the same per-row factor to the target (flattened to 1-D to align with y).
    result_y = np.round(y * random_factors.flatten()).astype(int).astype(float)
    # Rows whose original value was >= 1 are kept at >= 1 after perturbation.
    result_y = np.where(y >= 1, np.maximum(result_y, 1), result_y)

    print(np.percentile(result_y, 5))

    adata.obs['biotin_raw_perturbed'] = result_y

    # Convert the dense frame to CSR once and store it as the expression matrix.
    adata.X = csr_matrix(adata_df)

    adata.write('../joint_notebooks/datasets/{}_lipstic_tumor_data_perturbed_v4_m_oct_21_2024.h5ad'.format(i))

0
1.0
1
1.0
2
1.0
3
1.0
4
1.0
5
1.0
6
1.0
7
1.0
8
1.0
9
1.0


## Creating 10 different multiplicative perturbations - DC

In [18]:
# Write 10 perturbed replicates of the dataset using the 'dc_0.4' gene set.
# In each replicate every row gets one random multiplicative factor; that
# factor scales both the selected gene columns and the row's 'biotin_raw'
# value, so the two stay coupled.
y = og_adata.obs['biotin_raw']
for i in range(10):
    print(i)
    adata = og_adata.copy()
    adata_df = adata.to_df().copy()
    # NOTE: despite its name, `cd40_df` holds the 'dc_0.4' gene columns in this
    # cell; the name is kept so the notebook namespace is unchanged.
    cd40_df = adata_df[adata_df.columns.intersection(dc_neighbors['dc_0.4'])]

    # Seed with the replicate index so each output file is reproducible.
    np.random.seed(i)
    # Shape (n_rows, 1): one factor per row, broadcast across the selected columns.
    random_factors = np.random.uniform(0.5, 1.5, size=(cd40_df.shape[0], 1))

    # Scale, round to whole numbers, and store back as float.
    result_df = np.round(cd40_df * random_factors).astype(int).astype(float)

    adata_df[result_df.columns] = result_df

    # Apply the same per-row factor to the target (flattened to 1-D to align with y).
    result_y = np.round(y * random_factors.flatten()).astype(int).astype(float)
    # Rows whose original value was >= 1 are kept at >= 1 after perturbation.
    result_y = np.where(y >= 1, np.maximum(result_y, 1), result_y)

    print(np.percentile(result_y, 5))

    adata.obs['biotin_raw_perturbed'] = result_y

    # Convert the dense frame to CSR once and store it as the expression matrix.
    adata.X = csr_matrix(adata_df)

    adata.write('../joint_notebooks/datasets/{}_lipstic_tumor_data_perturbed_v4_dc04_oct_24_2024.h5ad'.format(i))

0
1.0
1
1.0
2
1.0
3
1.0
4
1.0
5
1.0
6
1.0
7
1.0
8
1.0
9
1.0
