# TGCT cancer proliferation example
------------------------------------------------------
This example follows the article's analysis for the TCGA samples that match testicular germ cell tumor (TGCT).
Start with all the needed imports:

In [None]:
# Import spearmanr from the public scipy.stats namespace; the
# scipy.stats.stats module path is a deprecated private alias.
from scipy.stats import spearmanr
import GOcSL

# 1. Read the relevant data
In this case, we focus on the TGCT data.

1. Have the PCNA genes as a CSV list in the working directory.
2. Have Table S1a in the working directory.
3. Have a CSV list of the TCGA samples that match testicular germ cell tumor (TGCT) in the working directory.
4. Load the TCGA data, keep only the genes that appear in the Human GAF, and filter the TGCT columns into a new variable.

In [None]:
# Short aliases for the package sub-modules used in this cell.
_loader = GOcSL.dataloader
_paths = GOcSL.constants

# PCNA gene list, read from the configured PCNA data path.
PCNA = _loader.get_PCNA_genes(_paths.PCNA_DATA_PATH)

# NOTE: the cSL genes are not loaded here; CSL_DATA_PATH is handed straight
# to CSL_Classifier in step 2.

# TCGA data; the loader receives both the TCGA data path and the path of the
# TGCT sample list (presumably to keep only the TGCT samples -- confirm in
# GOcSL.dataloader.get_TCGA_data).
TCGA = _loader.get_TCGA_data(
    _paths.TCGA_DATA_PATH,
    _paths.TGCT_SAMPLES_PATH,
)

# 2. Calculate the PI (Proliferation Index)
For each sample calculate:
1. The PI, which is based on the meta-PCNA signature and computed as the median expression value of the set of meta-PCNA genes
2. The cSL load, using the 33rd-percentile genes
3. The GOcSL load, using the 1st-percentile genes
4. The Spearman correlation between items 1 and 2, and between items 1 and 3

In [None]:
# Short alias for the package constants used throughout this cell.
_consts = GOcSL.constants

# Proliferation index computed from the PCNA genes and the TCGA data.
cancer_PI = GOcSL.utils.get_PI(PCNA, TCGA)

# cSL load, requested with a percentile argument of 33.
CSL = GOcSL.cSL.CSL_Classifier(_consts.CSL_DATA_PATH, TCGA)
cSL_load = CSL.get_cSL_load(33)

# GOcSL load, requested with a percentile argument of 1; the classifier is
# built with the RANDOM_FOREST constant.
GORF = GOcSL.GO2SL.GO_Classifier(
    _consts.HORLBECK_DATA_PATH,
    _consts.GO_BASIC_PATH,
    _consts.HUMAN_GAF_PATH,
    TCGA,
    _consts.RANDOM_FOREST,
)
GOcSL_load = GORF.get_GOcSL_load(1)

# Report each load's correlation with the proliferation index, heading first.
_reports = (
    ('The cSL load and cancer proliferation Spearman correlation\n', cSL_load),
    ('The GOcSL load and cancer proliferation Spearman correlation\n', GOcSL_load),
)
for _heading, _load in _reports:
    print(_heading)
    print(GOcSL.utils.correlate(_load, cancer_PI, _consts.SPEARMAN))