In [1]:
###############################
#Author: Ayse Dincer
#Notebook for getting GTEx names of healthy tissue samples
###############################


In [2]:
import numpy as np
import pandas as pd
import csv

In [3]:
#Read sample names file
data_df = pd.read_table('GTEx_v7_Annotations_SampleAttributesDS.txt', sep = '\t', index_col=0)
print(data_df.shape)


(15598, 62)


In [13]:
#Define mapping for tissues
def getTissueMappings(cancer_name, tissue_name):

    tissue_mappings = data_df['SMTSD']
    selected_indices = tissue_mappings[[i for i, s in enumerate(tissue_mappings.values) if tissue_name in s]].index
    print("Count of samples ", len(selected_indices))
    print(tissue_mappings[selected_indices])
    
#     with open('../../ALL_CANCER_FILES/' + cancer_name + '/HEALTHY_TISSUE_FILES/GTEX_' + cancer_name + '_SAMPLES.txt', 'w') as f:
#         for item in selected_indices:
#             f.write("%s\n" % item)
            
    return tissue_mappings[selected_indices]



In [14]:
#Run it for each cancer type we have

In [16]:
getTissueMappings('BRCA', 'Breast')
print(np.unique(getTissueMappings('BRCA', 'Breast')))

Count of samples  306
SAMPID
GTEX-1117F-2826-SM-5GZXL    Breast - Mammary Tissue
GTEX-111YS-1926-SM-5GICC    Breast - Mammary Tissue
GTEX-1122O-1226-SM-5H113    Breast - Mammary Tissue
GTEX-117XS-1926-SM-5GICO    Breast - Mammary Tissue
GTEX-117YX-1426-SM-5H12H    Breast - Mammary Tissue
                                     ...           
GTEX-ZYT6-0126-SM-5E45J     Breast - Mammary Tissue
GTEX-ZYW4-0826-SM-5GIDG     Breast - Mammary Tissue
GTEX-ZYWO-2326-SM-5GZY4     Breast - Mammary Tissue
GTEX-ZZ64-1226-SM-5E43R     Breast - Mammary Tissue
GTEX-ZZPU-0626-SM-5E43T     Breast - Mammary Tissue
Name: SMTSD, Length: 306, dtype: object
Count of samples  306
SAMPID
GTEX-1117F-2826-SM-5GZXL    Breast - Mammary Tissue
GTEX-111YS-1926-SM-5GICC    Breast - Mammary Tissue
GTEX-1122O-1226-SM-5H113    Breast - Mammary Tissue
GTEX-117XS-1926-SM-5GICO    Breast - Mammary Tissue
GTEX-117YX-1426-SM-5H12H    Breast - Mammary Tissue
                                     ...           
GTEX-ZYT6-0126-SM-

In [17]:
getTissueMappings('COLON', 'Colon')
print(np.unique(getTissueMappings('COLON', 'Colon')))

Count of samples  539
SAMPID
GTEX-111CU-1226-SM-5EGIN       Colon - Sigmoid
GTEX-111CU-1426-SM-5GZYP    Colon - Transverse
GTEX-111VG-2226-SM-5N9DU    Colon - Transverse
GTEX-111YS-1626-SM-5GZZ9    Colon - Transverse
GTEX-111YS-1826-SM-5GIEP       Colon - Sigmoid
                                   ...        
GTEX-ZYVF-2826-SM-5GIDK     Colon - Transverse
GTEX-ZYWO-1626-SM-5E44H     Colon - Transverse
GTEX-ZYY3-2226-SM-5E45A        Colon - Sigmoid
GTEX-ZZ64-0726-SM-5GZX4     Colon - Transverse
GTEX-ZZ64-0826-SM-5E449        Colon - Sigmoid
Name: SMTSD, Length: 539, dtype: object
Count of samples  539
SAMPID
GTEX-111CU-1226-SM-5EGIN       Colon - Sigmoid
GTEX-111CU-1426-SM-5GZYP    Colon - Transverse
GTEX-111VG-2226-SM-5N9DU    Colon - Transverse
GTEX-111YS-1626-SM-5GZZ9    Colon - Transverse
GTEX-111YS-1826-SM-5GIEP       Colon - Sigmoid
                                   ...        
GTEX-ZYVF-2826-SM-5GIDK     Colon - Transverse
GTEX-ZYWO-1626-SM-5E44H     Colon - Transverse
GTEX-ZYY3

In [19]:
getTissueMappings('LUNG', 'Lung')
print(np.unique(getTissueMappings('LUNG', 'Lung')))

Count of samples  607
SAMPID
GTEX-111CU-0326-SM-5GZXO    Lung
GTEX-111FC-1126-SM-5GZWU    Lung
GTEX-111VG-0726-SM-5GIDC    Lung
GTEX-111YS-0626-SM-5GZXV    Lung
GTEX-1122O-0126-SM-5GICA    Lung
                            ... 
GTEX-ZYW4-1526-SM-5SIBA     Lung
GTEX-ZYWO-0826-SM-5E45W     Lung
GTEX-ZYY3-0926-SM-5E454     Lung
GTEX-ZZPT-1326-SM-5E43H     Lung
GTEX-ZZPU-0526-SM-5E44U     Lung
Name: SMTSD, Length: 607, dtype: object
Count of samples  607
SAMPID
GTEX-111CU-0326-SM-5GZXO    Lung
GTEX-111FC-1126-SM-5GZWU    Lung
GTEX-111VG-0726-SM-5GIDC    Lung
GTEX-111YS-0626-SM-5GZXV    Lung
GTEX-1122O-0126-SM-5GICA    Lung
                            ... 
GTEX-ZYW4-1526-SM-5SIBA     Lung
GTEX-ZYWO-0826-SM-5E45W     Lung
GTEX-ZYY3-0926-SM-5E454     Lung
GTEX-ZZPT-1326-SM-5E43H     Lung
GTEX-ZZPU-0526-SM-5E44U     Lung
Name: SMTSD, Length: 607, dtype: object
['Lung']


In [21]:
getTissueMappings('AML', 'Whole Blood')
print(np.unique(getTissueMappings('AML', 'Whole Blood')))

Count of samples  2412
SAMPID
GTEX-1117F-0003-SM-58Q7G    Whole Blood
GTEX-1117F-0003-SM-5DWSB    Whole Blood
GTEX-1117F-0003-SM-6WBT7    Whole Blood
GTEX-111CU-0003-SM-58Q95    Whole Blood
GTEX-111CU-0003-SM-5DWTR    Whole Blood
                               ...     
GTEX-ZZPT-0002-SM-5DWTU     Whole Blood
GTEX-ZZPT-0002-SM-6WBUG     Whole Blood
GTEX-ZZPU-0003-SM-58Q8V     Whole Blood
GTEX-ZZPU-0003-SM-5DWTO     Whole Blood
GTEX-ZZPU-0003-SM-6WBUC     Whole Blood
Name: SMTSD, Length: 2412, dtype: object
Count of samples  2412
SAMPID
GTEX-1117F-0003-SM-58Q7G    Whole Blood
GTEX-1117F-0003-SM-5DWSB    Whole Blood
GTEX-1117F-0003-SM-6WBT7    Whole Blood
GTEX-111CU-0003-SM-58Q95    Whole Blood
GTEX-111CU-0003-SM-5DWTR    Whole Blood
                               ...     
GTEX-ZZPT-0002-SM-5DWTU     Whole Blood
GTEX-ZZPT-0002-SM-6WBUG     Whole Blood
GTEX-ZZPU-0003-SM-58Q8V     Whole Blood
GTEX-ZZPU-0003-SM-5DWTO     Whole Blood
GTEX-ZZPU-0003-SM-6WBUC     Whole Blood
Name: SMTSD, Length

In [22]:
getTissueMappings('BRAIN', 'Brain')
print(np.unique(getTissueMappings('BRAIN', 'Brain')))

Count of samples  2076
SAMPID
GTEX-1117F-3226-SM-5N9CT        Brain - Cortex
GTEX-111FC-3126-SM-5GZZ2        Brain - Cortex
GTEX-111FC-3326-SM-5GZYV    Brain - Cerebellum
GTEX-1128S-2726-SM-5H12C        Brain - Cortex
GTEX-1128S-2826-SM-5N9DI    Brain - Cerebellum
                                   ...        
GTEX-ZYFD-3026-SM-5E44C         Brain - Cortex
GTEX-ZYY3-3026-SM-5GIEJ     Brain - Cerebellum
GTEX-ZYY3-3126-SM-5SI9L         Brain - Cortex
GTEX-ZZPT-2926-SM-5EQ5S     Brain - Cerebellum
GTEX-ZZPT-3026-SM-5GZXH         Brain - Cortex
Name: SMTSD, Length: 2076, dtype: object
Count of samples  2076
SAMPID
GTEX-1117F-3226-SM-5N9CT        Brain - Cortex
GTEX-111FC-3126-SM-5GZZ2        Brain - Cortex
GTEX-111FC-3326-SM-5GZYV    Brain - Cerebellum
GTEX-1128S-2726-SM-5H12C        Brain - Cortex
GTEX-1128S-2826-SM-5N9DI    Brain - Cerebellum
                                   ...        
GTEX-ZYFD-3026-SM-5E44C         Brain - Cortex
GTEX-ZYY3-3026-SM-5GIEJ     Brain - Cerebellum
GTEX-Z

In [23]:
getTissueMappings('SKIN', 'Skin')
print(np.unique(getTissueMappings('SKIN', 'Skin')))

Count of samples  1000
SAMPID
GTEX-1117F-2926-SM-5GZYI    Skin - Not Sun Exposed (Suprapubic)
GTEX-111CU-1126-SM-5EGIM    Skin - Not Sun Exposed (Suprapubic)
GTEX-111CU-1926-SM-5GZYZ         Skin - Sun Exposed (Lower leg)
GTEX-111FC-0126-SM-5N9DL         Skin - Sun Exposed (Lower leg)
GTEX-111FC-2526-SM-5GZXU    Skin - Not Sun Exposed (Suprapubic)
                                           ...                 
GTEX-ZYY3-2926-SM-5GIEB     Skin - Not Sun Exposed (Suprapubic)
GTEX-ZZ64-1026-SM-5GZXG     Skin - Not Sun Exposed (Suprapubic)
GTEX-ZZ64-1726-SM-5GZYB          Skin - Sun Exposed (Lower leg)
GTEX-ZZPT-0226-SM-5E43X          Skin - Sun Exposed (Lower leg)
GTEX-ZZPU-0826-SM-5GZX5     Skin - Not Sun Exposed (Suprapubic)
Name: SMTSD, Length: 1000, dtype: object
Count of samples  1000
SAMPID
GTEX-1117F-2926-SM-5GZYI    Skin - Not Sun Exposed (Suprapubic)
GTEX-111CU-1126-SM-5EGIM    Skin - Not Sun Exposed (Suprapubic)
GTEX-111CU-1926-SM-5GZYZ         Skin - Sun Exposed (Lower leg)
GTE

In [24]:
getTissueMappings('SARCOMA', 'Muscle')
print(np.unique(getTissueMappings('SARCOMA', 'Muscle')))

Count of samples  718
SAMPID
GTEX-1117F-0426-SM-5EGHI    Muscle - Skeletal
GTEX-111CU-2026-SM-5GZZC    Muscle - Skeletal
GTEX-111FC-0326-SM-5GZZ1    Muscle - Skeletal
GTEX-111VG-2626-SM-5GZY2    Muscle - Skeletal
GTEX-111YS-2326-SM-5987L    Muscle - Skeletal
                                  ...        
GTEX-ZYWO-2626-SM-5E43V     Muscle - Skeletal
GTEX-ZYY3-0526-SM-5E45G     Muscle - Skeletal
GTEX-ZZ64-1526-SM-5E43K     Muscle - Skeletal
GTEX-ZZPT-0626-SM-5GZXT     Muscle - Skeletal
GTEX-ZZPU-2626-SM-5E45Y     Muscle - Skeletal
Name: SMTSD, Length: 718, dtype: object
Count of samples  718
SAMPID
GTEX-1117F-0426-SM-5EGHI    Muscle - Skeletal
GTEX-111CU-2026-SM-5GZZC    Muscle - Skeletal
GTEX-111FC-0326-SM-5GZZ1    Muscle - Skeletal
GTEX-111VG-2626-SM-5GZY2    Muscle - Skeletal
GTEX-111YS-2326-SM-5987L    Muscle - Skeletal
                                  ...        
GTEX-ZYWO-2626-SM-5E43V     Muscle - Skeletal
GTEX-ZYY3-0526-SM-5E45G     Muscle - Skeletal
GTEX-ZZ64-1526-SM-5E43K     

In [25]:
getTissueMappings('LIVER', 'Liver')
print(np.unique(getTissueMappings('LIVER', 'Liver')))

Count of samples  188
SAMPID
GTEX-1192X-1026-SM-5H12P    Liver
GTEX-11DXY-0526-SM-5EGGQ    Liver
GTEX-11DXZ-0126-SM-5EGGY    Liver
GTEX-11EQ9-0526-SM-5A5JZ    Liver
GTEX-11GSP-0626-SM-5986T    Liver
                            ...  
GTEX-ZVT3-1626-SM-5GU66     Liver
GTEX-ZVT4-0626-SM-5E45T     Liver
GTEX-ZYT6-0626-SM-5E45V     Liver
GTEX-ZYY3-0626-SM-5NQ6W     Liver
GTEX-ZZPU-0426-SM-5GZYH     Liver
Name: SMTSD, Length: 188, dtype: object
Count of samples  188
SAMPID
GTEX-1192X-1026-SM-5H12P    Liver
GTEX-11DXY-0526-SM-5EGGQ    Liver
GTEX-11DXZ-0126-SM-5EGGY    Liver
GTEX-11EQ9-0526-SM-5A5JZ    Liver
GTEX-11GSP-0626-SM-5986T    Liver
                            ...  
GTEX-ZVT3-1626-SM-5GU66     Liver
GTEX-ZVT4-0626-SM-5E45T     Liver
GTEX-ZYT6-0626-SM-5E45V     Liver
GTEX-ZYY3-0626-SM-5NQ6W     Liver
GTEX-ZZPU-0426-SM-5GZYH     Liver
Name: SMTSD, Length: 188, dtype: object
['Liver']


In [26]:
getTissueMappings('KIDNEY', 'Kidney')
print(np.unique(getTissueMappings('KIDNEY', 'Kidney')))

Count of samples  50
SAMPID
GTEX-11GS4-2326-SM-5A5KS    Kidney - Cortex
GTEX-11OF3-1326-SM-5N9FJ    Kidney - Cortex
GTEX-11PRG-2226-SM-5GU5R    Kidney - Cortex
GTEX-11TTK-1926-SM-5PNW8    Kidney - Cortex
GTEX-12696-0926-SM-5FQTV    Kidney - Cortex
GTEX-12WSG-0826-SM-5EQ5A    Kidney - Cortex
GTEX-13112-2126-SM-5GCO4    Kidney - Cortex
GTEX-1399S-0526-SM-5IJG8    Kidney - Cortex
GTEX-13NYB-1726-SM-5N9G2    Kidney - Cortex
GTEX-13O1R-2526-SM-5N9FW    Kidney - Cortex
GTEX-13OVI-1126-SM-5KLZF    Kidney - Cortex
GTEX-13OVL-1826-SM-5KLZR    Kidney - Cortex
GTEX-13OW6-1826-SM-5N9F9    Kidney - Cortex
GTEX-13RTJ-2226-SM-5S2Q1    Kidney - Cortex
GTEX-145MN-0326-SM-5QGQI    Kidney - Cortex
GTEX-147F4-2626-SM-5Q5CS    Kidney - Cortex
GTEX-1497J-0826-SM-5NQAJ    Kidney - Cortex
GTEX-14C39-2126-SM-664OH    Kidney - Cortex
GTEX-14C5O-2026-SM-5YYB1    Kidney - Cortex
GTEX-14E6D-2526-SM-5YYA9    Kidney - Cortex
GTEX-15CHQ-2126-SM-6871M    Kidney - Cortex
GTEX-15DYW-2026-SM-6AJBD    Kidney - Cortex
GTEX

In [27]:
getTissueMappings('OV', 'Ovary')
print(np.unique(getTissueMappings('OV', 'Ovary')))

Count of samples  138
SAMPID
GTEX-1117F-2226-SM-5N9CH    Ovary
GTEX-11DXX-1426-SM-5GIDU    Ovary
GTEX-11EM3-1726-SM-5N9D1    Ovary
GTEX-11EMC-1726-SM-5H11P    Ovary
GTEX-11GSP-0226-SM-5A5KV    Ovary
                            ...  
GTEX-ZVT3-2626-SM-5GU5L     Ovary
GTEX-ZYFG-1726-SM-5GZZB     Ovary
GTEX-ZYWO-1926-SM-5E456     Ovary
GTEX-ZYY3-2726-SM-5EGH4     Ovary
GTEX-ZZPU-2126-SM-5EGIU     Ovary
Name: SMTSD, Length: 138, dtype: object
Count of samples  138
SAMPID
GTEX-1117F-2226-SM-5N9CH    Ovary
GTEX-11DXX-1426-SM-5GIDU    Ovary
GTEX-11EM3-1726-SM-5N9D1    Ovary
GTEX-11EMC-1726-SM-5H11P    Ovary
GTEX-11GSP-0226-SM-5A5KV    Ovary
                            ...  
GTEX-ZVT3-2626-SM-5GU5L     Ovary
GTEX-ZYFG-1726-SM-5GZZB     Ovary
GTEX-ZYWO-1926-SM-5E456     Ovary
GTEX-ZYY3-2726-SM-5EGH4     Ovary
GTEX-ZZPU-2126-SM-5EGIU     Ovary
Name: SMTSD, Length: 138, dtype: object
['Ovary']


In [28]:
getTissueMappings('PROSTATE', 'Prostate')
print(np.unique(getTissueMappings('PROSTATE', 'Prostate')))

Count of samples  159
SAMPID
GTEX-111CU-1526-SM-5N9FS    Prostate
GTEX-111FC-2026-SM-5GZYO    Prostate
GTEX-111YS-1726-SM-5GIED    Prostate
GTEX-117YW-1426-SM-5EGGO    Prostate
GTEX-117YX-1526-SM-5H12T    Prostate
                              ...   
GTEX-ZUA1-2826-SM-59HLE     Prostate
GTEX-ZVP2-0826-SM-59HJ6     Prostate
GTEX-ZY6K-1526-SM-5GZXE     Prostate
GTEX-ZYFD-2526-SM-5E45L     Prostate
GTEX-ZZ64-0926-SM-5E44L     Prostate
Name: SMTSD, Length: 159, dtype: object
Count of samples  159
SAMPID
GTEX-111CU-1526-SM-5N9FS    Prostate
GTEX-111FC-2026-SM-5GZYO    Prostate
GTEX-111YS-1726-SM-5GIED    Prostate
GTEX-117YW-1426-SM-5EGGO    Prostate
GTEX-117YX-1526-SM-5H12T    Prostate
                              ...   
GTEX-ZUA1-2826-SM-59HLE     Prostate
GTEX-ZVP2-0826-SM-59HJ6     Prostate
GTEX-ZY6K-1526-SM-5GZXE     Prostate
GTEX-ZYFD-2526-SM-5E45L     Prostate
GTEX-ZZ64-0926-SM-5E44L     Prostate
Name: SMTSD, Length: 159, dtype: object
['Prostate']


In [29]:
getTissueMappings('CERVICAL', 'Cervix')
print(np.unique(getTissueMappings('CERVICAL', 'Cervix')))

Count of samples  11
SAMPID
GTEX-S32W-1526-SM-4AD6Z    Cervix - Ectocervix
GTEX-S32W-1626-SM-4AD6G    Cervix - Endocervix
GTEX-S341-1126-SM-4AD6T    Cervix - Ectocervix
GTEX-S341-1326-SM-4AD72    Cervix - Endocervix
GTEX-S4UY-1426-SM-4AD6Y    Cervix - Ectocervix
GTEX-T5JW-0726-SM-4DM6D    Cervix - Ectocervix
GTEX-T6MO-1426-SM-4DM73    Cervix - Endocervix
GTEX-TML8-0726-SM-4DXTT    Cervix - Endocervix
GTEX-TSE9-2726-SM-4DXSQ    Cervix - Endocervix
GTEX-TSE9-2826-SM-4DXTF    Cervix - Ectocervix
GTEX-U3ZN-1626-SM-4DXTZ    Cervix - Ectocervix
Name: SMTSD, dtype: object
Count of samples  11
SAMPID
GTEX-S32W-1526-SM-4AD6Z    Cervix - Ectocervix
GTEX-S32W-1626-SM-4AD6G    Cervix - Endocervix
GTEX-S341-1126-SM-4AD6T    Cervix - Ectocervix
GTEX-S341-1326-SM-4AD72    Cervix - Endocervix
GTEX-S4UY-1426-SM-4AD6Y    Cervix - Ectocervix
GTEX-T5JW-0726-SM-4DM6D    Cervix - Ectocervix
GTEX-T6MO-1426-SM-4DM73    Cervix - Endocervix
GTEX-TML8-0726-SM-4DXTT    Cervix - Endocervix
GTEX-TSE9-2726-SM-4DXSQ 

In [30]:
getTissueMappings('BLADDER', 'Bladder')
print(np.unique(getTissueMappings('BLADDER', 'Bladder')))

Count of samples  11
SAMPID
GTEX-S32W-1126-SM-4AD5V    Bladder
GTEX-S3XE-1226-SM-4AD4L    Bladder
GTEX-S4Q7-0926-SM-4AD5D    Bladder
GTEX-S4UY-0926-SM-4AD6O    Bladder
GTEX-SE5C-1026-SM-4BRUG    Bladder
GTEX-SNMC-0826-SM-4DM66    Bladder
GTEX-SNOS-0526-SM-4DM54    Bladder
GTEX-TMMY-1526-SM-4DXST    Bladder
GTEX-U3ZM-0826-SM-4DXU6    Bladder
GTEX-U3ZN-1226-SM-4DXUD    Bladder
GTEX-U4B1-1226-SM-4DXT7    Bladder
Name: SMTSD, dtype: object
Count of samples  11
SAMPID
GTEX-S32W-1126-SM-4AD5V    Bladder
GTEX-S3XE-1226-SM-4AD4L    Bladder
GTEX-S4Q7-0926-SM-4AD5D    Bladder
GTEX-S4UY-0926-SM-4AD6O    Bladder
GTEX-SE5C-1026-SM-4BRUG    Bladder
GTEX-SNMC-0826-SM-4DM66    Bladder
GTEX-SNOS-0526-SM-4DM54    Bladder
GTEX-TMMY-1526-SM-4DXST    Bladder
GTEX-U3ZM-0826-SM-4DXU6    Bladder
GTEX-U3ZN-1226-SM-4DXUD    Bladder
GTEX-U4B1-1226-SM-4DXT7    Bladder
Name: SMTSD, dtype: object
['Bladder']


In [31]:
getTissueMappings('STOMACH', 'Stomach')
print(np.unique(getTissueMappings('STOMACH', 'Stomach')))

Count of samples  272
SAMPID
GTEX-111CU-0926-SM-5EGIK    Stomach
GTEX-111YS-1126-SM-5GZYQ    Stomach
GTEX-1122O-1926-SM-5EGIQ    Stomach
GTEX-117YW-2226-SM-5N9DB    Stomach
GTEX-117YX-1026-SM-5H11V    Stomach
                             ...   
GTEX-ZYFG-1326-SM-5GICJ     Stomach
GTEX-ZYVF-2726-SM-5GID4     Stomach
GTEX-ZYY3-1726-SM-5EGH3     Stomach
GTEX-ZZ64-0426-SM-5E43F     Stomach
GTEX-ZZPU-1426-SM-5GZZ6     Stomach
Name: SMTSD, Length: 272, dtype: object
Count of samples  272
SAMPID
GTEX-111CU-0926-SM-5EGIK    Stomach
GTEX-111YS-1126-SM-5GZYQ    Stomach
GTEX-1122O-1926-SM-5EGIQ    Stomach
GTEX-117YW-2226-SM-5N9DB    Stomach
GTEX-117YX-1026-SM-5H11V    Stomach
                             ...   
GTEX-ZYFG-1326-SM-5GICJ     Stomach
GTEX-ZYVF-2726-SM-5GID4     Stomach
GTEX-ZYY3-1726-SM-5EGH3     Stomach
GTEX-ZZ64-0426-SM-5E43F     Stomach
GTEX-ZZPU-1426-SM-5GZZ6     Stomach
Name: SMTSD, Length: 272, dtype: object
['Stomach']


In [32]:
getTissueMappings('PANCREAS', 'Pancreas')
print(np.unique(getTissueMappings('PANCREAS', 'Pancreas')))


Count of samples  268
SAMPID
GTEX-111CU-0526-SM-5EGHK    Pancreas
GTEX-111YS-1226-SM-5EGGJ    Pancreas
GTEX-1122O-0726-SM-5GIEV    Pancreas
GTEX-1128S-0826-SM-5GZZI    Pancreas
GTEX-117YX-0226-SM-5EGH6    Pancreas
                              ...   
GTEX-ZYT6-1326-SM-5E453     Pancreas
GTEX-ZYW4-2126-SM-59HJ9     Pancreas
GTEX-ZYWO-1326-SM-5SI8X     Pancreas
GTEX-ZYY3-0826-SM-5E44R     Pancreas
GTEX-ZZPU-0726-SM-5N9C8     Pancreas
Name: SMTSD, Length: 268, dtype: object
Count of samples  268
SAMPID
GTEX-111CU-0526-SM-5EGHK    Pancreas
GTEX-111YS-1226-SM-5EGGJ    Pancreas
GTEX-1122O-0726-SM-5GIEV    Pancreas
GTEX-1128S-0826-SM-5GZZI    Pancreas
GTEX-117YX-0226-SM-5EGH6    Pancreas
                              ...   
GTEX-ZYT6-1326-SM-5E453     Pancreas
GTEX-ZYW4-2126-SM-59HJ9     Pancreas
GTEX-ZYWO-1326-SM-5SI8X     Pancreas
GTEX-ZYY3-0826-SM-5E44R     Pancreas
GTEX-ZZPU-0726-SM-5N9C8     Pancreas
Name: SMTSD, Length: 268, dtype: object
['Pancreas']


In [38]:
getTissueMappings('UTERINE', 'Uterus')

Count of samples  117
SAMPID
GTEX-1117F-2426-SM-5EGGH    Uterus
GTEX-113JC-2226-SM-5EGJG    Uterus
GTEX-11DXX-1526-SM-5H115    Uterus
GTEX-11EM3-1926-SM-5987U    Uterus
GTEX-11EMC-1826-SM-5A5JT    Uterus
GTEX-11GSP-2426-SM-5N9BD    Uterus
GTEX-11I78-2126-SM-5A5K8    Uterus
GTEX-11P81-1626-SM-5BC52    Uterus
GTEX-11ZTS-2326-SM-5EQMY    Uterus
GTEX-11ZTT-1726-SM-5EQL4    Uterus
GTEX-12WSD-2826-SM-59HKT    Uterus
GTEX-12WSG-2126-SM-5EGJ7    Uterus
GTEX-12WSK-2026-SM-5CVNB    Uterus
GTEX-12ZZX-2126-SM-5LZVL    Uterus
GTEX-1313W-2826-SM-5P9G1    Uterus
GTEX-131XG-2026-SM-5GCN5    Uterus
GTEX-131YS-2326-SM-5IJFJ    Uterus
GTEX-132AR-1526-SM-5KM1L    Uterus
GTEX-1399S-2226-SM-5IFEW    Uterus
GTEX-1399U-1326-SM-5IJET    Uterus
GTEX-139D8-2526-SM-5N9G3    Uterus
GTEX-13D11-1226-SM-5IFGA    Uterus
GTEX-13FTX-1026-SM-5J2O5    Uterus
GTEX-13N11-1126-SM-5KM41    Uterus
GTEX-13OVI-1026-SM-5L3EM    Uterus
GTEX-13OVJ-2326-SM-5IJGA    Uterus
GTEX-13PL7-2026-SM-5IFGK    Uterus
GTEX-13QBU-1626-SM-5LU4S  

In [39]:
getTissueMappings('THYROID', 'Thyroid')

Count of samples  564
SAMPID
GTEX-111CU-0226-SM-5GZXC    Thyroid
GTEX-111FC-1026-SM-5GZX1    Thyroid
GTEX-111VG-0526-SM-5N9BW    Thyroid
GTEX-111YS-0726-SM-5GZY8    Thyroid
GTEX-1122O-0226-SM-5N9DA    Thyroid
GTEX-1128S-0126-SM-5H12S    Thyroid
GTEX-113JC-0126-SM-5EGJW    Thyroid
GTEX-117XS-0526-SM-5987Q    Thyroid
GTEX-117YW-0126-SM-5EGGN    Thyroid
GTEX-117YX-1226-SM-5H11S    Thyroid
GTEX-1192W-0126-SM-5EGGS    Thyroid
GTEX-1192X-1126-SM-5EGGU    Thyroid
GTEX-11DXX-0226-SM-5P9HL    Thyroid
GTEX-11DXY-0426-SM-5H12R    Thyroid
GTEX-11DXZ-0926-SM-5N9CG    Thyroid
GTEX-11DYG-0826-SM-5N9GH    Thyroid
GTEX-11DZ1-2726-SM-5A5KH    Thyroid
GTEX-11EI6-0726-SM-59866    Thyroid
GTEX-11EM3-0126-SM-5985K    Thyroid
GTEX-11EMC-0226-SM-5EGLP    Thyroid
GTEX-11EQ8-0826-SM-5N9FG    Thyroid
GTEX-11EQ9-0626-SM-5A5K1    Thyroid
GTEX-11GS4-0826-SM-5986J    Thyroid
GTEX-11GSO-0626-SM-5A5LW    Thyroid
GTEX-11GSP-0126-SM-5A5KU    Thyroid
GTEX-11I78-0526-SM-5986A    Thyroid
GTEX-11LCK-0526-SM-5A5M9    Thyroid

In [2]:

#Define all cancer types
cancer_types = ['BRCA', 'AML', 
                'COLON', 'LUNG', 
                'BRAIN', 'OV', 
                'SARCOMA', 'KIDNEY', 
                'LIVER', 'STOMACH', 
                'SKIN', 'PROSTATE',
                'THYROID', 'UTERINE', 
                'HEAD_NECK', 'PANCREAS',
                'CERVICAL', 'BLADDER']

In [7]:
for cancer in np.sort(cancer_types):
    print(cancer)
    if cancer != 'HEAD_NECK':
        data_df = pd.read_csv('../ALL_CANCER_FILES/' + cancer + '/HEALTHY_TISSUE_FILES/' + cancer + '_DeepProfile_GTEX_Healthy_Tissue_Embedding_150L.tsv')
        print(data_df.shape)
    

AML
(407, 1)
BLADDER
(10, 1)
BRAIN
(1670, 1)
BRCA
(289, 1)
CERVICAL
(10, 1)
COLON
(506, 1)
HEAD_NECK
KIDNEY
(44, 1)
LIVER
(175, 1)
LUNG
(426, 1)
OV
(132, 1)
PANCREAS
(247, 1)
PROSTATE
(151, 1)
SARCOMA
(563, 1)
SKIN
(859, 1)
STOMACH
(261, 1)
THYROID
(445, 1)
UTERINE
(110, 1)
