# Brain Tumor Dataset retrieval

In [22]:
# BiocManager::install("ExperimentHub", "GEOquery")
install.packages(c("GenomicDataCommons", "dplyr", "readr"))

options(scipen=10)
options(repr.plot.width=12, repr.plot.height=12)

"packages 'GenomicDataCommons', 'dplyr', 'readr' are in use and will not be installed"


In [30]:
library(GEOquery)

gse <- getGEO("GSE50161", GSEMatrix = TRUE)[[1]]
brain_data <- exprs(gse)  # Gene expression matrix (genes x samples)

# Check available column names
#colnames(pData(gse))

# Tissues where the tumor was extracted
brain_labels <- pData(gse)$"tissue:ch1"
dim(brain_data)
table(brain_labels)

# Tissues of interest (most of the samples are located here)
selected_labels <- c("ependymoma", "glioblastoma", "medulloblastoma", "pilocytic astrocytoma")

# Filter the samples based on selected labels
selected_indices <- brain_labels %in% selected_labels
brain_data <- brain_data[, selected_indices]
brain_labels <- brain_labels[selected_indices]

# Normalize the data using log2 transformation (adding 1 to avoid log(0))
brain_data <- log2(brain_data + 1)

# Compute variance for each gene and select top 200 genes
gene_variances <- apply(brain_data, 1, var)  # Compute variance per gene
top_genes <- names(sort(gene_variances, decreasing = TRUE))[1:200]  # Select top 200 genes
brain_data <- brain_data[top_genes, ]  # Keep only top 200 genes

# Transpose the data so individuals (samples) are rows and genes are columns
brain_data <- t(brain_data)

# Convert to data frame and add "Description" column
brain_data <- data.frame(Description = brain_labels, brain_data)

# Check the final structure
dim(brain_data)  # Should be (number of selected samples) x (201, including Description)
head(brain_data)  # View first few rows

Found 1 file(s)

GSE50161_series_matrix.txt.gz

Using locally cached version: C:\Users\Neo\AppData\Local\Temp\Rtmpu6jrEV/GSE50161_series_matrix.txt.gz

Using locally cached version of GPL570 found here:
C:\Users\Neo\AppData\Local\Temp\Rtmpu6jrEV/GPL570.soft.gz 



brain_labels
                       cerebellum                        ependymoma 
                                2                                46 
  frontal lobe of cerebral cortex                      glioblastoma 
                                3                                34 
                          medulla                   medulloblastoma 
                                1                                22 
                         midbrain occipital lobe of cerebral cortex 
                                1                                 2 
 parietal lobe of cerebral cortex             pilocytic astrocytoma 
                                2                                15 
 temporal lobe of cerebral cortex                          thalamus 
                                1                                 1 

Unnamed: 0_level_0,Description,X224588_at,X201909_at,X229012_at,X210033_s_at,X236448_at,X240065_at,X206018_at,X225165_at,X220156_at,⋯,X206306_at,X203649_s_at,X206785_s_at,X1563933_a_at,X238603_at,X205751_at,X210271_at,X228347_at,X232720_at,X209138_x_at
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
GSM1214834,ependymoma,1.67813,3.644194,3.820705,3.735552,1.172587,3.624207,1.794471,3.691314,3.589067,⋯,3.59653,1.873599,2.146811,3.110174,1.524128,2.563318,1.450476,1.880268,1.737017,1.776431
GSM1214835,ependymoma,3.767506,1.6675,3.224272,3.56606,3.134809,3.381327,3.608933,3.828011,3.315655,⋯,1.716341,1.825732,2.149566,1.619617,1.601342,2.726899,2.637962,1.991922,1.788192,2.720051
GSM1214836,ependymoma,1.676252,3.747256,3.456146,3.642242,1.194883,3.520656,1.797164,3.463318,3.227379,⋯,3.431167,2.897142,2.983751,1.608201,1.587031,2.585998,1.449467,1.815894,1.748981,1.783816
GSM1214837,ependymoma,3.792194,1.636888,3.823623,3.786311,1.168471,3.498731,3.81855,2.747462,3.578139,⋯,1.943356,1.839781,2.128098,1.537581,3.151399,3.113747,1.424955,1.906251,3.647694,2.645952
GSM1214838,ependymoma,3.809949,1.639904,3.561411,1.590063,2.933787,2.997341,1.648297,3.624553,3.551927,⋯,3.594693,2.148758,2.130709,3.167416,1.614435,1.762541,1.4295,1.849851,1.719865,1.758591
GSM1214839,ependymoma,1.740624,3.700109,3.89331,3.761085,1.288353,3.74552,1.841266,3.714091,3.746951,⋯,3.263901,1.930619,2.309651,1.871377,1.540925,1.773522,1.461576,1.901027,1.799332,1.794916


In [27]:
table(brain_labels)

brain_labels
           ependymoma          glioblastoma       medulloblastoma 
                   46                    34                    22 
pilocytic astrocytoma 
                   15 

In [28]:
BrainTumor200 = brain_data
#save(BrainTumor200, file = "BrainTumor200.RData")