# Methylation SVM

In [1]:
library(e1071)
library(dplyr)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [2]:
# Load the two serialized inputs used by the rest of the notebook:
#   gdsc - drug-screen table (later filtered on DRUG_ID_lib; columns CL and
#          EFFECT are read from it)
#   meth - processed methylation data (NA-containing columns are dropped next)
gdsc <- readRDS(file = "data/gdsc_screened.rds")
meth <- readRDS(file = "data/methylation_processed.rds")

In [3]:
# Keep only the columns of `meth` that have no missing values.
# na.omit() removes rows containing NA, so the data is transposed first and
# transposed back afterwards to make it act on columns.
meth_nona <- meth %>%
    t() %>%
    na.omit() %>%
    t()

In [4]:
# Absolute Welch-style two-sample t statistic for a single feature.
#
# Args:
#   dat:  numeric vector of feature values, one entry per sample.
#   labs: group labels aligned with `dat`. Either logical (TRUE = effective)
#         or 0/1-coded (numeric, character or factor; 1 = effective).
#
# Returns:
#   abs(mean(effective) - mean(ineffective)) divided by the unpooled standard
#   error sqrt(var_e / n_e + var_i / n_i). The value is NA/NaN/Inf when a
#   group has fewer than two samples or when both group variances are zero.
get.t <- function(dat, labs){
    # Normalise labels to a logical mask. Subsetting with raw 0/1 numbers would
    # interpret them as positions (dat[1] repeated, zeros dropped) rather than
    # as group membership.
    if (!is.logical(labs)) {
        labs <- labs == 1
    }

    # split the data into effective and ineffective
    effect <- dat[labs]
    ineffect <- dat[!labs]

    # difference of the two sample means
    mean_diff <- mean(effect) - mean(ineffect)

    # standard error from the per-group variances and sample sizes
    # (unequal variances, i.e. no pooling)
    std_err <- sqrt(var(effect) / length(effect) +
                    var(ineffect) / length(ineffect))

    # Only the magnitude is used for ranking features, so the Welch degrees of
    # freedom are not computed here.
    abs(mean_diff / std_err)
}

In [5]:
# Drug identifiers to evaluate; each is matched against gdsc$DRUG_ID_lib and
# used to locate that drug's feature-subset file.
codes <- c("1006", "1007", "1008", "1011", "1014", "1016",
           "1019", "1026", "1032", "1054", "1060", "1062")

# Numbers of top-ranked features to try for every drug.
n_features <- c(2, 3, 5, 10, 50, 100, 200)

# Result grid: one row per drug code, one column per feature count.
# Dimensions and labels are derived from the vectors above so they cannot
# drift out of sync; cells start as NA and are filled by the main loop.
resultmat <- matrix(
    NA_real_,
    nrow = length(codes),
    ncol = length(n_features),
    dimnames = list(codes, as.character(n_features))
)

In [20]:
# For every drug in `codes` and every feature count in `n_features`:
#   1. rank the drug's candidate features by |t| (get.t) and keep the top ones,
#   2. tune cost/gamma of a radial SVM with e1071::tune,
#   3. estimate accuracy over 8 stratified 70/30 train/test splits,
#   4. store the mean test accuracy in resultmat[code, n_feature].
#
# NOTE(review): features are ranked and hyperparameters are tuned on all
# samples, including those later held out for testing, so the stored
# accuracies are likely optimistic. Moving both steps inside each split would
# remove that leakage.
for (code in codes) {
    subset_file <- file.path("meth_sub", paste0("meth_", code, "_subset_id.rds"))
    print(subset_file)
    subset_id <- readRDS(subset_file)

    # Use a loop-local name so the global `meth` loaded from disk is not
    # overwritten.
    meth_code <- meth_nona[, subset_id, drop = FALSE]
    gdsc_sub <- subset(gdsc, DRUG_ID_lib == code)[, c("CL", "EFFECT")]
    meth_sub <- meth_code[as.character(gdsc_sub$CL), , drop = FALSE]

    # stringsAsFactors = FALSE keeps CG as character on every R version;
    # indexing a matrix with a factor would use its integer codes instead of
    # the feature names.
    outcome <- data.frame(
        CG = colnames(meth_code),
        t_stat = unname(apply(meth_sub, 2, get.t, gdsc_sub$EFFECT)),
        stringsAsFactors = FALSE
    )

    for (n_feature in n_features) {
        # top_n() keeps ties, so more than n_feature features may be selected.
        top_features <- outcome %>% top_n(n_feature, t_stat)
        selected <- as.character(top_features$CG)

        meth_sub_final <- as.data.frame(
            cbind(meth_sub[, selected, drop = FALSE], EFFECT = gdsc_sub$EFFECT)
        )
        meth_sub_final$EFFECT <- as.factor(meth_sub_final$EFFECT)
        effective_sub <- meth_sub_final %>% filter(EFFECT == 1)
        ineffective_sub <- meth_sub_final %>% filter(EFFECT == 0)

        # Seed the cross-validation folds so each cell of resultmat is
        # reproducible on its own. Tune with the same kernel and scaling as
        # the model fitted below, otherwise the chosen cost/gamma belong to a
        # different (scaled) model.
        set.seed(1)
        tune.out <- tune(
            svm, EFFECT ~ ., data = meth_sub_final,
            kernel = "radial", scale = FALSE,
            ranges = list(
                cost = c(0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000),
                gamma = c(0.01, 0.1, 1, 10, 100, 1000)
            )
        )
        best <- tune.out$best.parameters

        n_repeats <- 8
        n_effective <- nrow(effective_sub)
        n_ineffective <- nrow(ineffective_sub)
        accuracy_test_svm <- numeric(n_repeats)
        for (i in seq_len(n_repeats)) {
            set.seed(i)
            # Stratified split: 70% (rounded down) of each class for training.
            train_effective_id <- sample(seq_len(n_effective),
                                         floor(0.7 * n_effective))
            train_ineffective_id <- sample(seq_len(n_ineffective),
                                           floor(0.7 * n_ineffective))
            # Explicit complements: x[-integer(0), ] would select zero rows
            # and silently drop a class whose training share is empty.
            test_effective_id <- setdiff(seq_len(n_effective),
                                         train_effective_id)
            test_ineffective_id <- setdiff(seq_len(n_ineffective),
                                           train_ineffective_id)

            train_sub <- rbind(effective_sub[train_effective_id, ],
                               ineffective_sub[train_ineffective_id, ])
            test_sub <- rbind(effective_sub[test_effective_id, ],
                              ineffective_sub[test_ineffective_id, ])

            mysvm <- svm(EFFECT ~ ., data = train_sub, kernel = "radial",
                         cost = best$cost, gamma = best$gamma, scale = FALSE)
            test_pred <- predict(mysvm, test_sub)
            accuracy_test_svm[i] <- mean(test_pred == test_sub[["EFFECT"]])
        }
        resultmat[code, as.character(n_feature)] <- mean(accuracy_test_svm)
    }
}

[1] "meth_sub/meth_1006_subset_id.rds"
[1] "meth_sub/meth_1007_subset_id.rds"
[1] "meth_sub/meth_1008_subset_id.rds"
[1] "meth_sub/meth_1011_subset_id.rds"
[1] "meth_sub/meth_1014_subset_id.rds"
[1] "meth_sub/meth_1016_subset_id.rds"
[1] "meth_sub/meth_1019_subset_id.rds"
[1] "meth_sub/meth_1026_subset_id.rds"
[1] "meth_sub/meth_1032_subset_id.rds"
[1] "meth_sub/meth_1054_subset_id.rds"
[1] "meth_sub/meth_1060_subset_id.rds"
[1] "meth_sub/meth_1062_subset_id.rds"


In [22]:
# Persist the drug-by-feature-count accuracy matrix for later use.
saveRDS(object = resultmat, file = "data/meth_SVM_result.rds")