# Baseline method to determine SNP-SNP interaction

### Step 1: read in datafile

In [42]:
# Load the genotype table. The file has no header row, so read.csv() assigns
# the default column names V1, V2, ...; later steps address columns by
# position only. `header` is spelled out in full rather than relying on
# partial argument matching.
all_data <- read.csv("datafile/sample1.csv", header = FALSE, sep = ",")

### Step 2: determine the number of SNPs, samples, cases, and controls

In [43]:
# The last column holds the case/control status (compared against 1 and 0
# below); every column before it is treated as a SNP.
case_control_col <- ncol(all_data)
num_snp <- ncol(all_data) - 1
num_sample <- nrow(all_data)

# Count the rows in each group. Rows whose status is NA fall into neither.
num_case <- sum(all_data[, case_control_col] == 1, na.rm = TRUE)
num_control <- sum(all_data[, case_control_col] == 0, na.rm = TRUE)

### Step 3: get all combinations of snp index

In [44]:
# Enumerate every unordered pair of SNP column indices. The result is a
# 2-row matrix with one pair per column.
all_snp_combination <- combn(x = num_snp, m = 2)

### Step 4: define functions that compute the fraction of cases or controls explained by a SNP combination

In [45]:
# Fraction of cases in which both SNPs of a pair have the value 1.
#
# Args:
#   snp_comb:   vector whose first two elements are SNP column indices
#               into `data`.
#   data:       table with SNP columns and a case/control status column.
#               Defaults to the global `all_data`, so existing one-argument
#               calls keep working.
#   status_col: index of the status column (1 marks a case). Defaults to
#               the last column of `data`.
#
# Returns:
#   (number of cases with both SNPs equal to 1) / (number of cases), a
#   value between 0 and 1 despite the "percent" in the name. NaN when
#   `data` contains no cases. Rows with NA in either SNP or in the status
#   are never counted as matches.
percent_case_explained <- function(snp_comb, data = all_data,
                                   status_col = ncol(data)) {
    is_case <- data[, status_col] == 1
    both_set <- data[, snp_comb[1]] == 1 & data[, snp_comb[2]] == 1
    # The denominator is derived from `data` itself so it cannot go stale
    # relative to the table being scored.
    sum(both_set & is_case, na.rm = TRUE) / sum(is_case, na.rm = TRUE)
}

# Fraction of controls in which both SNPs of a pair have the value 1.
#
# Args:
#   snp_comb:   vector whose first two elements are SNP column indices
#               into `data`.
#   data:       table with SNP columns and a case/control status column.
#               Defaults to the global `all_data`, so existing one-argument
#               calls keep working.
#   status_col: index of the status column (0 marks a control). Defaults
#               to the last column of `data`.
#
# Returns:
#   (number of controls with both SNPs equal to 1) / (number of controls),
#   a value between 0 and 1 despite the "percent" in the name. NaN when
#   `data` contains no controls. Rows with NA in either SNP or in the
#   status are never counted as matches.
percent_control_explained <- function(snp_comb, data = all_data,
                                      status_col = ncol(data)) {
    is_control <- data[, status_col] == 0
    both_set <- data[, snp_comb[1]] == 1 & data[, snp_comb[2]] == 1
    # The denominator is derived from `data` itself so it cannot go stale
    # relative to the table being scored.
    sum(both_set & is_control, na.rm = TRUE) / sum(is_control, na.rm = TRUE)
}

### Step 5: iterate through all combinations and calculate the fraction explained in cases and in controls

In [46]:
# Build one row per SNP pair: the two SNP column indices plus the values
# returned by percent_case_explained() and percent_control_explained()
# for that pair.
num_combination <- ncol(all_snp_combination)

# Each column is built whole instead of patching data-frame cells one at a
# time inside a loop. seq_len() gives an empty sequence (not c(1, 0)) when
# there are no pairs, and vapply() guarantees one double per pair.
all_snp_comb_frame <- data.frame(
    snp1 = all_snp_combination[1, ],
    snp2 = all_snp_combination[2, ],
    case_percent = vapply(
        seq_len(num_combination),
        function(comb_idx) {
            percent_case_explained(all_snp_combination[1:2, comb_idx])
        },
        numeric(1)
    ),
    control_percent = vapply(
        seq_len(num_combination),
        function(comb_idx) {
            percent_control_explained(all_snp_combination[1:2, comb_idx])
        },
        numeric(1)
    )
)

### Step 6: select combinations whose fraction explained is above the threshold

In [47]:
# Keep the SNP pairs whose score is strictly greater than the cut-off, once
# by the case score and once by the control score. which() discards NA/NaN
# comparisons, so pairs with an undefined score are left out.
explained_threshold <- 0.80
probable_snp_comb_case <- all_snp_comb_frame[
    which(all_snp_comb_frame[["case_percent"]] > explained_threshold),
    ,
    drop = FALSE
]
probable_snp_comb_control <- all_snp_comb_frame[
    which(all_snp_comb_frame[["control_percent"]] > explained_threshold),
    ,
    drop = FALSE
]