In [1]:
library(dplyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
data <- read.table("data/clinical_information.txt", header=T, row.names=1, 
    sep="\t", na.strings=c(999))
dim(data)
colnames(data)

In [3]:
data <- data %>%
    mutate(age_at_op = case_when(
        age_at_op >= 20 & age_at_op < 40 ~ "[20, 40)",
        age_at_op >= 40 & age_at_op < 60 ~ "[40, 60)",
        age_at_op >= 60 ~ "[60, 85]"),
        bmi = case_when(
        bmi < 20 ~ "< 20",
        bmi >= 20 & bmi < 30 ~ "20 >= bmi > 30",
        bmi >= 30 ~ ">= 30"),
        age_at_period_begin = case_when(
        age_at_period_begin < 14 ~ "< 14",
        age_at_period_begin >= 14 & age_at_period_begin <17 ~ "14 >= age > 17",
        age_at_period_begin >= 17 ~ ">= 17")
    )

In [4]:
table(data$tnm_n_total)


    N0 N0(i+)     N1     N2     N3 
  2393     29    112     13      9 

In [5]:
data$group <- "control"
data$group[which(data$tnm_n_total %in% c("N1", "N2", "N3"))] <- "case"
data$group <- factor(data$group, levels=c("case", "control"),
                  labels=c("case", "control"))
table(data$group)


   case control 
    134    2422 

In [6]:
data$group2 <- "NA"
data$group2[which(data$tnm_n_total == "N1")] <- "N1"
data$group2[which(data$tnm_n_total == "N2")] <- "N2"
data$group2[which(data$tnm_n_total == "N3")] <- "N3"
data$group2 <- factor(data$group2, levels=c("N1", "N2", "N3"),
                  labels=c("N1", "N2", "N3"))
table(data$group2)


 N1  N2  N3 
112  13   9 

In [7]:
# breast_op
data$breast_op <- factor(data$breast_op, levels=c(1:6), 
                      labels=c("BCS", "TM", "TM+IR", "Bx", "N/A", "No OP"))
table(data$group, data$breast_op)
fisher.test(table(data$group, data$breast_op))

         
           BCS   TM TM+IR   Bx  N/A No OP
  case      52   80     0    1    1     0
  control 1128 1279     1    8    5     1


	Fisher's Exact Test for Count Data

data:  table(data$group, data$breast_op)
p-value = 0.1527
alternative hypothesis: two.sided


In [8]:
# axillary_op
data$axillary_op <- factor(data$axillary_op, levels=c(1:4), 
                      labels=c("SLNB", "ALND", "S+A", "No OP"))
table(data$group, data$axillary_op)
fisher.test(table(data$group, data$axillary_op))

         
          SLNB ALND  S+A No OP
  case      39   79   16     0
  control 2059  335    2    26


	Fisher's Exact Test for Count Data

data:  table(data$group, data$axillary_op)
p-value < 2.2e-16
alternative hypothesis: two.sided


In [9]:
table(data$group2, data$axillary_op)
fisher.test(table(data$group2, data$axillary_op))

    
     SLNB ALND S+A No OP
  N1   39   69   4     0
  N2    0    7   6     0
  N3    0    3   6     0


	Fisher's Exact Test for Count Data

data:  table(data$group2, data$axillary_op)
p-value = 9.188e-09
alternative hypothesis: two.sided


In [10]:
# age_at_op
data$age_at_op <- factor(data$age_at_op, levels=c("[20, 40)", "[40, 60)", "[60, 85]"), 
                      labels=c("[20, 40)", "[40, 60)", "[60, 85]"))
table(data$group, data$age_at_op)
fisher.test(table(data$group, data$age_at_op))

         
          [20, 40) [40, 60) [60, 85]
  case          21       88       25
  control      342     1636      444


	Fisher's Exact Test for Count Data

data:  table(data$group, data$age_at_op)
p-value = 0.8285
alternative hypothesis: two.sided


In [11]:
# bmi
data$bmi <- factor(data$bmi, levels=c("< 20", "20 >= bmi > 30", ">= 30"), 
                      labels=c("< 20", "20 >= bmi > 30", ">= 30"))
table(data$group, data$bmi)
fisher.test(table(data$group, data$bmi))

         
          < 20 20 >= bmi > 30 >= 30
  case      18             92     1
  control  311           1724    70


	Fisher's Exact Test for Count Data

data:  table(data$group, data$bmi)
p-value = 0.4162
alternative hypothesis: two.sided


In [12]:
# age_at_period_begin
data$age_at_period_begin <- factor(data$age_at_period_begin, 
                      levels=c("< 14", "14 >= age > 17", ">= 17"), 
                      labels=c("< 14", "14 >= age > 17", ">= 17"))
table(data$group, data$age_at_period_begin)
fisher.test(table(data$group, data$age_at_period_begin))

         
          < 14 14 >= age > 17 >= 17
  case      27             57    13
  control  440           1012   221


	Fisher's Exact Test for Count Data

data:  table(data$group, data$age_at_period_begin)
p-value = 0.9311
alternative hypothesis: two.sided


In [13]:
# menopause
data$menopause <- factor(data$menopause, levels=c(0, 1),
                      labels=c("Yes", "No"))
table(data$group, data$menopause)
fisher.test(table(data$group, data$menopause))

         
          Yes  No
  case      0  29
  control  12 767


	Fisher's Exact Test for Count Data

data:  table(data$group, data$menopause)
p-value = 1
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
  0.00000 10.04796
sample estimates:
odds ratio 
         0 


In [14]:
# contraceptives
data$contraceptives <- factor(data$contraceptives, levels=c(0, 1),
                           labels=c("No", "Yes"))
table(data$group, data$contraceptives)
fisher.test(table(data$group, data$contraceptives))

         
            No  Yes
  case      83   11
  control 1515  180


	Fisher's Exact Test for Count Data

data:  table(data$group, data$contraceptives)
p-value = 0.731
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.4647094 1.9009409
sample estimates:
odds ratio 
 0.8965742 


In [15]:
# HRT
data$HRT <- factor(data$HRT, levels=c(0, 1),
                           labels=c("No", "Yes"))
table(data$group, data$HRT)
fisher.test(table(data$group, data$HRT))

         
            No  Yes
  case      86    4
  control 1544  144


	Fisher's Exact Test for Count Data

data:  table(data$group, data$HRT)
p-value = 0.2373
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.7385419 7.6326907
sample estimates:
odds ratio 
  2.004574 


In [16]:
# BRCA_family_history
data$BRCA_family_history <- factor(data$BRCA_family_history, levels=c(0, 1),
                           labels=c("No", "Yes"))
table(data$group, data$BRCA_family_history)
fisher.test(table(data$group, data$BRCA_family_history))

         
            No  Yes
  case     100   10
  control 1956  219


	Fisher's Exact Test for Count Data

data:  table(data$group, data$BRCA_family_history)
p-value = 0.871
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.5729131 2.4425113
sample estimates:
odds ratio 
  1.119606 
