In [1]:
library("ggplot2")
library("dplyr")
library("reshape2")
library("knitr")


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [2]:
# Load the diabetes data set from the working directory into a data frame.
Diabetes <- read.csv("diabetes.csv")

In [3]:
# Bin cholesterol into three equal-width intervals labelled 1, 2, 3 and
# store the resulting factor as a new column, then display it.
Diabetes$chol_group <- cut(Diabetes$chol, breaks = 3, labels = 1:3)
Diabetes$chol_group

In [9]:
# Bin age into three equal-width intervals labelled 1, 2, 3 and store the
# resulting factor as a new column, then display it.
Diabetes$age_group <- cut(Diabetes$age, breaks = 3, labels = 1:3)
Diabetes$age_group

In [5]:
# Count observations in each (cholesterol group, age group) cell.
# Rows whose cholesterol group is missing are dropped first. The test uses
# is.na() because a missing factor value is a real NA, not the string "NA";
# comparing against "NA" only excluded those rows because filter() discards
# rows whose condition evaluates to NA.
Diabetes.age.chol.df <-
  Diabetes %>%
  filter(!is.na(chol_group)) %>%
  group_by(chol_group, age_group) %>%
  summarize(n = n())

In [6]:
# Display the per-cell counts (one row per chol_group / age_group pair).
Diabetes.age.chol.df

chol_group,age_group,n
1,1,113
1,2,59
1,3,12
2,1,77
2,2,100
2,3,35
3,1,3
3,2,3


In [7]:
# Render the counts as a contingency table: one row per age group, one
# column per cholesterol group. `value.var` names the column that fills the
# cells; spelling it correctly stops dcast() from having to guess it (and
# from printing the "Using n as value column" message).
Diabetes.age.chol.df %>%
  dcast(age_group ~ chol_group, value.var = "n") %>%
  kable(align = "l", format = "markdown",
        table.attr = 'class="table table-striped table-hover"')

Using n as value column: use value.var to override.




|age_group |1   |2   |3  |
|:---------|:---|:---|:--|
|1         |113 |77  |3  |
|2         |59  |100 |3  |
|3         |12  |35  |NA |

In [10]:
# Joint distribution: divide each cell count by the grand total. The
# grouping left over from the counting step is removed first so that
# sum(n) runs over every cell rather than within each cholesterol group.
Diabetes.age.chol.prop.df <- mutate(
  ungroup(Diabetes.age.chol.df),
  prop = n / sum(n)
)

# Show the joint proportions as an age-group by cholesterol-group table.
kable(
  dcast(Diabetes.age.chol.prop.df, age_group ~ chol_group, value.var = "prop"),
  align = "l",
  format = "markdown",
  table.attr = 'class="table table-striped table-hover"'
)



|age_group |1         |2         |3         |
|:---------|:---------|:---------|:---------|
|1         |0.2810945 |0.1915423 |0.0074627 |
|2         |0.1467662 |0.2487562 |0.0074627 |
|3         |0.0298507 |0.0870647 |NA        |

In [13]:
# Marginal distribution of age group: sum the joint proportions over all
# cholesterol groups.
age.marginal.df <- summarize(
  group_by(Diabetes.age.chol.prop.df, age_group),
  marginal = sum(prop)
)

# Marginal distribution of cholesterol group: sum the joint proportions
# over all age groups.
chol.marginal.df <- summarize(
  group_by(Diabetes.age.chol.prop.df, chol_group),
  marginal = sum(prop)
)

In [14]:
# Joint proportion table with both marginals attached: the age marginal is
# joined on as an extra column, the cholesterol marginal is appended as an
# extra row labelled "marginal".
# age_group is a factor in the joint table but plain text in the appended
# row, so it is converted to character before binding. This keeps the
# resulting column type explicit and avoids the factor/character coercion
# warning from bind_rows().
Diabetes.age.chol.prop.df %>%
  dcast(age_group ~ chol_group, value.var = "prop") %>%
  left_join(age.marginal.df, by = "age_group") %>%
  mutate(age_group = as.character(age_group)) %>%
  bind_rows(
    chol.marginal.df %>%
      mutate(age_group = "marginal") %>%
      dcast(age_group ~ chol_group, value.var = "marginal")
  ) %>%
  kable(align = "l", format = "markdown",
        table.attr = 'class="table table-striped table-hover"')

“binding character and factor vector, coercing into character vector”



|age_group |1         |2         |3         |marginal  |
|:---------|:---------|:---------|:---------|:---------|
|1         |0.2810945 |0.1915423 |0.0074627 |0.4800995 |
|2         |0.1467662 |0.2487562 |0.0074627 |0.4029851 |
|3         |0.0298507 |0.0870647 |NA        |0.1169154 |
|marginal  |0.4577114 |0.5273632 |0.0149254 |NA        |

In [15]:
# Joint proportion of the cell with age group 1 and cholesterol group 1.
joint.prob <- filter(
  Diabetes.age.chol.prop.df,
  age_group == "1", chol_group == "1"
)[["prop"]]

# Marginal proportion of cholesterol group 1.
marg.prob <- filter(chol.marginal.df, chol_group == "1")[["marginal"]]

# Conditional proportion of age group 1 given cholesterol group 1:
# joint proportion divided by the marginal of the conditioning group.
cond.prob <- joint.prob / marg.prob
cond.prob