In [1]:
library("ggplot2")
library("dplyr")
library("reshape2")
library("knitr")


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [2]:
# Load the diabetes data set from a CSV file in the working directory.
Diabetes <- read.csv(file = "diabetes.csv")

In [3]:
# Count the observations in every frame/gender combination, leaving out
# any rows whose gender is recorded as "Both".
Diabetes.frame.gender.df <-
  summarize(
    filter(
      group_by(Diabetes, frame, gender),
      gender != "Both"
    ),
    n = n()
  )

In [4]:
# Display the frame-by-gender counts (one row per combination, count in n).
Diabetes.frame.gender.df

frame,gender,n
,female,7
,male,5
large,female,42
large,male,61
medium,female,116
medium,male,68
small,female,69
small,male,35


In [5]:
# Show the counts as a frame-by-gender contingency table.
# value.var names the column that fills the cells; with the argument spelled
# correctly dcast() no longer has to guess which column holds the values.
Diabetes.frame.gender.df %>%
  dcast(frame ~ gender, value.var = "n") %>%
  kable(align = "l", format = "markdown",
        table.attr = 'class="table table-striped table-hover"')

Using n as value column: use value.var to override.




|frame  |female |male |
|:------|:------|:----|
|       |7      |5    |
|large  |42     |61   |
|medium |116    |68   |
|small  |69     |35   |

In [6]:
# Turn the cell counts into joint proportions of the whole table.
# ungroup() drops any grouping carried by the counts so that the total is
# taken over every row.
Diabetes.frame.gender.prop.df <-
  mutate(
    ungroup(Diabetes.frame.gender.df),
    prop = prop.table(n)
  )

# Show the joint proportions as a frame-by-gender table.
kable(
  dcast(Diabetes.frame.gender.prop.df, frame ~ gender, value.var = "prop"),
  align = "l",
  format = "markdown",
  table.attr = 'class="table table-striped table-hover"'
)



|frame  |female    |male      |
|:------|:---------|:---------|
|       |0.0173697 |0.0124069 |
|large  |0.1042184 |0.1513648 |
|medium |0.2878412 |0.1687345 |
|small  |0.1712159 |0.0868486 |

In [7]:
# Marginal proportion of each frame size: joint proportions summed over gender.
frame.marginal.df <-
  summarize(
    group_by(Diabetes.frame.gender.prop.df, frame),
    marginal = sum(prop)
  )

# Marginal proportion of each gender: joint proportions summed over frame.
gender.marginal.df <-
  summarize(
    group_by(Diabetes.frame.gender.prop.df, gender),
    marginal = sum(prop)
  )

In [8]:
# Joint proportions with both sets of marginals attached: the per-frame
# marginal is joined on as an extra column and the per-gender marginals are
# appended as an extra row labelled "marginal". The appended row has no
# `marginal` column of its own, so that cell is NA in the result.
Diabetes.frame.gender.prop.df %>%
  dcast(frame ~ gender, value.var = "prop") %>%
  left_join(frame.marginal.df, by = "frame") %>%
  # frame may be a factor here while the appended row uses a character
  # label; convert explicitly so bind_rows() sees matching column types
  # instead of coercing with a warning.
  mutate(frame = as.character(frame)) %>%
  bind_rows(
    gender.marginal.df %>%
      mutate(frame = "marginal") %>%
      dcast(frame ~ gender, value.var = "marginal")
  ) %>%
  kable(align = "l", format = "markdown",
        table.attr = 'class="table table-striped table-hover"')

“binding character and factor vector, coercing into character vector”



|frame    |female    |male      |marginal  |
|:--------|:---------|:---------|:---------|
|         |0.0173697 |0.0124069 |0.0297767 |
|large    |0.1042184 |0.1513648 |0.2555831 |
|medium   |0.2878412 |0.1687345 |0.4565757 |
|small    |0.1712159 |0.0868486 |0.2580645 |
|marginal |0.5806452 |0.4193548 |NA        |

In [15]:
# Joint proportion of the (large frame, female) cell.
joint.prob <-
  filter(
    Diabetes.frame.gender.prop.df,
    frame == "large", gender == "female"
  )[["prop"]]

# Marginal proportion of females across all frame sizes.
marg.prob <-
  filter(gender.marginal.df, gender == "female")[["marginal"]]

# Conditional proportion of a large frame given female: joint / marginal.
cond.prob <- joint.prob / marg.prob

# Display the three values.
joint.prob
marg.prob
cond.prob