In [1]:
from IPython.display import display
import numpy as np
import pandas as pd
from re import search

In [85]:
class ContTable:
    def __init__(self, de, nde, dne, ndne, verbose=1, xlabs=["D", "~D"], ylabs=["E", "~E"]):
        assert de >= 0 and nde >= 0 and dne >= 0 and ndne >= 0, "All values in the contingency table must be positive."
        self.desc = [
            "Diseased and exposed", 
            "Not disease but exposed", 
            "Diseased but not exposed", 
            "Not diseased and not exposed"
        ]
        self.xlabels = xlabs
        self.ylabels = ylabs
        self.de = de; #Diseased and exposed = a
        self.nde = nde; #Not disease but exposed = b
        self.dne = dne; #Diseased but not exposed = c
        self.ndne = ndne; #Not diseased and not exposed = d
        self.num_d = de + dne;
        self.num_nd = nde + ndne;
        self.num_e = de + nde;
        self.num_ne = dne + ndne;
        self.num_tot = de+nde+dne+ndne
        self.cont_matrix = np.matrix([[self.de,self.nde],[self.dne,self.ndne]])
        self.cont_table = pd.DataFrame(self.cont_matrix)
        self.cont_table.columns = self.ylabels
        self.cont_table.index = self.xlabels
        if verbose:
            self.display()
        self.relative_risk(verbose)
        self.odds_ratio(verbose)
        self.excess_risk(verbose)
        self.attributable_risk(verbose)
    def relative_risk(self, verbose=1):
        #Basis for multiplicative risk
        #RR = P(D|E)/P(D|~E)
        num = self.de/self.num_e
        denom = self.dne/self.num_ne
        self.RR = num/denom
        if verbose:
            print("Relative Risk (RR):", self.RR)
        if verbose > 1:
            if self.RR == 1:
                print("Relative Risk (RR) indicates independence between D & E.")
            elif self.RR > 1:
                print("Relative Risk (RR) indicates a positive relationship between D & E.")
            else:
                print("Relative Risk (RR) indicates a negative relationship between D & E.")
            print("")
        assert self.RR >= 0, "Relative risk must be non-negative!"
        #Restriction on range (below) becomes problematic with common disease outcomes
        assert self.RR <= 1/denom, "Relative Risk is always <= 1/P(D|~E)"
        return self.RR
    def odds_ratio(self, verbose=1):
        #OR = [P(D|E)/P(~D|E)]/[P(D|~E)/P(~D|~E)] = (ad)/(bc)
        self.OR = (self.de*self.ndne)/(self.nde*self.dne);
        if verbose:
            print("Odds Ratio (OR):", self.OR)
        if verbose > 1:
            if self.OR == 1:
                print("Odds Ratio (OR) indicates independence between D & E.")
            elif self.OR > 1:
                print("Odds Ratio (OR) indicates a positive relationship between D & E.")
            else:
                print("Odds Ratio (OR) indicates a negative relationship between D & E.")
            print("")
        assert self.OR >= 0, "Odds Ratio must be non-negative!"
        #Odds Ratio has no upper bound, unlike Relative Risk
        return self.OR
    def excess_risk(self, verbose=1):
        #Basis for additive risk
        #ER = P(D|E) - P(D|~E) 
        first = self.de/self.num_e
        second = self.dne/self.num_ne
        self.ER = first-second
        if verbose:
            print("Excess Risk (ER):", self.ER)
        if verbose > 1:
            if self.ER == 0:
                print("Excess Risk (ER) indicates independence between D & E.")
            elif self.ER > 0:
                print("Excess Risk (ER) indicates a positive relationship between D & E.")
            else:
                print("Excess Risk (ER) indicates a negative relationship between D & E.")
            print("")
        assert self.ER >= -1 and self.ER <= 1, "Excess Risk (ER) is bounded by [-1, 1]." 
        return self.ER
    def attributable_risk(self, verbose=1):
        #AR = [P(D) - P(D|~E)]/P(D)
        pd = (self.de + self.dne)/self.num_tot
        pdne = self.dne/self.num_ne
        self.AR = (pd - pdne)/pd
        if verbose:
            print("Attributable Risk (AR):", self.AR)
        if verbose > 1:
            if self.AR == 0:
                print("Attributable Risk (AR) indicates independence between D & E.")
            elif self.AR > 0:
                print("Attributable Risk (AR) indicates a positive relationship between D & E.")
            else:
                print("Attributable Risk (AR) indicates a negative relationship between D & E.")
            print("")
        return self.AR
    def display(self):
        print("2x2 Contingency Table:")
        display(self.cont_table)
    def get_prob(self, prob_string):
        valid = search("P\((.*)\)", prob_string)
        if valid:
            cond_prob = search("([\~a-zA-Z]*)\|([\~a-zA-Z]*)", valid.group(1))
            if cond_prob:
                a = cond_prob.group(1)
                b = cond_prob.group(2)
                if a in self.xlabels:
                    return marital_status.cont_table.loc[a, b]/marital_status.cont_table.loc[:, b].sum()
                else:
                    return marital_status.cont_table.loc[b, a]/marital_status.cont_table.loc[:, a].sum()
            else:
                prob_chars = valid.group(1)
                if prob_chars in self.xlabels:
                    return marital_status.cont_table.loc[prob_chars, :].sum()/self.num_tot
                else:
                    return marital_status.cont_table.loc[:, prob_chars].sum()/self.num_tot

In [86]:
marital_status = ContTable(16712, 1197142, 18784, 2878421, 1)

2x2 Contingency Table:


Unnamed: 0,E,~E
D,16712,1197142
~D,18784,2878421


Relative Risk (RR): 2.1235041772801155
Odds Ratio (OR): 2.139188199568787
Excess Risk (ER): 0.007284228235844439
Attributable Risk (AR): 0.2490981964444646


In [87]:
marital_status.get_prob("P(E)") + marital_status.get_prob("P(~E)")
marital_status.get_prob("P(D|E)") + marital_status.get_prob("P(~D|E)") 

1.0

In [88]:
birthweight = ContTable(21054, 271269, 14442, 3804294, 1)

2x2 Contingency Table:


Unnamed: 0,E,~E
D,21054,271269
~D,14442,3804294


Relative Risk (RR): 19.044252295766743
Odds Ratio (OR): 20.444720789531505
Excess Risk (ER): 0.06824119071217893
Attributable Risk (AR): 0.5619920439642855
