pzivich · pzivich · Jul 17, 2019 · Jun 26, 2019 · Jun 30, 2019 · Jun 30, 2019
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,38 @@
 ## Change logs
 
+### v0.8.0
+`IPTW` had a massive overhaul. It now follows a similar structure to `AIPTW` and other causal inference methods. 
+One *major* change is that missing data is dropped before any calculations. Therefore, if missing data was present for
+certain types of data, the weights may no longer match with previous versions. While users can still call the weights 
+attribute, all the calculations of the ATE are now contained within the `IPTW` class. Future updates will be other 
+instances of the IPTW calculations for other methods, like `LongitudinalIPTW` and `SurvivalIPTW`. The major advantage
+of this new structure is it removes some of the burden from users on how to apply IPTW to different data structures.
+
+Diagnostic functions have been added to `TimeFixedGFormula`, `AIPTW`, and `TMLE`. The diagnostics have been restructured
+for functions contained within a different file rather than function instances within specific classes. This is 
+due to diagnostics commonly being shared across functions.
+
+How missing data is handled by `AIPTW` and `IPTW` has been updated. Rather than dropping all missing data, they only drop
+missing data for non-outcome variables. This behavior mimics `TMLE`. Additionally, both have gained the `missing_model`
+function. This new function calculates inverse probability of censoring weights.
+
+`bound` argument is now available to `IPTW` and `AIPTW` to truncate the predicted probabilities of the g-model. The 
+behavior is the same as `TMLE`. `bound` is also available for `missing_model()`.
+
+`IPCW` no longer supports late-entries into the data. The pooled logistic regression model will not correctly accrue 
+weights when late entries occur. This is not a problem I have seen reported in the literature, but I have seen it in 
+my own simulations. While you can correctly estimate IPCW with time-fixed variables, this is difficult for me to 
+detect. Instead, I have `IPCW` not allow late-entries. If users would like to allow late-entries, they would need to 
+"extend backwards" observations or they would need to drop the late-entries. I have updated the documentation to note
+this change.
+
+S-value calculator function has been added. `s_value` transforms a p-value into the corresponding s-value. See
+documentation for details on s-values and how to interpret them.
+
+I have also been moving around background functions. Most notably, the IPTW diagnostics have migrated to the 
+`causal/utils.py` branch since these diagnostics are to be used by other causal inference methods. These reformats 
+should have no change for users. This is merely maintenance on my end.
+
 ### v0.7.2
 Labeling fix for `RiskDifference` summary
 

diff --git a/docs/Reference/Calculations.rst b/docs/Reference/Calculations.rst
@@ -48,3 +48,4 @@ Others
   counternull_pvalue
   semibayes
   rubins_rules
+  s_value
diff --git a/...erated/zepid.calc.untils.rubins_rules.rst → ...nerated/zepid.calc.utils.rubins_rules.rst b/...erated/zepid.calc.untils.rubins_rules.rst → ...nerated/zepid.calc.utils.rubins_rules.rst
diff --git a/docs/Reference/generated/zepid.calc.utils.s_value.rst b/docs/Reference/generated/zepid.calc.utils.s_value.rst
@@ -0,0 +1,6 @@
+zepid.calc.utils.s\_value
+==============================
+
+.. currentmodule:: zepid.calc.utils
+
+.. autofunction:: s_value
diff --git a/docs/Reference/generated/zepid.causal.doublyrobust.AIPW.AIPTW.rst b/docs/Reference/generated/zepid.causal.doublyrobust.AIPW.AIPTW.rst
@@ -13,10 +13,12 @@ zepid.causal.doublyrobust.AIPW.AIPTW
 
       ~AIPTW.exposure_model
       ~AIPTW.outcome_model
+      ~AIPTW.missing_model
       ~AIPTW.fit
       ~AIPTW.summary
-
-
-
-
+      ~AIPTW.run_diagnostics
+      ~AIPTW.positivity
+      ~AIPTW.standardized_mean_differences
+      ~AIPTW.plot_kde
+      ~AIPTW.plot_love
 
diff --git a/docs/Reference/generated/zepid.causal.doublyrobust.TMLE.TMLE.rst b/docs/Reference/generated/zepid.causal.doublyrobust.TMLE.TMLE.rst
@@ -15,6 +15,11 @@ zepid.causal.doublyrobust.TMLE.TMLE
       ~TMLE.missing_model
       ~TMLE.fit
       ~TMLE.summary
+      ~TMLE.run_diagnostics
+      ~TMLE.positivity
+      ~TMLE.standardized_mean_differences
+      ~TMLE.plot_kde
+      ~TMLE.plot_love
 
 
 

diff --git a/docs/Reference/generated/zepid.causal.ipw.IPTW.IPTW.rst b/docs/Reference/generated/zepid.causal.ipw.IPTW.IPTW.rst
@@ -10,15 +10,13 @@ zepid.causal.ipw.IPTW.IPTW
 
    .. autosummary::
 
-      ~IPTW.regression_models
+      ~IPTW.treatment_model
+      ~IPTW.marginal_structural_model
+      ~IPTW.missing_model
       ~IPTW.fit
+      ~IPTW.run_diagnostics
       ~IPTW.positivity
       ~IPTW.standardized_mean_differences
       ~IPTW.plot_boxplot
       ~IPTW.plot_kde
       ~IPTW.plot_love
-
-
-
-
-
diff --git a/docs/Reference/generated/zepid.causal.snm.g_estimation.GEstimationSNM.rst b/docs/Reference/generated/zepid.causal.snm.g_estimation.GEstimationSNM.rst
@@ -12,5 +12,6 @@ zepid.causal.snm.g\_estimation.GEstimationSNM
 
       ~GEstimationSNM.exposure_model
       ~GEstimationSNM.structural_nested_model
+      ~GEstimationSNM.missing_model
       ~GEstimationSNM.fit
       ~GEstimationSNM.summary
diff --git a/docs/Time-Fixed Exposure.rst b/docs/Time-Fixed Exposure.rst
diff --git a/tests/test_graphics_manual.py → tests/graphics_manualtest.py b/tests/test_graphics_manual.py → tests/graphics_manualtest.py
@@ -11,7 +11,8 @@
 from zepid.graphics import (EffectMeasurePlot, functional_form_plot, pvalue_plot, spaghetti_plot,
                             roc, dynamic_risk_plot, labbe_plot)
 from zepid.causal.ipw import IPTW
-from zepid.causal.gformula import MonteCarloGFormula, SurvivalGFormula
+from zepid.causal.gformula import MonteCarloGFormula, SurvivalGFormula, TimeFixedGFormula
+from zepid.causal.doublyrobust import AIPTW, TMLE
 from zepid.sensitivity_analysis import MonteCarloRR, trapezoidal
 
 
@@ -135,12 +136,19 @@ def senstivity_check():
 
 
 def causal_check():
-    # Check IPTW plots
-    data = load_sample_data(False)
+    data = load_sample_data(False).drop(columns=['cd4_wk45'])
     data[['cd4_rs1', 'cd4_rs2']] = spline(data, 'cd40', n_knots=3, term=2, restricted=True)
     data[['age_rs1', 'age_rs2']] = spline(data, 'age0', n_knots=3, term=2, restricted=True)
-    ipt = IPTW(data, treatment='art', stabilized=True)
-    ipt.regression_models('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0')
+
+    # Check TimeFixedGFormula diagnostics
+    g = TimeFixedGFormula(data, exposure='art', outcome='dead')
+    g.outcome_model(model='art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0')
+    g.run_diagnostics(decimal=3)
+
+    # Check IPTW plots
+    ipt = IPTW(data, treatment='art', outcome='dead', stabilized=True)
+    ipt.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0')
+    ipt.marginal_structural_model('art')
     ipt.fit()
     ipt.plot_love()
     plt.tight_layout()
@@ -153,6 +161,33 @@ def causal_check():
     plt.show()
     ipt.plot_boxplot(measure='logit')
     plt.show()
+    ipt.run_diagnostics()
+
+    # Check AIPTW Diagnostics
+    aipw = AIPTW(data, exposure='art', outcome='dead')
+    aipw.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0')
+    aipw.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0')
+    aipw.fit()
+    aipw.run_diagnostics()
+    aipw.plot_kde(to_plot='exposure')
+    plt.show()
+    aipw.plot_kde(to_plot='outcome')
+    plt.show()
+    aipw.plot_love()
+    plt.show()
+
+    # Check TMLE diagnostics
+    tmle = TMLE(data, exposure='art', outcome='dead')
+    tmle.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0')
+    tmle.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0')
+    tmle.fit()
+    tmle.run_diagnostics()
+    tmle.plot_kde(to_plot='exposure')
+    plt.show()
+    tmle.plot_kde(to_plot='outcome')
+    plt.show()
+    tmle.plot_love()
+    plt.show()
 
     # Check SurvivalGFormula plots
     df = load_sample_data(False).drop(columns=['cd4_wk45'])

diff --git a/tests/test_calculator.py b/tests/test_calculator.py
@@ -6,7 +6,7 @@
 from zepid.calc import (risk_ci, incidence_rate_ci, risk_ratio, risk_difference, number_needed_to_treat, odds_ratio,
                         incidence_rate_ratio, incidence_rate_difference, odds_to_probability, probability_to_odds,
                         semibayes, attributable_community_risk, population_attributable_fraction, sensitivity,
-                        specificity, npv_converter, ppv_converter, rubins_rules)
+                        specificity, npv_converter, ppv_converter, rubins_rules, s_value)
 
 
 @pytest.fixture
@@ -349,3 +349,15 @@ def test_match_sas2(self):
         b = rubins_rules(rr_est, rr_var)
         npt.assert_allclose(b[0], est_sas)
         npt.assert_allclose(b[1], std_sas)
+
+
+class TestSValues:
+
+    def test_svalue1(self):
+        npt.assert_allclose(4.3219280949, s_value(0.05))
+
+    def test_svalue2(self):
+        npt.assert_allclose(0.2009126939, s_value(0.87))
+
+    def test_svalue3(self):
+        npt.assert_allclose([4.3219280949, 0.2009126939], s_value([0.05, 0.87]))
diff --git a/tests/test_doublyrobust.py b/tests/test_doublyrobust.py
@@ -342,7 +342,7 @@ def dat(self):
     def test_drop_missing_data(self):
         df = ze.load_sample_data(False)
         aipw = AIPTW(df, exposure='art', outcome='dead')
-        assert df.dropna().shape[0] == aipw.df.shape[0]
+        assert df.dropna(subset=['cd4_wk45']).shape[0] == aipw.df.shape[0]
 
     def test_error_when_no_models_specified1(self, df):
         aipw = AIPTW(df, exposure='art', outcome='dead')
@@ -453,3 +453,25 @@ def test_weighted_continuous_outcomes(self, cf):
         aipw.fit()
         npt.assert_allclose(aipw.average_treatment_effect, 225.13767, rtol=1e-3)
         assert aipw.average_treatment_effect_ci is None
+
+    def test_bounds(self, df):
+        aipw = AIPTW(df, exposure='art', outcome='dead')
+        aipw.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                            bound=0.1, print_results=False)
+        aipw.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                           print_results=False)
+        aipw.fit()
+
+        npt.assert_allclose(aipw.risk_difference, -0.0819506956)
+        npt.assert_allclose(aipw.risk_difference_ci, (-0.1498808287, -0.0140205625))
+
+    def test_bounds2(self, df):
+        aipw = AIPTW(df, exposure='art', outcome='dead')
+        aipw.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                            bound=[0.2, 0.9], print_results=False)
+        aipw.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                           print_results=False)
+        aipw.fit()
+
+        npt.assert_allclose(aipw.risk_difference, -0.0700780176)
+        npt.assert_allclose(aipw.risk_difference_ci, (-0.1277925885, -0.0123634468))
diff --git a/tests/test_generalize.py b/tests/test_generalize.py
@@ -25,10 +25,9 @@ def df_c():
 def df_iptw(df_c):
     dfs = df_c.loc[df_c['S'] == 1].copy()
 
-    ipt = IPTW(dfs, treatment='A')
-    ipt.regression_models('L', print_results=False)
-    ipt.fit()
-    dfs['iptw'] = ipt.Weight
+    ipt = IPTW(dfs, treatment='A', outcome='Y')
+    ipt.treatment_model('L', stabilized=True, print_results=False)
+    dfs['iptw'] = ipt.iptw
     return pd.concat([dfs, df_c.loc[df_c['S'] == 0]], ignore_index=True, sort=False)