In [None]:
!pip install rpy2

In [1]:
%load_ext rpy2.ipython

In [2]:
import rpy2.robjects.packages as rpackages
from rpy2.robjects.vectors import StrVector
import rpy2.robjects as robjects

from statsmodels.genmod import families
from statsmodels.genmod.generalized_linear_model import GLM

import json
import numpy as np
import pandas as pd
from scipy.special import expit

In [18]:
model_families = {
    "gaussian": families.Gaussian(),
    "logit": families.Binomial(),  # Logit by default
    "probit": families.Binomial(link=families.links.Probit()),
    "poisson": families.Poisson(),
}

model_family_strings = [
        "gaussian",
        "logit",
        "probit",
        "poisson",
    ]

In [4]:
# Choosing a CRAN Mirror
# utils = rpackages.importr('utils')
# utils.chooseCRANmirror(ind=1)

# # Installing required packages
# packages = ('arm', 'rjson')
# utils.install_packages(StrVector(packages))

## Generate Test Data Set

In [5]:
np.random.seed(12345)
n = 100
x1 = np.random.normal(size=n)
x2 = np.random.binomial(1, 0.5, n)
b0 = 1
b1 = 1.5
b2 = 2
const = np.ones(n)
y = np.random.binomial(1, expit(b0 + b1 * x1 + b2 * x2), n)
X = np.transpose(np.vstack([const, x1, x2]))

df = pd.DataFrame(
    np.hstack((y.reshape(100, 1), X.reshape(100, 3))),
    columns=["y", "const", "x1", "x2"],
)

df.head()

Unnamed: 0,y,const,x1,x2
0,1.0,1.0,-0.204708,0.0
1,1.0,1.0,0.478943,0.0
2,0.0,1.0,-0.519439,0.0
3,0.0,1.0,-0.55573,0.0
4,1.0,1.0,1.965781,0.0


In [6]:
%%R
install.packages(c("arm", "rjson"))

  'lib = "/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library"' is not writable
Error in install.packages(c("arm", "rjson")) : unable to install packages


RInterpreterError: Failed to parse and evaluate line 'install.packages(c("arm", "rjson"))\n'.
R error message: 'Error in install.packages(c("arm", "rjson")) : unable to install packages'
R stdout:
Warning in install.packages(c("arm", "rjson")) :
  'lib = "/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library"' is not writable
Error in install.packages(c("arm", "rjson")) : unable to install packages

Pass the df generated in Python over to R, extract columns to vectors

In [7]:
%%R -i df

n <- 100
x1 <- df$x1
x2 <- df$x2
y <- df$y

head(df)

  y const         x1 x2
0 1     1 -0.2047077  0
1 1     1

  0.4789433  0
2 0     1 -0.5194387  0
3 0     1 -0.5557303  0
4 1     1  1.9657806  0
5 1     1  1.3934058  0


Create models in R from the data generated in Python. 

In [8]:
%%R

model_logit_constant <- arm::bayesglm (y ~ x1 + x2, family=binomial(link="logit"))

model_logit_no_constant <- arm::bayesglm (y ~ 0 + x1 + x2, family=binomial(link="logit"))

model_probit_constant <- arm::bayesglm (y ~ x1 + x2, family=binomial(link="probit"))

model_probit_no_constant <- arm::bayesglm (y ~ 0 + x1 + x2, family=binomial(link="probit"))

model_poisson_constant <- arm::bayesglm(y ~ x1 + x2, family=poisson)

model_poisson_no_constant <- arm::bayesglm(y ~ 0 + x1 + x2, family=poisson)

model_gaussian_constant <- arm::bayesglm(y ~ x1 + x2, family=gaussian)

model_gaussian_no_constant <- arm::bayesglm(y ~ 0 + x1 + x2, family=gaussian)

In [9]:
%%R

make_model_json <- function(model){
  model_attrs <- list(as.vector(model$coefficients),
                                     as.vector(model$fitted.values),
                                     as.vector(model$residuals))
  names(model_attrs) <- c("params", "fittedvalues", "resid_working")
  return(model_attrs)
}

output_this_json <-list(make_model_json(model_logit_constant),
                        make_model_json(model_logit_no_constant),
                        make_model_json(model_probit_constant),
                        make_model_json(model_probit_no_constant),
                        make_model_json(model_poisson_constant),
                        make_model_json(model_poisson_no_constant),
                        make_model_json(model_gaussian_constant),
                        make_model_json(model_gaussian_no_constant)
                        )
                        
names(output_this_json) <- c("logit_constant",
                             "logit_no_constant",
                             "probit_constant",
                             "probit_no_constant",
                             "poisson_constant",
                             "poisson_no_constant",
                             "gaussian_constant",
                             "gaussian_no_constant")

json_results <- rjson::toJSON(output_this_json)

Pass R model outputs to Python as a json object

In [10]:
r_results = json.loads(str(robjects.globalenv['json_results'][0]))

Check models with a constant

In [23]:
for family in model_families:
    python_model = GLM(endog = df.y, exog = df.drop(columns = "y"), family = model_families[family])
    python_results = python_model.fit(method="BIRLS")

    assert np.isclose(
            python_results.fittedvalues,
            r_results[family + "_constant"]["fittedvalues"],
            atol=1e-05,
        ).all()

    assert np.isclose(
            python_results.resid_working,
            r_results[family + "_constant"]["resid_working"],
            atol=1e-05,
        ).all()

    assert np.isclose(
            python_results.params,
            r_results[family + "_constant"]["params"],
            atol=1e-05,
        ).all()
    
print("Values match between R and Python!")

Values match between R and Python!


Check for models with no constant

In [25]:
for family in model_families:
    # drop the const column
    python_model = GLM(endog = df.y, exog = df.drop(columns = ["y", "const"]), family = model_families[family])
    python_results = python_model.fit(method="BIRLS")

    assert np.isclose(
            python_results.fittedvalues,
            r_results[family + "_no_constant"]["fittedvalues"],
            atol=1e-05,
        ).all()

    assert np.isclose(
            python_results.resid_working,
            r_results[family + "_no_constant"]["resid_working"],
            atol=1e-05,
        ).all()

    assert np.isclose(
            python_results.params,
            r_results[family + "_no_constant"]["params"],
            atol=1e-05,
        ).all()
    
print("Values match between R and Python!")

Values match between R and Python!
