# Demonstrating statsmodels Modin Interoperability
### Currently, statsmodels is not fully interoperable with Modin. All the examples in this section are taken or adapted from https://www.statsmodels.org/devel/gettingstarted.html or https://www.statsmodels.org/stable/index.html

In [None]:
import statsmodels.api as sm
import pandas
import modin.pandas as pd
from patsy import dmatrices

#### Example with sm.OLS()

In [None]:
# Fetch the "Guerry" dataset from the R package "HistData" and wrap its data
# frame in a Modin DataFrame (``pd`` is ``modin.pandas`` in this notebook).
df = sm.datasets.get_rdataset(dataname="Guerry", package="HistData").data
modin_df = pd.DataFrame(data=df)

In [None]:
# Columns kept for the regression example. The list is named
# ``selected_columns`` rather than ``vars`` so that the built-in ``vars()``
# is not shadowed in the notebook namespace.
selected_columns = ['Department', 'Lottery', 'Literacy', 'Wealth', 'Region']

# Restrict the Modin frame to those columns.
modin_df = modin_df[selected_columns]

# Display the last five rows of the reduced frame.
modin_df[-5:]

In [None]:
# Drop every row that contains a missing value.
modin_df = modin_df.dropna()

# Display the last five remaining rows.
modin_df.tail(5)

In [None]:
# Build the response (y) and design (X) matrices from the patsy formula,
# reading the variables from the Modin frame.
y, X = dmatrices(
    "Lottery ~ Literacy + Wealth + Region",
    data=modin_df,
    return_type="dataframe",
)

In [None]:
# Wrap both matrices in Modin DataFrames before handing them to statsmodels.
y, X = pd.DataFrame(y), pd.DataFrame(X)

In [None]:
# Describe the ordinary-least-squares model: y is the response, X the design matrix.
mod = sm.OLS(endog=y, exog=X)

In [None]:
# Fit the model described above.
res = mod.fit()

# Render the regression report and print it.
summary = res.summary()
print(summary)

Currently, `sm.ols()` is not interoperable with Modin.

#### Example with sm.ols(formula=)

In [None]:
# Small three-column toy dataset for the formula-API example.
toy_data = {
    "A": [10, 20, 30, 40, 50],
    "B": [20, 30, 10, 40, 50],
    "C": [32, 234, 23, 23, 42523],
}
modin_df = pd.DataFrame(toy_data)

In [None]:
# NOTE: this import rebinds ``sm`` from ``statsmodels.api`` to the formula API.
import statsmodels.formula.api as sm

# Regress A on B and C using the formula interface, with the Modin frame as data.
ols_model = sm.ols(formula="A ~ B + C", data=modin_df)
result = ols_model.fit()

# Print the fitted coefficients.
print(result.params)

In [None]:
# Render the full regression report for the formula-based fit and print it.
summary = result.summary()
print(summary)

## Replicating statsmodels workflow with pandas

In [None]:
import statsmodels.api as sm

# Fetch the "Guerry" dataset from the R package "HistData" and build a plain
# pandas DataFrame from its data frame.
df = sm.datasets.get_rdataset(dataname="Guerry", package="HistData").data
pandas_df = pandas.DataFrame(data=df)

In [None]:
# Columns kept for the regression example. The list is named
# ``selected_columns`` rather than ``vars`` so that the built-in ``vars()``
# is not shadowed in the notebook namespace.
selected_columns = ['Department', 'Lottery', 'Literacy', 'Wealth', 'Region']

# Restrict the pandas frame to those columns.
pandas_df = pandas_df[selected_columns]

In [None]:
# Drop every row of the pandas frame that contains a missing value.
pandas_df = pandas_df.dropna()

In [None]:
# Build the response (y) and design (X) matrices from the patsy formula.
# The data source is the prepared ``pandas_df`` (column subset, rows with
# missing values dropped) rather than the raw ``df``, so the preparation
# steps above are actually used and this cell mirrors the Modin workflow.
y, X = dmatrices(
    'Lottery ~ Literacy + Wealth + Region',
    data=pandas_df,
    return_type='dataframe',
)

In [None]:
# Wrap both matrices in pandas DataFrames before handing them to statsmodels.
y, X = pandas.DataFrame(y), pandas.DataFrame(X)

In [None]:
# Describe the ordinary-least-squares model: y is the response, X the design matrix.
mod = sm.OLS(endog=y, exog=X)

In [None]:
# Fit the model described above.
res = mod.fit()

# Render the regression report and print it.
summary = res.summary()
print(summary)

#### Example with sm.ols(formula=)

In [None]:
# Small three-column toy dataset for the formula-API example.
# Built with plain ``pandas``: in this notebook ``pd`` is ``modin.pandas``,
# so ``pd.DataFrame`` would create a Modin frame and this pandas baseline
# would not actually exercise pandas.
pandas_df = pandas.DataFrame(
    {
        "A": [10, 20, 30, 40, 50],
        "B": [20, 30, 10, 40, 50],
        "C": [32, 234, 23, 23, 42523],
    }
)

In [None]:
# NOTE: this import rebinds ``sm`` to the statsmodels formula API.
import statsmodels.formula.api as sm

# Regress A on B and C using the formula interface, with ``pandas_df`` as data.
ols_model = sm.ols(formula="A ~ B + C", data=pandas_df)
result = ols_model.fit()

# Print the fitted coefficients.
print(result.params)

In [None]:
# Render the full regression report for the formula-based fit and print it.
summary = result.summary()
print(summary)