## Imports

In [1]:
import spreg
import numpy as np
import pandas as pd
import geopandas as gpd
from libpysal.examples import load_example
from libpysal.weights import Kernel, fill_diagonal

ModuleNotFoundError: No module named 'spreg'

## Load data (Boston example)

In [2]:
# Look up the "Bostonhsg" libpysal example and read its shapefile with geopandas.
boston = load_example("Bostonhsg")
boston_shp_path = boston.get_path("boston.shp")
boston_df = gpd.read_file(boston_shp_path)

NameError: name 'load_example' is not defined

### Manually transform data and set up model matrices

In [3]:
# Hand-computed covariate transforms, stored as new columns on boston_df.
boston_df["NOXSQ"] = (boston_df["NOX"] * 10) ** 2
boston_df["RMSQ"] = boston_df["RM"] ** 2
boston_df["LOGDIS"] = np.log(boston_df["DIS"].to_numpy())
boston_df["LOGRAD"] = np.log(boston_df["RAD"].to_numpy())
boston_df["TRANSB"] = boston_df["B"].to_numpy() / 1000
boston_df["LOGSTAT"] = np.log(boston_df["LSTAT"].to_numpy())

# Covariate columns, in the order they appear in the design matrix.
fields = [
    "RMSQ", "AGE", "LOGDIS", "LOGRAD", "TAX", "PTRATIO", "TRANSB",
    "LOGSTAT", "CRIM", "ZN", "INDUS", "CHAS", "NOXSQ",
]

# Design matrix and response: the response is the natural log of CMEDV
# (corrected median house prices, per the original author's note).
X = boston_df[fields].to_numpy()
y = np.log(boston_df["CMEDV"].to_numpy())

NameError: name 'boston_df' is not defined

### Make weights matrix and set diagonal to 0 (necessary for lag model)

In [None]:
# Kernel weights from the x/y coordinate columns (k=50, fixed=False), with the
# diagonal then set to 0 as the lag model requires.
point_coords = boston_df[["x", "y"]]
weights = fill_diagonal(Kernel(point_coords, k=50, fixed=False), 0)

## Test the formulas

In [None]:
# Fit each example formula with spreg.from_formula and print, per model, the
# type of the returned model object, the parsed formula, and the fit summary.
#
# Every call passes debug=True and unpacks the result as
# (model, parsed_formula). The per-model differences are only the formula
# string and a few extra keyword arguments, so they are listed once here and
# driven by a single loop instead of six copy-pasted call/print groups.
#
# Formula markers as used by these examples (see the spreg formula docs for
# the authoritative grammar): {...} wraps an inline arithmetic transform,
# <...> encloses the terms that the SLX/SLY examples lag spatially, and a
# trailing "&" is what the error-model examples add.

# The two error-model runs share one formula; only the estimation method differs.
error_formula = (
    "log(CMEDV) ~ {RM**2} + AGE + TAX + PTRATIO + {B/1000}"
    " + log(LSTAT) + CRIM + ZN + INDUS + CHAS + {(10*NOX)**2} + &"
)

# Each entry: (formula, extra keyword arguments for spreg.from_formula).
model_specs = [
    # Original OLS model, manually transformed fields
    (
        "log(CMEDV) ~ RMSQ + AGE + LOGDIS + LOGRAD + TAX + PTRATIO + TRANSB"
        " + LOGSTAT + CRIM + ZN + INDUS + CHAS + NOXSQ",
        {"name_y": "CMEDV", "name_x": fields},
    ),
    # OLS model, fields transformed using formulaic
    (
        "log(CMEDV) ~ {RM**2} + AGE + log(DIS) + log(RAD) + TAX + PTRATIO"
        " + {B/1000} + log(LSTAT) + CRIM + ZN + INDUS + CHAS + {(10*NOX)**2}",
        {"name_y": "CMEDV", "name_x": fields},
    ),
    # SLX model
    # note that type(model) == spreg.OLS as SLX is just smoothing covars
    (
        "log(CMEDV) ~ {RM**2} + AGE + log(RAD) + TAX + PTRATIO"
        " + {B/1000} + log(LSTAT) + <CRIM + ZN + INDUS + CHAS> + {(10*NOX)**2}",
        {"w": weights},
    ),
    # SLY model
    (
        "log(CMEDV) ~ {RM**2} + AGE + <log(CMEDV)>",
        {"w": weights},
    ),
    # Error model
    (
        error_formula,
        {"w": weights},
    ),
    # Error model with ML estimation
    (
        error_formula,
        {"method": "full", "w": weights},
    ),
]

# Loop at cell level (not inside a function) so that `formula`, `model` and
# `parsed_formula` remain available afterwards, holding the last fit.
for formula, extra_kwargs in model_specs:
    model, parsed_formula = spreg.from_formula(
        formula, boston_df, debug=True, **extra_kwargs
    )
    print(type(model))
    print(parsed_formula)
    print(model.summary)