In [3]:
import pandas as pd
import numpy as np
import geopandas as gpd
from pysal.model import mgwr
# from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns

import statsmodels.api as sm
import matplotlib

import libpysal
import spreg
import esda

import os

In [4]:
# Input locations, relative to the notebook's working directory.
modeling_csv_path = "../data/modeling.csv"
boundaries_shp_path = "../data/shp/merge_all.shp"

# Tabular modelling variables (CSV) and the polygon layer (shapefile).
df = pd.read_csv(modeling_csv_path)
gdf = gpd.read_file(boundaries_shp_path)

In [5]:
# Cast the CSV identifier to str so it can be matched against the shapefile's
# 'geoid20' field (presumably stored as text -- confirm against the shapefile).
df = df.assign(Geography=df['Geography'].astype(str))

# how="right" keeps every row of `df`, including any that have no matching
# polygon in `gdf`; such rows would come back with a missing geometry.
merge = pd.merge(
    left=gdf,
    right=df,
    how="right",
    left_on="geoid20",
    right_on="Geography",
)

# Number of rows in the merged frame.
merge.shape[0]

286

In [6]:
# Build the modelling table from the merged frame: the 'Geography' identifier
# becomes the row index, followed by the predictors and finally the response.
response_column = '311_index_per_property'
predictor_columns = [
    'his_num_311_per_property',
    'neighbor_his_num_per_property',
    'snow_depth',
    'percent_below_poverty',
    'percent_civilian_unemployed',
    'per_capita_income',
    'percent_no_highschool',
    'percent_65older',
    'percent_17younger',
    'percent_household_disability',
    'percent_single_parent_household',
    'percent_minority',
    'percent_notwell_english',
    'percent_singleunits',
    'percent_multiunit',
    'percent_mobile_homes',
    'percent_owneroccupiedunit',
    'percent_crowding',
    'percent_group_quarters',
    'median_year_properties_built',
    'median_value_properties_built',
    'percent_no_vehicle',
]

x_1 = merge[['Geography', *predictor_columns, response_column]].set_index("Geography")

# pop() removes the response column from x_1 in place, so x_1 holds only the
# predictors from here on; the removed column is kept as `y`.
y = x_1.pop(response_column)

# Standardization function
def standarize_data(data, stats):
    """Z-score ``data`` using precomputed per-column statistics.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Values to standardise.
    stats : mapping or pandas.DataFrame
        Must provide ``stats['mean']`` and ``stats['std']``. When these are
        pandas objects (e.g. ``data.describe().transpose()``), they are
        aligned with ``data`` by label.

    Returns
    -------
    Same type as ``data``
        ``(data - mean) / std``. Columns whose std is exactly 0 (constant
        columns) are divided by 1 instead, so they come back as 0 rather
        than NaN.

    Notes
    -----
    The function name keeps its original spelling so existing callers work.
    """
    center = stats['mean']
    scale = stats['std']

    # A constant column has std == 0 and (value - mean) == 0, so dividing
    # would yield 0/0 = NaN. Fall back to a unit scale for those entries.
    if hasattr(scale, 'where'):
        # pandas object: keep its labels so alignment with `data` is unchanged.
        scale = scale.where(scale != 0, 1)
    else:
        scale = np.where(np.asarray(scale) == 0, 1, scale)

    return (data - center) / scale

# Z-score every predictor using its own mean and sample standard deviation
# (the 'mean' and 'std' rows produced by describe()).
x_1_stats = x_1.describe().transpose()
x_2 = standarize_data(x_1,x_1_stats)

# Queen-contiguity spatial weights built from the merged frame's geometries.
# `use_index` is passed explicitly: leaving it unset emits a FutureWarning
# because the default is scheduled to change; False keeps the current ids.
w = libpysal.weights.Queen.from_dataframe(merge, use_index=False)

# Row-standardise the weights explicitly. esda.Moran applies its default
# transformation ("r") to the weights object it is given, so the lag model
# below was already receiving row-standardised weights; stating it here means
# that no longer depends on Moran being computed first.
w.transform = "r"

# Global Moran's I of the response, as a check for spatial autocorrelation.
moran = esda.Moran(merge["311_index_per_property"], w)
print("moran I: " + str(moran.I))

y_name = "311_index_per_property"
# 1-D response vector, in the same row order as `merge` (and hence `x_2`).
y = merge[y_name].to_numpy()

x_names = [
    'his_num_311_per_property',
    'neighbor_his_num_per_property',
    'snow_depth',
    'percent_below_poverty',
    'percent_civilian_unemployed',
    'per_capita_income',
    'percent_no_highschool',
    'percent_65older',
    'percent_17younger',
    'percent_household_disability',
    'percent_single_parent_household',
    'percent_minority',
    'percent_notwell_english',
    'percent_singleunits',
    'percent_multiunit',
    'percent_mobile_homes',
    'percent_owneroccupiedunit',
    'percent_crowding',
    'percent_group_quarters',
    'median_year_properties_built',
    'median_value_properties_built',
    'percent_no_vehicle',
]

# Design matrix: one row per observation, one column per standardised
# predictor, in the order given by x_names.
x = x_2[x_names].to_numpy()

# lag model
lag_model = spreg.ML_Lag(
    y,
    x,
    w=w,
    name_y=y_name,
    name_x=x_names,
)

moran I: 0.2107144748390494


  w = libpysal.weights.Queen.from_dataframe(merge)


In [7]:
# Display the `pr2` attribute of the fitted lag model; its value matches the
# "Pseudo R-squared" line of the printed model summary.
lag_model.pr2

0.5503878771787745

In [8]:
# Root-mean-squared error of the lag model's fitted values against the
# observed response.
y_pred = lag_model.predy

# `mean_squared_error(..., squared=False)` was deprecated in scikit-learn 1.4
# and removed in 1.6. Taking the square root of the MSE gives the same number
# on every version.
rmse = np.sqrt(mean_squared_error(y, y_pred))

# The model evaluated here is the spatial lag model (spreg.ML_Lag), not GWR.
print("RMSE of spatial lag model: ", rmse)

RMSE of GWR:  3.5681180959567116




In [9]:
# Print the model's text summary (fit statistics followed by the coefficient
# table). print() is used so the multi-line string renders with line breaks.
print(lag_model.summary)

REGRESSION RESULTS
------------------

SUMMARY OF OUTPUT: MAXIMUM LIKELIHOOD SPATIAL LAG (METHOD = FULL)
-----------------------------------------------------------------
Data set            :     unknown
Weights matrix      :     unknown
Dependent Variable  :311_index_per_property                Number of Observations:         286
Mean dependent var  :     10.9277                Number of Variables   :          24
S.D. dependent var  :      5.3303                Degrees of Freedom    :         262
Pseudo R-squared    :      0.5504
Spatial Pseudo R-squared:  0.5346
Log likelihood      :   -771.3945
Sigma-square ML     :     12.7315                Akaike info criterion :    1590.789
S.E of regression   :      3.5681                Schwarz criterion     :    1678.533

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     z-Statistic     Probability
-----------------------------------------------------