### Simple hierarchical linear model (HLM) 
Evan Edwards

The HLM is: PV1MATH = 1 + Female + ESCS + random intercept for school (SchoolID)



In [None]:
# Imports
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score, mean_squared_error

In [None]:
# Read the PISA 2018 data set from its CSV file into a DataFrame
# (the path is relative, so the file must sit in the working directory)
PISA2018 = pd.read_csv(filepath_or_buffer="pisa2018.BayesBook.csv")

In [None]:
# Data processing: recode the string labels in the 'Female' column as numbers
# ('Female' -> 1, 'Male' -> 0); any other value is left untouched by replace()
PISA2018['Female'] = PISA2018['Female'].replace(
    to_replace={'Female': 1, 'Male': 0}
)

In [None]:
# Fixed-effects part of the model: PV1MATH regressed on Female (treated as a
# categorical via C(...)) and ESCS. No random-effects formula is supplied, so
# the model defaults to a random intercept per group.
formula = "PV1MATH ~ C(Female) + ESCS"

# Build the mixed linear model, grouping observations by the SchoolID column
model = smf.mixedlm(
    formula=formula,
    data=PISA2018,
    groups=PISA2018['SchoolID'],
)

# Fit the model; `result` is reused below for predictions
result = model.fit()

# Show the fitted model's summary table
print(result.summary())

In [None]:
# Predict PV1MATH for the training data and report R^2 and MSE as accuracy
# metrics, once without and once with the per-school random effects.

# result.predict() uses only the fixed effects ...
fixed_pred = result.predict(PISA2018)

# ... so each row's school random effect is looked up by SchoolID, the per-row
# entries are flattened into a single array, and that array is added on top.
school_effects = PISA2018['SchoolID'].map(result.random_effects)
re_pred = fixed_pred + np.concatenate(school_effects)

# Observed scores that both sets of predictions are compared against
actual = PISA2018["PV1MATH"]

# Accuracy of the fixed-effects-only predictions
print(f'The R^2 score of the fixed effects model is: {r2_score(actual, fixed_pred)}')
print(f'The MSE of the fixed effects model is: {mean_squared_error(actual, fixed_pred)}')

# Accuracy of the fixed effects + random effects predictions
print(f'The R^2 score of the model is: {r2_score(actual, re_pred)}')
print(f'The MSE of the model is: {mean_squared_error(actual, re_pred)}')


In [None]:
# Plotting the predictions for the training data in 3D
# (x = Female, y = ESCS, z = PV1MATH)
# Black is the fixed effects, no random intercepts
# Red is the fixed effects + random intercepts
# Blue is the actual values (drawn with the default scatter color)
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.view_init(elev=20, azim=30)

# The prediction variables computed earlier are named fixed_pred and re_pred;
# the previous names (pred / pred_re) were never defined and raised NameError.
ax.scatter3D(PISA2018["Female"], PISA2018["ESCS"], fixed_pred, s=0.2, c="black")
ax.scatter3D(PISA2018["Female"], PISA2018["ESCS"], PISA2018["PV1MATH"], s=0.2)
ax.scatter3D(PISA2018["Female"], PISA2018["ESCS"], re_pred, s=0.2, c="red")

ax.set_xlabel("Female")
ax.set_ylabel('ESCS')
ax.set_zlabel("PV1MATH")
# Female only takes the values 0 and 1, so show just those two ticks
ax.set_xticks([0, 1])
# Zoom out slightly; the aspect ratio itself is left at its default (None)
ax.set_box_aspect(None, zoom=0.85)
plt.show()