In [None]:
# If needed in Colab (run in its own cell; %pip targets the kernel's environment):
# %pip install statsmodels scipy

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import statsmodels.formula.api as smf
import statsmodels.api as sm
from scipy.stats import shapiro

# Seed NumPy's legacy global RNG so the np.random draws further down are
# reproducible on every fresh-kernel run.
SEED = 5551212
np.random.seed(SEED)




In [None]:
# ---------------------------------------------------------
# Simulate a sample in which y depends on x quadratically
# ---------------------------------------------------------
n = 1000

# Predictor: chi-square draws with 2 degrees of freedom.
x = np.random.chisquare(df=2, size=n)

# Additive Gaussian noise (mean 0, sd 10). It is drawn after x; the order of
# draws from the global RNG determines the simulated values.
noise = np.random.normal(loc=0, scale=10, size=n)

# Response: quadratic mean function 5 + 3x + 2x^2 plus the noise term.
y = 5 + 3 * x + 2 * x**2 + noise

dat = pd.DataFrame(dict(x=x, y=y))

# Raw relationship between the predictor and the response
scatter_fig, scatter_ax = plt.subplots(figsize=(6, 4))
scatter_ax.scatter(dat["x"], dat["y"], alpha=0.5)
scatter_ax.set(xlabel="x", ylabel="y", title="Scatterplot of y vs x")
plt.show()



In [None]:
# ---------------------------------------------------------
# Straight-line fit: regress y on x by ordinary least squares
# ---------------------------------------------------------
linear_spec = smf.ols(formula="y ~ x", data=dat)
f_linear = linear_spec.fit()

# Blank line, heading, then the full regression table.
print("\nLinear model (y ~ x) summary:", f_linear.summary(), sep="\n")



In [None]:
# ---------------------------------------------------------
# Diagnostics for the straight-line fit
# ---------------------------------------------------------

# Residuals against fitted values; the dashed horizontal line marks zero.
lin_resid_fig, lin_resid_ax = plt.subplots(figsize=(6, 4))
lin_resid_ax.scatter(f_linear.fittedvalues, f_linear.resid, alpha=0.5)
lin_resid_ax.axhline(0, color="k", linestyle="--")
lin_resid_ax.set(
    xlabel="Fitted values",
    ylabel="Residuals",
    title="Residuals vs Fitted (Linear model)",
)
plt.show()



In [None]:
# Normal QQ-plot of the linear model's residuals.
# line="s" is statsmodels' "standardized" reference line: the expected normal
# order statistics scaled by the sample standard deviation and shifted by the
# sample mean. NOTE: this is not what R's qqline() draws -- qqline() passes
# through the quartiles, which corresponds to line="q" here.
qq_linear_fig = sm.qqplot(f_linear.resid, line="s")
qq_linear_fig.axes[0].set_title("QQ-plot of Residuals (Linear model)")
plt.show()



In [None]:
# Formal normality check: Shapiro–Wilk test on the linear model's residuals
shapiro_linear = shapiro(f_linear.resid)
sh_stat, sh_p = shapiro_linear

# Heading (preceded by a blank line) followed by the formatted result.
report_lines = [
    "\nShapiro–Wilk test for residuals (linear model):",
    f"Statistic: {sh_stat:.4f}, p-value: {sh_p:.4g}",
]
print("\n".join(report_lines))



In [None]:
# ---------------------------------------------------------
# Quadratic fit: regress y on x and x^2
# ---------------------------------------------------------
# I(...) makes the formula parser treat x ** 2 as plain arithmetic on the
# column rather than as a formula operator.
nonlinear_spec = smf.ols(formula="y ~ x + I(x ** 2)", data=dat)
f_nonlinear = nonlinear_spec.fit()

# Blank line, heading, then the full regression table.
print("\nNonlinear model (y ~ x + x^2) summary:", f_nonlinear.summary(), sep="\n")



In [None]:
# ---------------------------------------------------------
# Diagnostics for the quadratic fit
# ---------------------------------------------------------

# Residuals against fitted values; the dashed horizontal line marks zero.
quad_resid_fig, quad_resid_ax = plt.subplots(figsize=(6, 4))
quad_resid_ax.scatter(f_nonlinear.fittedvalues, f_nonlinear.resid, alpha=0.5)
quad_resid_ax.axhline(0, color="k", linestyle="--")
quad_resid_ax.set(
    xlabel="Fitted values",
    ylabel="Residuals",
    title="Residuals vs Fitted (Nonlinear model)",
)
plt.show()



In [None]:
# Normal QQ-plot of the quadratic model's residuals.
# line="s" draws statsmodels' "standardized" reference line (normal order
# statistics scaled by the sample standard deviation, shifted by the mean).
qq_nonlinear_fig = sm.qqplot(f_nonlinear.resid, line="s")
qq_nonlinear_fig.axes[0].set_title("QQ-plot of Residuals (Nonlinear model)")
plt.show()