In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.graphics.regressionplots import (
    plot_partregress_grid,
    influence_plot,
    plot_regress_exog,
)

# Simulate a reproducible regression dataset
np.random.seed(42)
n = 10000
X = np.random.randn(n, 3)  # design matrix: one column per predictor
beta = np.array([0.5, -2, 1])  # true slope of each predictor
noise = np.random.randn(n) * 0.5  # standard-normal draws scaled by 0.5
y = X @ beta + noise  # response = linear signal + noise

# Collect the predictors and the response in a single table
data = pd.DataFrame(X, columns=["x1", "x2", "x3"]).assign(y=y)

# Estimate y from x1, x2, x3 plus a constant term by ordinary least squares
predictors = data.loc[:, ["x1", "x2", "x3"]]
X_with_const = sm.add_constant(predictors)  # design matrix including the constant
model = sm.OLS(endog=data["y"], exog=X_with_const).fit()

# Pairwise scatter plots of every column, with a title placed above the grid
sns.pairplot(data=data)
plt.gcf().suptitle("Scatter Plot Matrix", y=1.02)
plt.show()

# Residuals against fitted values, with a dashed reference line at zero
residuals = model.resid
fitted_values = model.fittedvalues
plt.scatter(x=fitted_values, y=residuals, alpha=0.3)
plt.axhline(y=0, linestyle="--", color="red")
plt.gca().set(
    xlabel="Fitted Values",
    ylabel="Residuals",
    title="Residuals vs. Fitted Values",
)
plt.show()

# Distribution of the residuals with a kernel density overlay
hist_ax = sns.histplot(data=residuals, kde=True)
hist_ax.set_title("Histogram of Residuals")
plt.show()

# Q-Q Plot of Residuals
# fit=True standardizes the residuals (estimated location and scale) before
# they are compared with standard-normal quantiles. Without it the raw
# residuals are plotted on their own scale, so the 45-degree reference line
# is only meaningful when they already have unit variance -- normally
# distributed residuals with a different spread would look non-normal.
sm.qqplot(residuals, line="45", fit=True)
plt.title("Q-Q Plot of Residuals")
plt.show()

# Cook's distance of every observation, drawn as a stem plot
influence = model.get_influence()
cooks_d = influence.cooks_distance[0]  # only the first element is plotted
obs_index = np.arange(len(cooks_d))
plt.stem(obs_index, cooks_d, markerfmt=",")
plt.gca().set(
    title="Cook's Distance Plot",
    xlabel="Observation Index",
    ylabel="Cook's Distance",
)
plt.show()

# Partial Regression Plots
# Start from an empty figure: plot_partregress_grid adds its own subplots to
# the figure it is given, so an Axes created up front (as plt.subplots does)
# would remain behind the grid as an empty, overlapping frame.
fig = plt.figure(figsize=(12, 8))
plot_partregress_grid(model, fig=fig)
plt.show()