In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the measurements from "dataset.csv" (resolved against the current
# working directory) and preview the first five rows.
data = pd.read_csv(filepath_or_buffer="dataset.csv")
data.head(n=5)

In [None]:
# The two leading columns are index columns and are not needed for the analysis.
# NOTE: they are selected by position on the current frame, so running this
# cell a second time would discard two more columns.
index_like_columns = data.columns[[0, 1]]
data = data.drop(columns=index_like_columns)
data.head()

In [None]:
# Scatter-plot matrix of the four variables: the variable name is written in
# each diagonal panel, every other panel plots the row variable (y) against
# the column variable (x). Tick marks are shown only on the outer border.
labels = ["radiation", "ozone", "temperature", "wind"]
n_vars = len(labels)
last = n_vars - 1
fig, axs = plt.subplots(n_vars, n_vars, figsize=(10, 10))
for row, y_name in enumerate(labels):
    for col, x_name in enumerate(labels):
        ax = axs[row, col]
        on_diagonal = row == col

        # x ticks only on the top/bottom rows, y ticks only on the left/right
        # columns, and never on a diagonal (label) panel.
        ax.xaxis.set_visible(not on_diagonal and row in (0, last))
        ax.yaxis.set_visible(not on_diagonal and col in (0, last))

        if on_diagonal:
            ax.plot()
            ax.text(
                .5,
                .5,
                y_name.capitalize(),
                horizontalalignment='center',
                verticalalignment='center',
                transform=ax.transAxes,
                fontsize=13,
            )
            continue

        # Move ticks to the outside of the grid on the top row / right column.
        if row == 0:
            ax.xaxis.tick_top()
        if col == last:
            ax.yaxis.tick_right()
        ax.scatter(data[x_name], data[y_name], s=15)

In [None]:
# Predictors, their z-scored version (column mean removed, divided by the
# column standard deviation), and the response kept as a one-column frame.
X = data.loc[:, ["radiation", "temperature", "wind"]]
standarized_X = X.sub(X.mean()).div(X.std())
Y = data.loc[:, ["ozone"]]

In [None]:
def tricubic(x):
    """Tricube kernel: ``(1 - |x|**3)**3`` where ``|x| < 1``, and 0 elsewhere.

    Parameters
    ----------
    x : float or array-like of float
        Scaled distance(s) at which to evaluate the kernel.

    Returns
    -------
    float or numpy.ndarray
        A Python float for scalar input; otherwise an array with the same
        shape as ``x`` holding the element-wise kernel values.
    """
    u = np.abs(np.asarray(x, dtype=float))
    # Clip before cubing so values far outside the support cannot overflow;
    # the mask sets them to zero regardless.
    weights = np.where(u < 1, (1 - np.minimum(u, 1) ** 3) ** 3, 0.0)
    # Keep the scalar-in / scalar-out contract of the original function.
    return weights.item() if weights.ndim == 0 else weights
# Draw the kernel on [-2, 2] (100 evenly spaced points) as a visual check.
grid = np.linspace(-2, 2, 100)
plt.plot(grid, [tricubic(u) for u in grid])

In [None]:
# Locally weighted linear regression (LOESS) of ozone on radiation,
# temperature and wind, evaluated at every observation.
f = 0.4  # span: fraction of the observations used for each local fit
q = int(f * len(data))  # number of neighbors entering each local fit
if q < 1:
    raise ValueError("span f is too small: no neighbors would be selected")

# All distances are measured on the globally standardized predictors, so no
# variable dominates because of its units and the neighbor search and the
# kernel weights share one metric.
scaled = standarized_X.to_numpy(dtype=float)
# Local model: ozone ~ intercept + radiation + temperature + wind.
design = np.column_stack([np.ones(len(X)), X.to_numpy(dtype=float)])
# Positional arrays avoid mixing index labels with positional lookups.
target = Y.to_numpy(dtype=float).ravel()

res = []
for i in range(len(data)):
    dist_to_all = np.linalg.norm(scaled - scaled[i], axis=1)

    # q nearest rows, leaving out row i itself. The exclusion is by position,
    # so a duplicated row can never be dropped in place of the query row.
    order = np.argsort(dist_to_all, kind="stable")
    neighbors = order[order != i][:q]

    # Tricube weights on distances scaled by the farthest selected neighbor
    # (which therefore receives weight 0).
    distances = dist_to_all[neighbors]
    d = distances.max()
    if d > 0:
        weights = np.array([tricubic(u) for u in distances / d])
    else:
        # Every neighbor coincides with the query point: weight them equally
        # instead of dividing by zero.
        weights = np.ones(len(neighbors))

    # Weighted least squares: scaling rows by sqrt(w) turns it into an
    # ordinary least-squares problem. lstsq avoids forming and inverting
    # A^T W A and still returns a solution when the local design is
    # rank-deficient.
    root_w = np.sqrt(weights)
    coef, *_ = np.linalg.lstsq(
        design[neighbors] * root_w[:, None],
        target[neighbors] * root_w,
        rcond=None,
    )

    # Store a plain float so that `res` converts to a 1-D array.
    res.append(float(design[i] @ coef))

In [None]:
# Store the fitted values next to the observations, then the residuals
# (observed ozone minus fitted ozone).
data["prediction"] = np.asarray(res).reshape(-1)
data["residual"] = data["ozone"].sub(data["prediction"])

In [None]:
# Absolute residuals against fitted values.
fig, ax = plt.subplots()

ax.scatter(data["prediction"], data["residual"].abs())
ax.set_xlabel("Fitted Values")
ax.set_ylabel("Absolute Residuals")

In [None]:
# Residuals against solar radiation, drawn in a square panel.
fig, ax = plt.subplots()
ax.set_box_aspect(1)
ax.scatter(data["radiation"], data["residual"])
ax.set_xlabel("Solar Radiation")
ax.set_ylabel("Residuals")

In [None]:
# Residuals against temperature, drawn in a square panel.
fig, ax = plt.subplots()
ax.set_box_aspect(1)
ax.scatter(data["temperature"], data["residual"])
ax.set_xlabel("Temperature")
ax.set_ylabel("Residuals")

In [None]:
# Residuals against wind speed, drawn in a square panel.
fig, ax = plt.subplots()
ax.set_box_aspect(1)
ax.scatter(data["wind"], data["residual"])
ax.set_xlabel("Wind Speed")
ax.set_ylabel("Residuals")