# Recreation of Section 5 of Cleveland's Paper

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import Loess

In [None]:
# Load the raw observations. The path is relative, so it is resolved
# against the current working directory.
DATA_PATH = "dataset.csv"
df = pd.read_csv(DATA_PATH)
# Preview the first rows to check the columns that were read.
df.head()

In [None]:
# The first two columns only carry index values and are not needed.
index_columns = df.columns[[0, 1]]
df = df.drop(columns=index_columns)
# Preview again to confirm that only the data columns remain.
df.head()

## Figure 3

In [None]:
# 4x4 scatterplot matrix: the diagonal panels carry the variable names and
# each off-diagonal panel plots the column variable (x) against the row
# variable (y).
labels = ["radiation", "ozone", "temperature", "wind"]
fig, axs = plt.subplots(4, 4, constrained_layout=True)
fig.set_size_inches(7, 7)
for (row, col), panel in np.ndenumerate(axs):
    on_diagonal = row == col
    # Tick marks are kept only on the outer edge of the grid, and never on
    # the diagonal label panels.
    panel.xaxis.set_visible(not on_diagonal and row in (0, 3))
    panel.yaxis.set_visible(not on_diagonal and col in (0, 3))
    if on_diagonal:
        panel.plot()
        panel.text(
            .5,
            .5,
            labels[row].capitalize(),
            horizontalalignment='center',
            verticalalignment='center',
            transform=panel.transAxes,
            fontsize=13,
        )
        continue
    if row == 0:
        # Top row: move the x ticks to the top edge.
        panel.xaxis.tick_top()
    if col == 3:
        # Right-most column: move the y ticks to the right edge.
        panel.yaxis.tick_right()
    panel.scatter(df[labels[col]], df[labels[row]], s=15, color="black")

## Figure 4

In [None]:
# Perform a linear estimation with f = 0.4: ozone is fitted on the three
# predictors and the fit is evaluated at every observed point.
X = df[["radiation", "temperature", "wind"]]
y = df[["ozone"]]
loess = Loess.Loess(X, y, 1)
# Iterate over the rows of a private array copy instead of indexing the
# frame by position; each `x` is the 1-D predictor vector of one observation.
estimations = [loess.estimate(x, 0.4) for x in X.to_numpy(copy=True)]
# Store fitted values and residuals (observed minus fitted) alongside the data.
df["linear_0.4_estimation"] = estimations
df["linear_0.4_residual"] = df["ozone"] - df["linear_0.4_estimation"]

In [None]:
# Normal quantile plot of the residuals from the f = 0.4 linear fit.
fig, ax = plt.subplots()
ax.set_box_aspect(1)
# fit=False: only the sample points are drawn, no fitted reference line.
res = stats.probplot(df["linear_0.4_residual"], plot=ax, fit=False)
# The first (and only) line on the axes holds the quantile markers; restyle
# them as small open black circles.
markers = ax.get_lines()[0]
markers.set_color("black")
markers.set_markerfacecolor("white")
markers.set_markersize(4.0)
ax.set_ylim(-50, 75)
# Clear the title that probplot sets and relabel the axes.
ax.set_title("")
ax.set_xlabel("Normal Quantiles")
ax.set_ylabel("Residuals")
plt.show()

In [None]:
# Smooth the absolute residuals against the fitted values with f = 2/3.
# Sorting by the x variable first makes the smoothed values come out in
# increasing-x order, ready to be drawn as a line.
df.sort_values(by="linear_0.4_estimation", inplace=True)
X = df[["linear_0.4_estimation"]]
y = df[["linear_0.4_residual"]].abs()
# Reuse the existing loess object with the new single-predictor data.
loess.reset_xx(X)
loess.reset_yy(y)
# Evaluate the smooth at every observed x; each `x` is a length-1 array.
estimations = [loess.estimate(x, 2/3) for x in X.to_numpy(copy=True)]

In [None]:
# Absolute residuals against fitted values, with the smooth drawn on top.
fig, ax = plt.subplots()
ax.set_box_aspect(1)
fitted = df["linear_0.4_estimation"]
abs_residuals = abs(df["linear_0.4_residual"])
# Open circles for the observations, solid black line for the smooth.
ax.scatter(fitted, abs_residuals, facecolors='none', edgecolors='black', s=20)
ax.plot(X, estimations, color="black")
ax.set_ylim(0, 75)
ax.set_xlabel("Fitted Values")
ax.set_ylabel("Absolute Residuals")

In [None]:
# Smooth the residuals against solar radiation with f = 2/3.
# Sorting by the x variable first makes the smoothed values come out in
# increasing-x order, ready to be drawn as a line.
df.sort_values(by="radiation", inplace=True)
X = df[["radiation"]]
y = df[["linear_0.4_residual"]]
# Reuse the existing loess object with the new single-predictor data.
loess.reset_xx(X)
loess.reset_yy(y)
# Evaluate the smooth at every observed x; each `x` is a length-1 array.
estimations = [loess.estimate(x, 2/3) for x in X.to_numpy(copy=True)]

In [None]:
# Residuals against solar radiation, with the smooth drawn on top.
fig, ax = plt.subplots()
ax.set_box_aspect(1)
# Open circles for the observations, solid black line for the smooth.
ax.scatter(
    df["radiation"],
    df["linear_0.4_residual"],
    facecolors='none',
    edgecolors='black',
    s=20,
)
ax.plot(X, estimations, color="black")
ax.set_xlabel("Solar Radiation")
ax.set_ylabel("Residuals")

In [None]:
# Smooth the residuals against temperature with f = 2/3.
# Sorting by the x variable first makes the smoothed values come out in
# increasing-x order, ready to be drawn as a line.
df.sort_values(by="temperature", inplace=True)
X = df[["temperature"]]
y = df[["linear_0.4_residual"]]
# Reuse the existing loess object with the new single-predictor data.
loess.reset_xx(X)
loess.reset_yy(y)
# Evaluate the smooth at every observed x; each `x` is a length-1 array.
estimations = [loess.estimate(x, 2/3) for x in X.to_numpy(copy=True)]

In [None]:
# Residuals against temperature, with the smooth drawn on top.
fig, ax = plt.subplots()
ax.set_box_aspect(1)
# Open circles for the observations, solid black line for the smooth.
ax.scatter(
    df["temperature"],
    df["linear_0.4_residual"],
    facecolors='none',
    edgecolors='black',
    s=20,
)
ax.plot(X, estimations, color="black")
ax.set_xlabel("Temperature")
ax.set_ylabel("Residuals")

In [None]:
# Smooth the residuals against wind speed with f = 2/3.
# Sorting by the x variable first makes the smoothed values come out in
# increasing-x order, ready to be drawn as a line.
df.sort_values(by="wind", inplace=True)
X = df[["wind"]]
y = df[["linear_0.4_residual"]]
# Reuse the existing loess object with the new single-predictor data.
loess.reset_xx(X)
loess.reset_yy(y)
# Evaluate the smooth at every observed x; each `x` is a length-1 array.
estimations = [loess.estimate(x, 2/3) for x in X.to_numpy(copy=True)]

In [None]:
# Residuals against wind speed, with the smooth drawn on top.
fig, ax = plt.subplots()
ax.set_box_aspect(1)
# Open circles for the observations, solid black line for the smooth.
ax.scatter(
    df["wind"],
    df["linear_0.4_residual"],
    facecolors='none',
    edgecolors='black',
    s=20,
)
ax.plot(X, estimations, color="black")
ax.set_xlabel("Wind Speed")
ax.set_ylabel("Residuals")
ax.set_ylim(-50, 75)

## Figure 6

In [None]:
# Refit for the conditioning plots below: the response is the cube root of
# ozone, the predictors are radiation, temperature and wind, and the local
# fit uses degree 2.
X = df[["radiation", "temperature", "wind"]]
y = df[["ozone"]]
y_cube_root = np.power(y, 1/3)
loess = Loess.Loess(X, y_cube_root, 2)
# NOTE(review): the three reset calls repeat the arguments already given to
# the constructor and look redundant -- confirm against the Loess
# implementation before removing them.
loess.reset_xx(X)
loess.reset_yy(y_cube_root)
loess.reset_degree(2)

In [None]:
# Fixed wind speed and radiation
# Each panel shows the fitted surface as a function of temperature for one
# (radiation, wind speed) pair: radiation varies down the rows and wind
# speed across the columns.
wind_speed = [5, 10, 15]
temperature = np.linspace(60, 95, 100)
radiation = [290, 170, 50]

# squeeze=False keeps `axs` two-dimensional whatever the grid size.
fig, axs = plt.subplots(
    len(radiation), len(wind_speed), constrained_layout=True, squeeze=False
)
fig.set_size_inches(7, 7)

bottom_row = len(radiation) - 1
for col, wind in enumerate(wind_speed):
    for row, rad in enumerate(radiation):
        panel = axs[row, col]
        # Evaluate the fit along the temperature grid with f = 0.8; the
        # predictor order matches the columns used to build the loess object.
        estimations = [
            loess.estimate(np.array([rad, temp, wind]), 0.8)
            for temp in temperature
        ]
        # Only the left column keeps its y axis and only the bottom row
        # keeps its x axis.
        panel.yaxis.set_visible(col == 0)
        panel.xaxis.set_visible(row == bottom_row)
        panel.set_ylim(0, 7)
        panel.plot(temperature, estimations, color="black")

## Figure 7

In [None]:
# Fixed wind speed and temperature
# Each panel shows the fitted surface as a function of radiation for one
# (temperature, wind speed) pair: temperature varies down the rows and wind
# speed across the columns.
wind_speed = [5, 10, 15]
temperature = [90, 76, 62]
radiation = np.linspace(0, 300, 100)

# squeeze=False keeps `axs` two-dimensional whatever the grid size.
fig, axs = plt.subplots(
    len(temperature), len(wind_speed), constrained_layout=True, squeeze=False
)
fig.set_size_inches(7, 7)

bottom_row = len(temperature) - 1
for col, wind in enumerate(wind_speed):
    for row, temp in enumerate(temperature):
        panel = axs[row, col]
        # Evaluate the fit along the radiation grid with f = 0.8; the
        # predictor order matches the columns used to build the loess object.
        estimations = [
            loess.estimate(np.array([rad, temp, wind]), 0.8)
            for rad in radiation
        ]
        # Only the left column keeps its y axis and only the bottom row
        # keeps its x axis.
        panel.yaxis.set_visible(col == 0)
        panel.xaxis.set_visible(row == bottom_row)
        panel.set_ylim(0, 7)
        panel.plot(radiation, estimations, color="black")

## Figure 8

In [None]:
# Fixed temperature and radiation
# Each panel shows the fitted surface as a function of wind speed for one
# (radiation, temperature) pair: radiation varies down the rows and
# temperature across the columns.
wind_speed = np.linspace(3, 18, 100)
temperature = [62, 76, 90]
radiation = [290, 170, 50]

# squeeze=False keeps `axs` two-dimensional whatever the grid size.
fig, axs = plt.subplots(
    len(radiation), len(temperature), constrained_layout=True, squeeze=False
)
fig.set_size_inches(7, 7)

bottom_row = len(radiation) - 1
for col, temp in enumerate(temperature):
    for row, rad in enumerate(radiation):
        panel = axs[row, col]
        # Evaluate the fit along the wind-speed grid with f = 0.8; the
        # predictor order matches the columns used to build the loess object.
        estimations = [
            loess.estimate(np.array([rad, temp, wind]), 0.8)
            for wind in wind_speed
        ]
        # Only the left column keeps its y axis and only the bottom row
        # keeps its x axis.
        panel.yaxis.set_visible(col == 0)
        panel.xaxis.set_visible(row == bottom_row)
        panel.set_ylim(0, 7)
        panel.plot(wind_speed, estimations, color="black")