In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from sklearn.neighbors import KNeighborsRegressor

In [None]:
# Seed NumPy's global generator so that Restart & Run All reproduces the
# same samples (and therefore the same figures) on every run.
np.random.seed(0)

# 20 noisy observations of the line y = 2x + 5 with x uniform on [0, 1).
X = np.random.uniform(size=(20,1))
Y = 2 * X + 5 + 0.2 * np.random.normal(size = X.shape)
plt.scatter(X, Y)
plt.xlabel('Observed value')
plt.ylabel('Target value')

In [None]:
# Fit a k-nearest-neighbour regressor (k = 1) on the sample and draw its
# prediction curve over a dense set of query points.
k = 1
kNN = KNeighborsRegressor(n_neighbors=k).fit(X, Y)

# Sorted query points so the prediction curve is drawn from left to right.
Xprime = np.sort(np.random.uniform(size=(10000, 1)), axis=0)
Yprime = kNN.predict(Xprime)

plt.plot(Xprime, Yprime, color='black', label="predictions")
plt.scatter(X, Y, label="data")
plt.legend()
plt.xlabel('Observed value')
plt.ylabel('Target / Predicted value')


In [None]:
# Compare k-NN fits for several neighbourhood sizes, one panel per k,
# laid out on a grid with two rows.
Ks = [1, 5, 10, 20]
n_cols = (len(Ks) + 1) // 2
figs, axes = plt.subplots(2, n_cols)
axs = axes.flatten()
for i, k in enumerate(Ks):
    panel = axs[i]

    kNN = KNeighborsRegressor(n_neighbors=k).fit(X, Y)
    # Fresh sorted query points for this panel's prediction curve.
    Xprime = np.sort(np.random.uniform(size=(10000, 1)), axis=0)
    Yprime = kNN.predict(Xprime)

    panel.plot(Xprime, Yprime, color='black', label="predictions")
    panel.scatter(X, Y, label="data")
    panel.set_title("k = " + str(k))
    panel.set_xlabel('Observed value')
    panel.set_ylabel('Target/Prediction')
plt.tight_layout()


# Increased sample size

In [None]:
def draw(m):
    """Sample ``m`` noisy points from ``y = 2x + 5`` and plot k-NN fits to them.

    The sample is first scattered on the current pyplot figure. A new figure
    with a two-row grid of panels is then created, one panel per
    neighbourhood size taken from ``[1, 5, 10, 20]``; each panel shows the
    sample and the k-NN prediction curve evaluated on 10000 sorted uniform
    query points.

    Neighbourhood sizes larger than ``m`` are skipped, because a k-NN model
    cannot use more neighbours than it has training points.

    Parameters
    ----------
    m : int
        Number of training points to sample; must be at least 1.

    Raises
    ------
    ValueError
        If ``m`` is smaller than 1.
    """
    if m < 1:
        raise ValueError("m must be at least 1, got " + str(m))

    X = np.random.uniform(size=(m,1))
    Y = 2 * X + 5 + 0.2 * np.random.normal(size = X.shape)
    plt.scatter(X, Y)
    plt.xlabel('Observed value')
    plt.ylabel('Target value')

    # Keep only the neighbourhood sizes this sample can support: scikit-learn
    # rejects n_neighbors greater than the number of fitted samples.
    Ks = [k for k in [1,5,10,20] if k <= m]
    figs, axes = plt.subplots(2, (len(Ks) + 1) // 2)
    axs = axes.flatten()
    for ax, k in zip(axs, Ks):
        kNN = KNeighborsRegressor(n_neighbors=k)
        kNN.fit(X,Y)
        Xprime = np.random.uniform(size=(10000,1))
        Xprime = np.sort(Xprime, axis=0)
        Yprime = kNN.predict(Xprime)
        ax.scatter(X, Y, label="data")
        ax.plot(Xprime, Yprime, label="predictions", color = 'black')
        ax.set_title("k = " + str(k))
        ax.set_xlabel('Observed value')
        ax.set_ylabel('Target/Prediction')

    # When some k values were skipped the grid has spare panels; hide them
    # so they do not render as empty axes.
    for ax in axs[len(Ks):]:
        ax.set_visible(False)

    plt.tight_layout()


In [None]:
# NOTE(review): the section heading says "Increased sample size", but 20 is
# the same sample size the earlier cells use - confirm the intended value.
draw(m=20)

# Linear regression

In [None]:
# 50 noisy observations of the line y = 2x + 5 with x uniform on [0, 1).
X = np.random.uniform(size=(50, 1))
noise = 0.2 * np.random.normal(size=X.shape)
Y = 2 * X + 5 + noise

p = plt.scatter(X, Y)
plt.xlabel('Observed value')
plt.ylabel('Target value')


In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Ordinary least-squares fit; Y is a column vector, so the fitted
# coefficient and intercept are unpacked from their array containers.
reg = LinearRegression().fit(X, Y)
slope = reg.coef_[0][0]
offset = reg.intercept_[0]
print("slope = ", slope, " offset = ", offset)

plt.scatter(X, Y)
# The fitted line is drawn through its values at x = 0 and x = 1.
plt.plot([0, 1], [offset, slope + offset], color="black")
plt.xlabel('Observed value')
plt.ylabel('Target / Predicted value')


# Non-linearity

In [None]:
# 150 noisy observations of a quadratic target, fitted with a straight line
# to show how a purely linear model handles a non-linear relationship.
X = np.random.uniform(size=(150,1))
Y = 1000 * X**2 + 2 * X + 5 + 25 * np.random.normal(size = X.shape)
reg = LinearRegression().fit(X, Y)
print("slope = ", reg.coef_[0][0], " offset = ", reg.intercept_[0])
# The points are scattered once only; the earlier default-colour scatter of
# the same data was redundant because this orange one is drawn over it.
plt.scatter(X, Y, color= "orange")
# The fitted line is drawn through its values at x = 0 and x = 1.
plt.plot([0, 1],[reg.intercept_[0], reg.coef_[0][0] + reg.intercept_[0]], color="black")
plt.xlabel('Observed value')
plt.ylabel('Target / Predicted value')


In [None]:
# Add x^2 as a second feature so a linear model can follow the curve.
X2 = np.c_[X, X**2]
reg = LinearRegression().fit(X2, Y)
print("slope = ", reg.coef_[0], " offset = ", reg.intercept_[0])

# Sorted query points, expanded with the same squared feature, for the curve.
Xprime = np.sort(np.random.uniform(size=(150, 1)), axis=0)
Xprime2 = np.c_[Xprime, Xprime * Xprime]
Yprime = reg.predict(Xprime2)

plt.scatter(X, Y, color="orange")
plt.plot(Xprime, Yprime, color="black")
plt.xlabel('Observed value')
plt.ylabel('Target / Predicted value')


# Corona

In [None]:
# Infection counts, one value per day; the list index is used as the day.
sick = [
    16, 20, 22, 37, 39, 59, 77, 99, 130, 164,
    200, 253, 318, 421, 524, 677, 838, 943, 1207, 1552,
    2000, 2463, 3011, 3403, 3824, 4247, 4695, 5358, 6092,
]

X = np.array(range(len(sick))).reshape(-1, 1)
Y = sick
reg = LinearRegression().fit(X, Y)
print("slope = ", reg.coef_[0], " offset = ", reg.intercept_)

plt.plot(X, Y, color="orange")
# The fitted line is drawn through its values on the first and last day.
last_day = len(sick) - 1
plt.plot(
    [0, last_day],
    [reg.intercept_, last_day * reg.coef_[0] + reg.intercept_],
    color="black",
)
plt.title('linear regression')
plt.xlabel('Day')
plt.ylabel('Number of infections')


In [None]:

# Fit a straight line to the logarithm of the counts: exponential growth
# becomes a straight line after a log transform.
X= np.array(range(0,len(sick))).reshape(-1, 1)
Y = np.log(sick)
reg = LinearRegression().fit(X, Y)
print("slope = ", reg.coef_[0], " offset = ", reg.intercept_)
plt.plot(X, Y, color= "orange")
# The fitted line is drawn through its values on the first and last day.
plt.plot([0, len(sick) - 1],[reg.intercept_, (len(sick) - 1)* reg.coef_[0] + reg.intercept_], color="black")
plt.title('linear regression - log transform')
plt.xlabel('Day')
# Y holds log counts in this cell, so the axis is labelled as such rather
# than as raw infection numbers.
plt.ylabel('log(Number of infections)')


In [None]:
# Map the log-space fit back to the original scale with exp and compare it
# with the raw counts.
Yprime = np.exp(reg.predict(X))

plt.plot(X, sick, color="orange")
plt.plot(X, Yprime, color="black")
plt.title('linear regression - exp of log transform')
plt.xlabel('Day')
plt.ylabel('Number of infections')


In [None]:
# 1-nearest-neighbour regressor fitted on every day, then evaluated on the
# same days it was trained on.
kNN = KNeighborsRegressor(n_neighbors=1).fit(X, sick)
Yprime = kNN.predict(X)

plt.plot(X, sick, color="orange")
plt.plot(X, Yprime, color="black")
plt.title('kNN')
plt.xlabel('Day')
plt.ylabel('Number of infections')


In [None]:

# Fit the log-transformed counts on the first 15 days only, then draw the
# resulting line across the whole observed period.
X= np.array(range(0,len(sick))).reshape(-1, 1)
Y = np.log(sick)
reg = LinearRegression().fit(X[:15], Y[:15])
print("slope = ", reg.coef_[0], " offset = ", reg.intercept_)
plt.plot(X, Y, color= "orange")
# The fitted line is drawn through its values on the first and last day.
plt.plot([0, len(sick) - 1],[reg.intercept_, (len(sick) - 1)* reg.coef_[0] + reg.intercept_], color="black")
plt.title('Linear Regression (15 days) - log transform')
plt.xlabel('Day')
# Y holds log counts in this cell, so the axis is labelled as such rather
# than as raw infection numbers.
plt.ylabel('log(Number of infections)')


In [None]:
# Map the current log-space fit back to the original scale with exp and
# compare it with the raw counts over all days.
Yprime = np.exp(reg.predict(X))

plt.plot(X, sick, color="orange")
plt.plot(X, Yprime, color="black")
plt.title('linear regression - (15 days) exp of log transform')
plt.xlabel('Day')
plt.ylabel('Number of infections')


In [None]:
# 1-nearest-neighbour regressor fitted on the first 15 days only, then
# asked to predict every day, including those after the training window.
kNN = KNeighborsRegressor(n_neighbors=1).fit(X[:15], sick[:15])
Yprime = kNN.predict(X)

plt.plot(X, sick, color="orange")
plt.plot(X, Yprime, color="black")
plt.title('kNN - 15 days')
plt.xlabel('Day')
plt.ylabel('Number of infections')


In [None]:
# Linear fit on the raw counts of the first 15 days, with the fitted line
# drawn across the whole observed period.
X = np.array(range(len(sick))).reshape(-1, 1)
Y = sick
reg = LinearRegression().fit(X[:15], Y[:15])
print("slope = ", reg.coef_[0], " offset = ", reg.intercept_)

plt.plot(X, Y, color="orange")
# The fitted line is drawn through its values on the first and last day.
last_day = len(sick) - 1
plt.plot(
    [0, last_day],
    [reg.intercept_, last_day * reg.coef_[0] + reg.intercept_],
    color="black",
)
plt.title('linear regression - 15 days')
plt.xlabel('Day')
plt.ylabel('Number of infections')
