In [2]:
import math
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# load both CSV files into 2-D arrays, skipping the first line of each file
data_set_train = np.genfromtxt("hw03_data_set_train.csv", skip_header = 1, delimiter = ",")
data_set_test = np.genfromtxt("hw03_data_set_test.csv", skip_header = 1, delimiter = ",")

# column 0 becomes x, column 1 becomes y
x_train, y_train = data_set_train[:, 0], data_set_train[:, 1]
x_test, y_test = data_set_test[:, 0], data_set_test[:, 1]

# alternative hand-made training data, left disabled:
# x_train = np.array([(i+0.5)/4 for i in range(1, 24)])
# y_train = np.array([i**2 for i in range(1, 24)])

In [4]:
# grid of x values used when drawing a fitted curve:
# starts at minimum_value, stops before maximum_value, spacing 0.001
minimum_value, maximum_value = 0, 6
x_interval = np.arange(minimum_value, maximum_value, 0.001)

In [5]:
def regressogram_helper(x_train, h):
    """Return equally spaced bin borders of width ``h`` that cover ``x_train``.

    The borders start at the smallest value of ``x_train`` and advance in
    steps of ``h``. The last border is always strictly greater than the
    largest value of ``x_train``, so that half-open bins
    ``[borders[i], borders[i + 1])`` contain every training point.

    Parameters
    ----------
    x_train : array-like of float
        Input values to cover. Must not be empty.
    h : float
        Bin width. Must be positive.

    Returns
    -------
    numpy.ndarray
        Increasing 1-D array of borders (at least two entries).

    Raises
    ------
    ValueError
        If ``h`` is not positive, or if ``x_train`` is empty.
    """
    if not h > 0:
        raise ValueError("h must be a positive bin width")

    x_train = np.asarray(x_train, dtype = float)
    min_x = x_train.min()
    max_x = x_train.max()

    borders = np.arange(min_x, max_x + h, h)

    # np.arange excludes its stop value, so when (max_x - min_x) is a whole
    # number of steps the last border lands on max_x itself and the largest
    # point would belong to no half-open bin; add one more border in that case.
    if borders[-1] <= max_x:
        borders = np.append(borders, borders[-1] + h)

    return borders


# show the bin borders produced for a bin width of 0.2
print(regressogram_helper(x_train, h = 0.2))

[1.6 1.8 2.  2.2 2.4 2.6 2.8 3.  3.2 3.4 3.6 3.8 4.  4.2 4.4 4.6 4.8 5.
 5.2]


In [6]:
def regressogram(x_train, y_train, h, out_of_range_value = 70):
    """Fit a regressogram and return it as a callable.

    The x axis is split into half-open bins ``[border, next_border)`` using
    the borders from ``regressogram_helper(x_train, h)``. The prediction for
    a bin is the mean of the ``y_train`` values whose ``x_train`` falls in it.

    Parameters
    ----------
    x_train : array-like of float
        Training inputs.
    y_train : array-like of float
        Training targets, same length as ``x_train``.
    h : float
        Bin width, forwarded to ``regressogram_helper``.
    out_of_range_value : float, optional
        Value the returned model gives for a point that lies in no bin.
        Defaults to 70, the value that was previously hard-coded.

    Returns
    -------
    callable
        ``model(x)`` taking a single scalar ``x``. It returns the mean of the
        bin containing ``x``; NaN if that bin holds no training point; or
        ``out_of_range_value`` (after printing "Point out of range") if ``x``
        is outside every bin.
    """
    x_train = np.asarray(x_train, dtype = float)
    y_train = np.asarray(y_train, dtype = float)

    # generate borders
    bins = regressogram_helper(x_train, h)
    n_bins = len(bins) - 1

    # a bin without training points has no defined mean; mark it NaN explicitly
    # instead of calling np.mean on an empty list (which warns and returns NaN)
    y_values = np.full(n_bins, np.nan)
    for i in range(n_bins):
        in_bin = (bins[i] <= x_train) & (x_train < bins[i + 1])
        if in_bin.any():
            y_values[i] = y_train[in_bin].mean()

    def model(x):
        # side = "right" gives the first border strictly greater than x, so
        # the bin to its left satisfies bins[i] <= x < bins[i + 1]
        i = int(np.searchsorted(bins, x, side = "right")) - 1
        if 0 <= i < n_bins:
            return y_values[i]
        print("Point out of range")
        return out_of_range_value

    return model

In [7]:
# fit a regressogram with bin width 0.2 on the training data
model = regressogram(x_train, y_train, h = 0.2)

In [8]:
def plot_figure(x_train, y_train, model, borders):
    """Plot the training points, the fitted curve and the bin borders.

    Parameters
    ----------
    x_train, y_train : array-like of float
        Training data, drawn as blue dots.
    model : callable
        Maps a single x value to a prediction; drawn as a line.
    borders : array-like of float
        Bin borders, drawn as red dashed vertical lines.

    Notes
    -----
    The curve is sampled on the notebook-level ``x_interval`` grid, restricted
    to ``[borders[0], borders[-1])`` when at least two borders are given.
    Models built by ``regressogram`` are only defined on that range; outside
    it they print a message and return a sentinel value, which would flood the
    cell output and distort the y-axis.
    """
    fig, ax = plt.subplots(figsize = (8, 4))

    # plot training data
    ax.plot(x_train, y_train, "b.", markersize = 10, label = "training")

    # keep only the grid points that lie between the outermost borders
    borders = np.asarray(borders, dtype = float)
    if borders.size >= 2:
        inside = (borders[0] <= x_interval) & (x_interval < borders[-1])
        x_grid = x_interval[inside]
    else:
        x_grid = x_interval

    # plot model (labelled as the model: no test data is drawn in this figure)
    y_model = [model(x) for x in x_grid]
    ax.plot(x_grid, y_model, label = "model")

    # plot borders
    for border in borders:
        ax.axvline(x = border, color = "r", linestyle = "--")

    ax.set_xlim([1.55, 5.15])
    ax.set_xlabel("Time (sec)")
    ax.set_ylabel("Signal (millivolt)")
    ax.legend()
    plt.show()



In [9]:
def run_test_regressogram(h = 0.35, plot = False):
    """Fit a regressogram on the training data and print its test RMSE.

    Uses the notebook-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test`` arrays.

    Parameters
    ----------
    h : float, optional
        Bin width passed to ``regressogram``. Defaults to 0.35, the value
        that was previously hard-coded.
    plot : bool, optional
        If True, also draw the fit with ``plot_figure``. Defaults to False,
        matching the previously commented-out plotting call.
    """
    model = regressogram(x_train, y_train, h)
    if plot:
        plot_figure(x_train, y_train, model, regressogram_helper(x_train, h))

    # the model takes one scalar at a time, so predict point by point
    y_test_hat = np.array([model(x) for x in x_test])
    rmse = np.sqrt(np.mean((y_test - y_test_hat) ** 2))
    print("RMSE: ", rmse)


run_test_regressogram()

RMSE:  6.010365397341137


In [10]:
# show the bin borders produced for a bin width of 0.35
print(regressogram_helper(x_train, h = 0.35))

[1.6  1.95 2.3  2.65 3.   3.35 3.7  4.05 4.4  4.75 5.1 ]
