In [845]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from random import randint
from math import sqrt

In [855]:
def read(name):
    """Load one dataset from the text file ``<name>.txt``.

    File layout, as consumed here:
      * line 1 - a single integer (returned as ``labels``; elsewhere in this
        notebook the weight vector is created with ``labels + 1`` entries);
      * next line - number of training rows, followed by that many rows of
        whitespace-separated floats;
      * next line - number of test rows, followed by that many rows of
        whitespace-separated floats.

    Parameters
    ----------
    name : any
        Base name of the file; ``str(name) + ".txt"`` is opened.

    Returns
    -------
    tuple
        ``(labels, train, test)`` where ``train`` and ``test`` are numpy
        arrays built from the parsed rows.
    """
    def _read_rows(handle):
        # The row count precedes the rows, so it has to be consumed first.
        row_count = int(handle.readline())
        rows = []
        for _ in range(row_count):
            rows.append([float(token) for token in handle.readline().split()])
        return rows

    path = str(name) + ".txt"
    with open(path, "r") as handle:
        labels = int(handle.readline())
        train_rows = _read_rows(handle)
        test_rows = _read_rows(handle)

    return labels, np.asarray(train_rows), np.asarray(test_rows)


def split(df):
    """Separate a 2-D data array into a design matrix and a target vector.

    The last column of ``df`` is taken as the target. The remaining columns
    become the features, and a column of ones is appended on the right so a
    linear model can learn an intercept through an ordinary weight.

    Parameters
    ----------
    df : numpy.ndarray
        2-D array whose last column holds the target values.

    Returns
    -------
    tuple
        ``(xs, ys)`` - features with the extra ones column, and the targets.
    """
    features, targets = df[:, :-1], df[:, -1]
    # One [1] entry per row; joined along axis 1 this forms the bias column.
    bias_column = [[1] for _ in range(len(features))]
    design = np.concatenate((features, bias_column), axis=1)
    return design, targets


def NRSME(orig, predicted):
    """Normalised root-mean-square error of ``predicted`` against ``orig``.

    Computes ``sqrt(mean((orig - predicted) ** 2)) / (max(orig) - min(orig))``.

    Parameters
    ----------
    orig : array-like
        Reference (true) values.
    predicted : array-like
        Predicted values; must contain as many elements as ``orig``.

    Returns
    -------
    float
        RMSE divided by the range of ``orig``.

    Raises
    ------
    ValueError
        If the inputs are empty, differ in length, or ``orig`` is constant
        (the normalising range is then zero and the score is undefined).
    """
    actual = np.asarray(orig, dtype=float).ravel()
    estimate = np.asarray(predicted, dtype=float).ravel()

    if actual.size == 0:
        raise ValueError("NRSME is undefined for empty input")
    # A length mismatch used to be truncated silently by zip() while the sum
    # was still divided by len(orig), which produced a wrong score.
    if actual.size != estimate.size:
        raise ValueError(
            "orig and predicted must have the same length, got %d and %d"
            % (actual.size, estimate.size)
        )

    value_range = actual.max() - actual.min()
    if value_range == 0:
        raise ValueError("NRSME is undefined when all values of orig are equal")

    rmse = sqrt(np.mean((actual - estimate) ** 2))
    return float(rmse / value_range)


def svd(x_train, y_train, rcond=None):
    """Least-squares weights computed through the SVD pseudo-inverse.

    With the thin decomposition ``x_train = U @ diag(s) @ Vh`` the returned
    weights are ``Vh.T @ diag(1 / s) @ U.T @ y_train``. Singular values that
    are numerically zero are dropped instead of inverted, so a rank-deficient
    design matrix (for example one with an all-zero column) yields the
    minimum-norm solution rather than a singular-matrix error or huge weights.

    Parameters
    ----------
    x_train : array-like, shape (n_samples, n_features)
        Design matrix.
    y_train : array-like, shape (n_samples,) or (n_samples, n_targets)
        Target values.
    rcond : float, optional
        Relative cutoff: singular values ``<= rcond * max(s)`` are treated as
        zero. Defaults to ``max(x_train.shape) * machine epsilon``.

    Returns
    -------
    numpy.ndarray
        Weight vector (or matrix, for 2-D ``y_train``).
    """
    x_train = np.asarray(x_train)
    # numpy returns (U, s, Vh): left vectors, singular values, right vectors^T.
    left, singular, right_t = np.linalg.svd(x_train, full_matrices=False)

    if rcond is None:
        rcond = max(x_train.shape) * np.finfo(singular.dtype).eps
    cutoff = rcond * singular.max(initial=0.0)

    # Reciprocal only where the singular value is significant; zero elsewhere.
    keep = singular > cutoff
    inv_singular = np.zeros_like(singular)
    inv_singular[keep] = 1.0 / singular[keep]

    projected = left.T @ y_train
    # Transposing puts the singular-value axis last so the scaling broadcasts
    # correctly for both 1-D and 2-D targets.
    scaled = (projected.T * inv_singular).T
    return right_t.T @ scaled


def stochastic_gradient_descent(x_train, y_train, labels, n=2000, h=0.001,
                                decay=1.001, seed=None):
    """Fit linear-regression weights with stochastic gradient descent.

    Each iteration picks one training row uniformly at random and takes a
    step against the gradient of its squared error ``(y - w @ x) ** 2``.
    The step size is divided by ``decay`` after every iteration.

    Parameters
    ----------
    x_train : array-like, shape (n_samples, labels + 1)
        Design matrix (features plus the bias column).
    y_train : array-like, shape (n_samples,)
        Target values.
    labels : int
        The weight vector is created with ``labels + 1`` entries.
    n : int, optional
        Number of iterations.
    h : float, optional
        Initial step size.
    decay : float, optional
        Divisor applied to the step size after each iteration.
    seed : optional
        When given, row indices are drawn from a private ``random.Random(seed)``
        so the run is reproducible. When ``None`` the module-level ``randint``
        is used, as before.

    Returns
    -------
    numpy.ndarray
        Float weight vector of length ``labels + 1`` (also when ``n == 0``).
    """
    from random import Random  # local import: the file only imports randint

    x_train = np.asarray(x_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    draw_index = randint if seed is None else Random(seed).randint

    # Start from a float array so the return type does not depend on n.
    w = np.zeros(labels + 1)
    last_row = len(x_train) - 1
    for _ in range(n):
        r = draw_index(0, last_row)
        x = x_train[r]
        y = y_train[r]
        # d/dw (y - w.x)^2 = -2 * (y - w.x) * x
        gradient = -2 * (y - (w @ x)) * x

        w -= gradient * h
        h /= decay
    return w


def run(i):
    """Evaluate both solvers on dataset ``i`` and print their test NRSME.

    The dataset is loaded with ``read(i)``, standardised with a
    ``StandardScaler`` fitted on the training array only, and then split into
    features and targets. Scaling happens before the split, so the target
    column is standardised together with the features.

    Parameters
    ----------
    i : any
        Dataset identifier forwarded to ``read``.
    """
    labels, train_raw, test_raw = read(i)

    # Fit on the training data only; the test data reuses those statistics.
    scaler = StandardScaler().fit(train_raw)
    train_scaled = scaler.transform(train_raw)
    test_scaled = scaler.transform(test_raw)

    x_train, y_train = split(train_scaled)
    x_test, y_test = split(test_scaled)

    least_squares_weights = svd(x_train, y_train)
    print("Least squares:               ", NRSME(y_test, x_test @ least_squares_weights))

    sgd_weights = stochastic_gradient_descent(x_train, y_train, labels)
    print("Stochastic gradient descent: ", NRSME(y_test, x_test @ sgd_weights))

In [856]:
# Evaluate every dataset, numbered 1 through 7 (read() opens "<number>.txt").
for dataset_id in range(1, 8):
    run(dataset_id)

Least squares:                6.581200838544606e-05
Stochastic gradient descent:  0.00012671381357376297
Least squares:                6.039958826789279e-06
Stochastic gradient descent:  0.006055867087906975
Least squares:                0.00220704358697072
Stochastic gradient descent:  0.0006157434510500933
Least squares:                0.00022320442109284398
Stochastic gradient descent:  0.021946480593433498
Least squares:                3.878491707762429e-08
Stochastic gradient descent:  0.009441777550975042
Least squares:                0.0005031535966597205
Stochastic gradient descent:  0.00012400553367328658
Least squares:                1.3541913242900425e-06
Stochastic gradient descent:  0.00017484921558394483
