In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sys
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

sys.path.insert(0,"../src/")
import SGD
import CostFunctions
import SGDTEST as SGDTEST

sys.path.insert(0,"../../project_1/src/")
from FrankeFunction import *
import stat_tools
import linear_regression

In [5]:
# Synthetic regression data set: a noise-free polynomial surface sampled at
# uniformly random points, split into training and test parts.
SEED = 2021            # Fixed seed so a fresh-kernel re-run reproduces the same data and split
n = 500                # Number of data points
noise_scale = 0.2      # Size of artificial noise (unused while the noise line below is commented out)

# Seed NumPy's legacy global generator, which the np.random.* calls below draw from.
np.random.seed(SEED)

# Generate data
x = np.random.uniform(0, 10, n)
y = np.random.uniform(0, 10, n)
z = 4*x**3 - 20*y**2
# z = FrankeFunction(x, y)

# Add standard normal noise:
#z = z + noise_scale * np.random.normal(0, 1, len(z))

# Hold out 20% of the points for testing; random_state pins the shuffle.
x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
    x, y, z, test_size=0.2, random_state=SEED
)

# Center the response using the training mean only, so no information from
# the test set enters the preprocessing.
z_train_intercept = np.mean(z_train)
z_train = z_train - z_train_intercept
z_test = z_test - z_train_intercept



In [6]:
# Fit the centered response with ordinary least squares in two ways:
# analytically (SVD-based solver) and with two SGD implementations, then
# print the three weight vectors for comparison.

# Create design matrices
degree = 3
X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

# Scale design matrix according to the training data
scaler = StandardScaler()
scaler.fit(X_train)
# Remove the intercept (first column) after scaling
X_train_scaled = scaler.transform(X_train)[:, 1:]
X_test_scaled = scaler.transform(X_test)[:, 1:]

# Number of predictors, taken from the matrix itself so it cannot drift out of
# sync with design_matrix_2D. Intercept is not included.
# NOTE(review): expected to equal (degree + 1) * (degree + 2) / 2 - 1 -- confirm.
N_predictors = X_train_scaled.shape[1]
w_init = np.random.randn(N_predictors) # Random initial weights
#w_init = [4,-2]

M = n // 100               # Number of mini-batches in SGD
n_epochs = int(1e5)        # Number of epochs in SGD
learning_rate = 0.0001     # Learning rate of SGD


# Solve for optimal weights using OLS Cost function
w_OLS_Analytic = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)

# Weights from SGD. Each solver receives its own copy of the initial weights,
# so that both start from the same point even if a solver updates its weight
# argument in place.
w_OLS_SGD = SGD.SGD(
    X_train_scaled,
    z_train,
    M,
    w_init.copy(),
    n_epochs,
    learning_rate,
    CostFunctions.OLS_cost_gradient)

w_OLS_SGD_TEST = SGDTEST.SGD(
    X_train_scaled,
    z_train,
    M,
    w_init.copy(),
    n_epochs,
    learning_rate,
    CostFunctions.OLS_cost_gradient)

print(w_OLS_Analytic)
print(w_OLS_SGD)
print(w_OLS_SGD_TEST)

[-3.12638804e-13  5.40012479e-13  2.04636308e-12 -1.13686838e-13
 -5.82055047e+02  1.13766490e+03  5.68434189e-13 -2.27373675e-13
  6.96331881e-13]
[-109.10063788 -130.8370166   389.54596538  -79.97278347 -238.44141305
  835.4051875   115.1562309   -45.86182382 -189.45868594]
[-109.07702417 -130.83644086  389.61823795  -79.99625089 -238.41102265
  835.46542267  115.09656487  -45.80075932 -189.53437111]


In [7]:
# Print the mean squared error of every weight vector, first on the training
# split and then on the test split. Within each split the order is:
# analytic OLS, SGDTEST, SGD.
for _targets, _features in ((z_train, X_train_scaled), (z_test, X_test_scaled)):
    for _weights in (w_OLS_Analytic, w_OLS_SGD_TEST, w_OLS_SGD):
        print(stat_tools.MSE(_targets, _features @ _weights))

1.962320063617306e-24
773.3257131679987
773.5072631634798
1.8402037277306628e-24
962.3299152010176
962.6600788097523
