In [46]:
%matplotlib notebook
import numpy as np
import pandas as pd
import random
from tqdm import tqdm_notebook
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [47]:
# Load the dataset and extract the three columns used throughout the notebook
# as NumPy arrays: x <- LATITUDE, y <- LONGITUDE, T <- ALTITUDE (the target).
dataDf = pd.read_csv('data.csv')
x, y, T = (
    np.array(dataDf[column].tolist())
    for column in ('LATITUDE', 'LONGITUDE', 'ALTITUDE')
)

In [60]:
def normalize(vec):
    """Mean-normalize a vector: subtract its mean and divide by its range.

    Parameters
    ----------
    vec : array-like of numbers
        Values to rescale.

    Returns
    -------
    numpy.ndarray
        ``(vec - mean(vec)) / (max(vec) - min(vec))`` as a float array.
        If every value is identical the range is zero; in that case the
        centered vector (all zeros) is returned instead of dividing by zero.
        An empty input is returned as an empty float array.
    """
    vec = np.asarray(vec, dtype=float)

    # Nothing to rescale; max()/min() are undefined on an empty array.
    if vec.size == 0:
        return vec

    centered = vec - vec.mean()
    denominator = vec.max() - vec.min()

    # A constant vector has zero range: dividing would yield NaNs that then
    # silently poison every downstream computation.
    if denominator == 0:
        return centered

    return centered / denominator

In [61]:
def split(x, y, T):
    """Split paired coordinates and targets into train, validation and test sets.

    The data is first split 80/20 (``random_state=0``); the held-out 20% is
    then halved (``random_state=42``) into a test part and a validation part,
    i.e. roughly 80/10/10 overall.

    Parameters
    ----------
    x, y : indexable sequences
        The two input coordinates; ``x[k]`` and ``y[k]`` describe sample ``k``.
    T : sequence
        Target value for each sample.

    Returns
    -------
    tuple
        ``(x_train, y_train, T_train, x_val, y_val, T_val,
        x_test, y_test, T_test)``.
    """

    def _unzip(pairs):
        # Separate a list of (x, y) pairs back into two coordinate arrays.
        firsts = np.array([pair[0] for pair in pairs])
        seconds = np.array([pair[1] for pair in pairs])
        return firsts, seconds

    # Pair the coordinates so each sample's x and y stay together when shuffled.
    points = [(x[k], y[k]) for k in range(len(x))]

    points_train, points_rest, T_train, T_rest = train_test_split(
        points, T, test_size=0.2, random_state=0
    )
    # Of the held-out part, the first half returned is the test set and the
    # second half is the validation set.
    points_test, points_val, T_test, T_val = train_test_split(
        points_rest, T_rest, test_size=0.5, random_state=42
    )

    x_train, y_train = _unzip(points_train)
    x_val, y_val = _unzip(points_val)
    x_test, y_test = _unzip(points_test)

    return x_train, y_train, T_train, x_val, y_val, T_val, x_test, y_test, T_test

In [62]:
def generate_features(x, y, N, deg):
    """Build the bivariate polynomial feature matrix for the first ``N`` samples.

    Every monomial ``x**dx * y**dy`` with ``dx + dy <= deg`` becomes one
    column. Columns are ordered by increasing power of ``y`` and, within each
    power of ``y``, by increasing power of ``x``. For ``deg = 2``:

        [[1, x1, x1^2, y1, x1*y1, y1^2],
         [1, x2, x2^2, y2, x2*y2, y2^2],
          ...
         [1, xN, xN^2, yN, xN*yN, yN^2]]

    Parameters
    ----------
    x, y : array-like
        Input coordinates; only the first ``N`` entries of each are used.
    N : int
        Number of samples (rows) to generate.
    deg : int
        Maximum total degree of the monomials.

    Returns
    -------
    featureMatrix : numpy.ndarray of shape (N, d)
    d : int
        Number of features (columns). It depends only on ``deg``, so it is
        reported correctly even when ``N`` is 0.

    Raises
    ------
    IndexError
        If ``N`` is larger than the number of values in ``x`` or ``y``.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    if N > len(x) or N > len(y):
        raise IndexError(
            'N = {} exceeds the number of available samples '
            '({} x values, {} y values)'.format(N, len(x), len(y))
        )
    rows = max(N, 0)

    # (power of x, power of y) for every column, in output order.
    exponents = [(px, py) for py in range(deg + 1) for px in range(deg - py + 1)]

    # number of features
    d = len(exponents)

    featureMatrix = np.empty((rows, d), dtype=np.result_type(x, y))
    xs, ys = x[:rows], y[:rows]
    # One vectorized column per monomial instead of a Python loop per sample.
    for col, (px, py) in enumerate(exponents):
        featureMatrix[:, col] = (xs ** px) * (ys ** py)

    return featureMatrix, d

In [63]:
def update_weights(W, features, error, eta, lamb):
    """Take one gradient-descent step on the L2-regularized squared error.

    Parameters
    ----------
    W : array-like of shape (d,)
        Current weights. Not modified.
    features : numpy.ndarray of shape (d, N)
        Transposed feature matrix, so that ``features.dot(error)`` has one
        entry per weight.
    error : numpy.ndarray of shape (N,)
        Residuals (prediction minus target).
    eta : float
        Learning rate.
    lamb : float
        L2 regularization strength.

    Returns
    -------
    numpy.ndarray
        ``W - eta * (features.dot(error) + lamb * W)``.
    """
    W = np.asarray(W)

    # Build the gradient out of place: an in-place ``+=`` would raise a casting
    # error when features.dot(error) is integer-typed and lamb * W is float.
    gradient = features.dot(error) + lamb * W

    return W - eta * gradient

In [64]:
def show(W, error, d):
    """Print the first ``d`` entries of ``W``, one per line, then the error.

    The error line is followed by an extra blank line.
    """
    for position in range(d):
        print(W[position])
    print(f'Error: {error!s}\n')

In [66]:
def predict(W, X):
    """Return the linear-model output ``X.dot(W)``.

    ``X`` is the feature matrix (one row per sample) and ``W`` the weight
    vector; the result holds one prediction per row of ``X``.
    """
    predictions = X.dot(W)
    return predictions

In [80]:
def generate_model(x, y, T, deg, maxIter, eta, lamb):
    """Fit a polynomial regression surface with batch gradient descent.

    Parameters
    ----------
    x, y : sequences
        Input coordinates of the training samples.
    T : array-like
        Training targets, one per sample.
    deg : int
        Maximum total degree of the polynomial features.
    maxIter : int
        Number of gradient-descent iterations to run.
    eta : float
        Learning rate.
    lamb : float
        L2 regularization strength.

    Returns
    -------
    W : numpy.ndarray
        Weights after ``maxIter`` updates.
    errors : list
        Regularized loss measured at the start of each iteration (before that
        iteration's weight update), so it has ``maxIter`` entries.

    Notes
    -----
    Re-seeds Python's global ``random`` module with 12 on every call, so the
    initial weights are the same for a given number of features.
    """
    sample_count = len(x)
    design, n_features = generate_features(x, y, sample_count, deg)

    # Random initial weights, reproducible thanks to the fixed seed.
    # (Zero initialization would be np.array([0 for _ in range(n_features)]).)
    random.seed(12)
    W = np.array([random.random() for _ in range(n_features)])

    # The transposed design matrix is what update_weights consumes each step.
    design_T = np.transpose(design)

    prevError = 0
    currentError = 0
    errors = []

    for _ in tqdm_notebook(range(maxIter)):
        H = predict(W, design)
        residual = H - T

        # Loss = half the sum of squared residuals + half lamb * ||W||^2.
        data_term = 0.5 * residual.dot(np.transpose(residual))
        penalty = 0.5 * lamb * sum(W * W)

        prevError, currentError = currentError, data_term + penalty
        errors.append(currentError)

        # TODO: a termination check comparing prevError and currentError could
        # go here; none is performed, so all maxIter iterations always run.

        W = update_weights(W, design_T, residual, eta, lamb)

    return W, errors

In [67]:
def calc_R2(T, H):
    """Coefficient of determination of predictions ``H`` against targets ``T``.

    Computed as ``1 - RSS / TSS`` where ``RSS = sum((T - H)^2)`` and
    ``TSS = sum((T - mean(T))^2)``, both summed along the first axis.

    Parameters
    ----------
    T : array-like
        Observed target values.
    H : array-like
        Predicted values, same shape as ``T``.

    Returns
    -------
    numpy.float64 for 1-D input.
        1.0 means a perfect fit; 0.0 matches always predicting ``mean(T)``;
        negative values are worse than that. If ``T`` is constant, TSS is
        zero and the result is NaN or -inf.
    """
    # Accept plain Python sequences as well as arrays.
    T = np.asarray(T)
    H = np.asarray(H)

    residual = T - H
    centered = T - np.mean(T)

    rss = np.sum(residual * residual, axis=0)
    tss = np.sum(centered * centered, axis=0)

    return 1 - (rss / tss)

In [68]:
def calc_rmse(T, H):
    """Root-mean-square error between targets ``T`` and predictions ``H``.

    Parameters
    ----------
    T : array-like
        Observed target values.
    H : array-like
        Predicted values, same shape as ``T``.

    Returns
    -------
    numpy.float64 for 1-D input.
        ``sqrt(sum((T - H)^2) / len(T))``, summed along the first axis.

    Raises
    ------
    ZeroDivisionError
        If ``T`` is empty (the mean over zero samples is undefined).
    """
    # Accept plain Python sequences as well as arrays.
    T = np.asarray(T)
    H = np.asarray(H)

    count = len(T)
    if count == 0:
        raise ZeroDivisionError('calc_rmse requires at least one sample')

    residual = T - H
    mse = np.sum(residual * residual, axis=0) / count

    return mse ** 0.5

In [69]:
# Rescale both coordinates and the target with the same mean/range scheme.
# NOTE(review): the statistics are computed over the full x, y and T arrays,
# i.e. before any train/validation/test split is applied to them.
normalizedx, normalizedy, normalizedT = (
    normalize(values) for values in (x, y, T)
)

In [82]:
# Split the normalized data into train / validation / test parts.
# (Pass the raw x, y, T to split() instead to run on unnormalized data.)
x_train, y_train, T_train, x_val, y_val, T_val, x_test, y_test, T_test = split(
    normalizedx, normalizedy, normalizedT
)

# Train one model per polynomial degree and score it on the validation set.
# To inspect convergence, plot the returned `errors` list against its indices.
for i in range(1, 2):
    print('For degree ', i)
    W, errors = generate_model(
        x_train, y_train, T_train, deg=i, maxIter=1000, eta=3e-6, lamb=1
    )
    print('Final loss: ', errors[-1])

    # Validation features must use the same degree as the fitted weights.
    X, num = generate_features(x_val, y_val, len(x_val), i)
    H = predict(W, X)

    print('R2 error: ', calc_R2(T_val, H))
    print('RMS error: ', calc_rmse(T_val, H))
    print(W)
    print('\n\n')

For degree  1


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))


Final loss:  2858.0973146260258
R2 error:  0.02604773334836763
RMS error:  0.12866164484053036
[-0.00022325 -0.10005647  0.09361553]





# Unnormalized

## Degree = 2
### Max iterations = 10,000
### eta = 0.0000000000001

0.4744234599520871      1 <br>
0.6474382918705535      x <br>
0.0005804929584987999   x^2 <br>
0.14105769027351664     y <br>
-0.09403124677459616    xy <br>
0.3544625440157649      y^2 <br>

Error:  75436974.66365339

---------------------------------------------------

## Degree = 1
### Max iterations = 10,000
### eta = 0.000000001

0.5185508389987425       1 <br>
0.15099146093306134      x <br>
1.3394518499352581       y <br>

Error:  75282294.06572284

---
---

# Normalized

## Degree = 2
### Max iterations = 1,000
### eta = 0.000002

0.15263109107088374 1 <br>
-0.046585920675156704 x <br>
0.21581304287314088 x^2 <br>
0.3174109858230444 y <br>
-0.39821638060691267 xy <br>
-0.1120204638382443 y^2 <br>

Error: 3492.6023185812373

---------------------------------------------------

## Degree = 1
### Max iterations = 1,000
### eta = 0.000002

0.20875515071358333      1 <br>
-0.10010713951478299     x <br>
0.09525453491739086      y <br>

Error: 3585.734618153786