In [14]:
%matplotlib notebook
import numpy as np
import pandas as pd
import random
from tqdm import tqdm_notebook
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
# Read the dataset and pull the three columns used below out as plain lists.
dataDf = pd.read_csv('data.csv')
x, y, T = (
    dataDf[column].tolist()
    for column in ('LATITUDE', 'LONGITUDE', 'ALTITUDE')
)

In [3]:
def normalize(vec):
    """Min-max scale ``vec`` so its smallest value maps to 0 and its largest to 1.

    Parameters
    ----------
    vec : sequence of numbers or numpy array
        Values to rescale.

    Returns
    -------
    numpy.ndarray
        Float array holding ``(vec - min) / (max - min)``. An empty input
        yields an empty array, and a constant input yields all zeros
        (instead of the NaNs a division by zero would produce).
    """
    values = np.asarray(vec, dtype=float)
    if values.size == 0:
        return values

    lowest = values.min()
    span = values.max() - lowest
    # All elements equal: there is no range to scale by, so map everything to 0.
    if span == 0:
        return np.zeros_like(values)

    return (values - lowest) / span

In [4]:
def split(x, y, T):
    """Split paired inputs and targets into train, validation and test sets.

    The first ``train_test_split`` call uses ``test_size = 0.8`` and its
    *second* output of each pair (the 80% portion) is deliberately taken as
    the training set. The remaining 20% is then halved into test and
    validation portions. Both calls use ``random_state = 42``.

    Returns
    -------
    tuple
        ``(x_train, y_train, T_train, x_val, y_val, T_val,
        x_test, y_test, T_test)``; the x/y entries are Python lists.
    """

    def unzip(points):
        # Separate a list of (x, y) pairs back into two coordinate lists.
        return [p[0] for p in points], [p[1] for p in points]

    pairs = [(x_i, y[i]) for i, x_i in enumerate(x)]

    # 20% held out (first outputs), 80% training (second outputs).
    held_out, train_pairs, T_held_out, T_train = train_test_split(
        pairs, T, test_size = 0.8, random_state = 42
    )
    # The held-out 20% is cut in half: test / validation.
    test_pairs, val_pairs, T_test, T_val = train_test_split(
        held_out, T_held_out, test_size = 0.5, random_state = 42
    )

    x_test, y_test = unzip(test_pairs)
    x_val, y_val = unzip(val_pairs)
    x_train, y_train = unzip(train_pairs)

    return x_train, y_train, T_train, x_val, y_val, T_val, x_test, y_test, T_test

In [5]:
def generate_features(x, y, N, deg):
    """Build the bivariate polynomial feature matrix for the first ``N`` points.

    Every monomial ``x**dx * y**dy`` with ``dx + dy <= deg`` becomes one
    column. Columns are ordered by increasing power of ``y`` and, within
    each power of ``y``, by increasing power of ``x``. For ``deg = 2``:

        [[1, x1, x1^2, y1, x1y1, y1^2]
         [1, x2, x2^2, y2, x2y2, y2^2]
          .  .
         [1, xN, xN^2, yN, xNyN, yN^2]]

    Parameters
    ----------
    x, y : sequence of numbers
        Coordinates of the points; only the first ``N`` entries are used.
    N : int
        Number of rows to generate.
    deg : int
        Maximum total degree of the monomials.

    Returns
    -------
    featureMatrix : numpy.ndarray
        Array of shape ``(N, d)``.
    d : int
        Number of features (columns). It depends only on ``deg``, so it is
        correct even when ``N`` is 0.

    Raises
    ------
    IndexError
        If ``N`` is larger than the number of points in ``x`` or ``y``.
    """
    count = max(N, 0)
    if count > len(x) or count > len(y):
        raise IndexError('N exceeds the number of available points')

    xs = np.asarray(x)[:count]
    ys = np.asarray(y)[:count]

    # One vectorised column per monomial, instead of a Python loop per row.
    columns = [
        (xs ** x_pow) * (ys ** y_pow)
        for y_pow in range(deg + 1)
        for x_pow in range(deg - y_pow + 1)
    ]

    # number of features
    d = len(columns)
    if d == 0:
        # Negative degree: no monomials at all.
        return np.empty((count, 0)), d

    featureMatrix = np.column_stack(columns)

    return featureMatrix, d

In [6]:
def update_weights(W, features, error, eta):
    """Return the weights after one gradient-descent step.

    Parameters
    ----------
    W : numpy.ndarray
        Current weights.
    features : numpy.ndarray
        Matrix multiplied with ``error`` to form the step direction.
    error : numpy.ndarray
        Residual vector.
    eta : float
        Learning rate scaling the step.

    Returns
    -------
    numpy.ndarray
        ``W - eta * features.dot(error)``; ``W`` itself is not modified.
    """
    step = eta * features.dot(error)
    return W - step

In [7]:
def show(W, error, d):
    """Print the first ``d`` entries of ``W``, one per line, then the error.

    The error line is followed by an extra blank line.
    """
    for position in range(d):
        weight = W[position]
        print(weight)

    summary = 'Error: ' + str(error) + '\n'
    print(summary)

In [21]:
def generate_model(x, y, T, deg, maxIter, eta, tol=None):
    """Fit polynomial weights to ``T`` by batch gradient descent.

    Parameters
    ----------
    x, y : sequence of numbers
        Input coordinates; all ``len(x)`` points are used.
    T : sequence of numbers
        Target values, one per point.
    deg : int
        Polynomial degree passed to ``generate_features``.
    maxIter : int
        Maximum number of gradient-descent iterations.
    eta : float
        Learning rate.
    tol : float, optional
        If given, stop early once the absolute change in error between two
        consecutive iterations drops below ``tol``. The default ``None``
        always runs all ``maxIter`` iterations.

    Returns
    -------
    W : numpy.ndarray
        Final weights, one per feature.
    errors : list
        Error ``0.5 * sum((H - T)**2)`` measured at the start of each
        iteration, i.e. before that iteration's weight update.
    """
    N = len(x)
    featureMatrix, d = generate_features(x, y, N, deg)

    # Random initial weights; the fixed seed makes runs repeatable.
    # (Zero initialisation would be: W = np.array([0 for i in range(d)]).)
    random.seed(12)
    W = np.array([random.random() for _ in range(d)])

    # The transpose does not change between iterations, so take it once.
    featuresT = np.transpose(featureMatrix)
    errors = []

    for _ in tqdm_notebook(range(maxIter)):

        # Residual of the current predictions; reused for error and gradient.
        residual = featureMatrix.dot(W) - T
        currentError = 0.5 * residual.dot(residual)
        errors.append(currentError)

        # Termination check: compare against the previous iteration's error.
        if tol is not None and len(errors) > 1 and abs(errors[-2] - currentError) < tol:
            break

        W = update_weights(W, featuresT, residual, eta)

    return W, errors

In [9]:
# Rescale both inputs and the target with the same min-max helper.
normalizedx, normalizedy, normalizedT = (normalize(series) for series in (x, y, T))

In [None]:
# Alternative runs on the unnormalized data, kept for reference
# (note the much smaller eta used there):
# generate_model(x, y, T, 1, 100, 0.000000001)
# x_train, y_train, T_train, x_val, y_val, T_val, x_test, y_test, T_test = split(x, y, T)

x_train, y_train, T_train, x_val, y_val, T_val, x_test, y_test, T_test = split(normalizedx, normalizedy, normalizedT)

W, errors = generate_model(x_train, y_train, T_train, 2, 100, 0.000002)
print(W)
# Error recorded at the start of the last iteration.
print(errors[-1])

# Draw on a fresh, labelled figure so re-running the cell does not pile
# curves onto whichever figure happens to be current.
fig, ax = plt.subplots()
ax.plot(range(len(errors)), errors)
ax.set(title='Training error per iteration', xlabel='Iteration', ylabel='Error');

# Unnormalized

## Degree = 2
### Max iterations = 10,000
### eta = 0.0000000000001

0.4744234599520871      1 <br>
0.6474382918705535      x <br>
0.0005804929584987999   x^2 <br>
0.14105769027351664     y <br>
-0.09403124677459616    xy <br>
0.3544625440157649      y^2 <br>

Error:  75436974.66365339

---------------------------------------------------

## Degree = 1
### Max iterations = 10,000
### eta = 0.000000001

0.5185508389987425       1 <br>
0.15099146093306134      x <br>
1.3394518499352581       y <br>

Error:  75282294.06572284

---
---

# Normalized

## Degree = 2
### Max iterations = 1,000
### eta = 0.000002

0.15263109107088374 1 <br>
-0.046585920675156704 x <br>
0.21581304287314088 x^2 <br>
0.3174109858230444 y <br>
-0.39821638060691267 xy <br>
-0.1120204638382443 y^2 <br>

Error: 3492.6023185812373

---------------------------------------------------

## Degree = 1
### Max iterations = 1,000
### eta = 0.000002

0.20875515071358333      1 <br>
-0.10010713951478299     x <br>
0.09525453491739086      y <br>

Error: 3585.734618153786