In [34]:
# Imports
import math as m
import pandas as pd
import numpy as np
import scipy as sp
import os
my_plotly_api_key = os.environ.get('MY_PLOTLY_API_KEY') # retrive api_key from operating system
import plotly
plotly.tools.set_credentials_file(username='agu3rra', api_key=my_plotly_api_key) # setting up credentials; Plotly is an online service.
import plotly.plotly as py # import graphics library
import plotly.graph_objs as go
from scipy import integrate

# Data loading
dfCoordinates = pd.read_csv('gpsDataUTM.csv') # reads data into dataframe

#-----#-----#-----#-----#-----#-----#-----#-----#-----#-----#-----#-----#-----#
# Calculate volume of earth from the lowest measured point
# Idea: generate training set to a Neural Network and treat it as a regression problem

# Part 1: Generate training set
# Idea: generate a set of mathematical surface functions z=f(x,y) that go by
# close to the points we're getting from GPS. Use these funcitons to generate a
# set of (x,y,z) points and a double integral to calculate the exact valume of
# the solid delimited by it.

# Better idea: maybe the model can generelize from general space, no need to actually find functions that have values close to our sample set.
# The only remaining challenge is to select only a subset of points that match the input data size (random selection)

In [35]:
# Determine bounds of interest
x_max = dfCoordinates['x_rel'].max()
y_max = dfCoordinates['y_rel'].max()
z_max = dfCoordinates['z_rel'].max()

In [36]:
# Remember: in terrain measurements, all x, y and z values will be positive
# Dev note: increasing number of samples may be required
x = np.linspace(0.0,x_max,num=1000) # generate linear space for x values
y = np.linspace(0.0,y_max,num=1000)
xGrid, yGrid = np.meshgrid(x,y) # generate mesh grid for plotting sample data

In [37]:
# Generate a equations and corresponding double integrals
# Data of interest: functions with variations bounded by 0:*_max values

# Dev note: How many double integral evaluations are needed for the model to be able to generalize well? Starting at 1000
integrate.dblquad(lambda y, x: 3 + x**2 - 2*y, 0.0, x_max, lambda x: -x, lambda x: x)

(1175537925.0989282, 1.3051092707649128e-05)

In [45]:
def combination(n,k): # Combination of n samples taken k at a time.
    return m.factorial(n)/(m.factorial(n-k)*m.factorial(k))

In [48]:
combination(156,3) # there are 620620 planes defined by all 156 distinct points in my sample data

620620.0

In [57]:
def combinations(iterable, r):
    # combinations('ABCD', 2) --> AB AC AD BC BD CD
    # combinations(range(4), 3) --> 012 013 023 123
    pool = tuple(iterable)
    n = len(pool)
    if r > n:
        return
    indices = list(range(r))
    yield tuple(pool[i] for i in indices)
    while True:
        for i in reversed(range(r)):
            if indices[i] != i + n - r:
                break
        else:
            return
        indices[i] += 1
        for j in range(i+1, r):
            indices[j] = indices[j-1] + 1
        yield tuple(pool[i] for i in indices)

In [61]:
c = combinations('ABCD',2)

In [62]:
for i in c:
    print(i)

('A', 'B')
('A', 'C')
('A', 'D')
('B', 'C')
('B', 'D')
('C', 'D')


In [None]:
# return a list with each of the 156 points from pandas dataframe and apply combination
# define all plane equations that can be obtained from combination of each point 3 at a time.
# evaluate bounded numerical integral of each plane
# select a subset of points that match that bounded space on the evaluated integral and match with corresponding volume calculation (double integral result)
# Your training set will be comprised of 620.620 sets of 156 points (156x3 = 468 inputs) and corresponding volume.
# Train your model on a neural network