# Wonky Tops
### Marina Ten, Floris Strijbos, Tom Wade

In [None]:
import pandas as pd
import numpy as np
import ipyvolume as ipv
import bruges
import os

In [None]:
#Record the working directory; the input path below is resolved against it
cwd = os.getcwd()
print(cwd)

In [None]:
#Set the location of the input spreadsheet (relative to the working directory).
xlIn = "sample_data/11_2.xlsx"
xlPath = os.path.join(cwd, xlIn)

In [None]:
#Read the input tops data from the excel file into a dataframe
df = pd.read_excel(xlPath)

In [None]:
#Get a 'list' of all the unique tops found in the input dataset
#(displayed as the cell output; use it to choose the names for tops_list below)
pd.unique(df.Top)

In [None]:
#Create List of the Tops to be included (values matched against the 'Top' column):
tops_list = ['AS11.2','T AS11.2']

In [None]:
def extract_xyz(data, tops):
    """
    Takes the raw dataframe and a list of desired tops, and returns the X, Y and Z
    values as individual arrays (filtering on the tops in the tops list).

    data : dataframe with the columns 'Top', 'Easting', 'Northing' and 'Depth_tvdss'
    tops : list of values of the 'Top' column to keep

    Returns three 1d arrays: Easting, Northing and Depth_tvdss of the matching rows.
    """
    #Filter on the arguments rather than on notebook globals, so that the function
    #works for any dataframe / tops list it is handed
    xyz = data.loc[data.Top.isin(tops), ['Easting','Northing','Depth_tvdss']].values
    return xyz[:,0] , xyz[:,1] , xyz[:,2]

In [None]:
#Pull the easting, northing and depth arrays for the tops named in tops_list
x, y, z = extract_xyz(df, tops_list)

In [None]:
def data_extent(xIn , yIn, zIn):
    """
    Given the X, Y and Z data arrays, this function returns the data extent as a list in the form:
    [0] = xmin
    [1] = xmax
    [2] = ymin
    [3] = ymax
    [4] = zmin
    [5] = zmax

    The inputs must provide .min() and .max() (e.g. numpy arrays).
    """
    #Use the arguments, not the notebook-level x, y, z, so the extent always
    #describes the data that was actually passed in
    return [xIn.min(), xIn.max(), yIn.min(), yIn.max(), zIn.min(), zIn.max()]

In [None]:
#Extent of the raw tops as [xmin, xmax, ymin, ymax, zmin, zmax]; used for the plot limits
dExtent = data_extent(x,y,z)

In [None]:
print(dExtent)

### Initial processing to mathematically remove obvious outliers

In [None]:
def outliers_iqr(n):
    """
    Flag outliers with the interquartile range (IQR) rule.

    The IQR is the spread between the 25th and 75th percentiles (Q3 - Q1). A value is
    treated as an outlier when it lies more than 1.5 * IQR below Q1 or above Q3.

    Returns the result of np.where on the outlier condition, i.e. a tuple holding the
    indices of the outlying entries of n.
    """
    q1, q3 = np.percentile(n, [25, 75])
    fence = (q3 - q1) * 1.5
    too_high = n > (q3 + fence)
    too_low = n < (q1 - fence)
    return np.where(too_high | too_low)

In [None]:
def apply_iqr(xIn, yIn, zIn):
    """
    Remove the points whose Z value is an IQR outlier.

    The outlier test is run on zIn only; the flagged positions are then removed from
    all three arrays. Returns a 2d numpy array with the columns x, y, z of the
    remaining points.
    """
    drop = outliers_iqr(zIn)[0].tolist()
    kept = [np.delete(axis, drop) for axis in (xIn, yIn, zIn)]
    return np.column_stack(kept)

In [None]:
#Drop the depth outliers; iqr_data holds the remaining points as columns x, y, z
iqr_data = apply_iqr(x,y,z)

In [None]:
def iplot(data, extent):
    """
    This Creates the interactive ipython plot, given an array of data.
    This can then be used to interactively edit the dataset
    """
    
    ipv.figure()
    s = ipv.scatter(data[:,0], data[:,1], data[:,2], marker='sphere', size=3)
    ipv.xlim(extent[0], extent[1])
    ipv.ylim(extent[2] ,extent[3])
    ipv.zlim(extent[4],extent[5])
    ipv.animation_control(s)# shows controls for animation controls
    ipv.selector_default()
    ipv.show()

In [None]:
iplot(iqr_data, dExtent)

### Iterative interactive editing, round 1

In [None]:
#Select points in plot above interactively, using CTRL-R
#NOTE(review): `s` must be the scatter object of the plot above - confirm it is in scope here
s_sel=s.selected
#Indices of the selected points as a plain list (passed to np.delete further down)
k=s_sel[0].tolist()

In [None]:
#Pick up the points kept by the first 'outlier edit' function (apply_iqr).
#x_smooth / y_smooth / z_smooth only exist inside apply_iqr, so read the columns of
#the array it returned instead (column order: x, y, z)
x = iqr_data[:,0]
y = iqr_data[:,1]
z = iqr_data[:,2]

In [None]:
#Remove the interactively selected points (indices in k) from each coordinate array
xdel, ydel, zdel = (np.delete(axis, k) for axis in (x, y, z))

In [None]:
#Redraw the plot with the updated pointset
ipv.figure()
sdel = ipv.scatter(xdel, ydel, zdel, marker='sphere', size=3)
#xmin/xmax/ymin/ymax are not defined anywhere above; use the extent list from data_extent()
ipv.xlim(dExtent[0], dExtent[1])
ipv.ylim(dExtent[2], dExtent[3])
ipv.zlim(-4000,-1500)
ipv.animation_control(sdel)# shows animation controls for the scatter drawn in this cell
ipv.selector_default()
ipv.show()

### Second edit iteration

In [None]:
#Select points in the plot above, using CTRL-R. Then proceed to delete them:
#k2 holds the indices of the points selected in the redrawn scatter (sdel)
k2=sdel.selected[0].tolist()

In [None]:
#Delete the selected points from the once-edited arrays
xdel2=np.delete(xdel,k2)
ydel2=np.delete(ydel,k2)
zdel2=np.delete(zdel,k2)

In [None]:
#Redraw the plot. Presume no further edits after this
ipv.figure()
sdel2 = ipv.scatter(xdel2, ydel2, zdel2, marker='sphere', size=3)
#xmin/xmax/ymin/ymax are not defined anywhere above; use the extent list from data_extent()
ipv.xlim(dExtent[0], dExtent[1])
ipv.ylim(dExtent[2], dExtent[3])
ipv.zlim(-4000,-1500)
ipv.animation_control(sdel2)# shows animation controls for the scatter drawn in this cell
ipv.selector_default()
ipv.show()

### Grid up edited data

In [None]:
#Reassign edited data to X,Y,Z variable
#(from here on x, y, z refer to the points left after the second interactive edit)
x, y, z = xdel2, ydel2, zdel2

### Grid Tops via minimum curvature
##### Note this was our first gridding approach. However, a depopulated approach is going to be preferred...

In [None]:
#Create a grid outline from tops data extent. Start by printing the extent.
#The min/max names were never assigned above, so compute them from the edited data first
xmin, xmax, ymin, ymax, zmin, zmax = data_extent(x, y, z)
print("Xmin = {} , XMax = {}\nYmin = {}, Ymax= {}\nZmin = {}, Zmax = {}".format(xmin, xmax, ymin, ymax, zmin, zmax))

In [None]:
#Create sensible grid outline of integers. Currently hardcoded...
#extent is ordered [xmin, xmax, ymin, ymax]
extent = [596000, 628500, 6640000, 6700000] #Sensible numbers from observed Xmin and Ymin. Defines Grid
grid_int = 100 #Hard coded grid spacing
#Create arrays of the x and y axis
#NOTE(review): these counts are used below as the number of grid nodes with both end
#points included, so the actual node spacing is slightly larger than grid_int - confirm intended
x_points = int((extent[1] - extent[0]) / grid_int) #Number of samples in the X-Direction
y_points = int((extent[3] - extent[2]) / grid_int) #Number of samples in the Y-Direction

In [None]:
#Create the grid. interval expressed by complex numbers
#(a complex step tells np.mgrid how many points to generate instead of the step size)
grid_x, grid_y = np.mgrid[extent[0]:extent[1]:complex(x_points), extent[2]:extent[3]:complex(y_points)]

In [None]:
#Create numpy arrays of point co-ordinates and then values from input data
#points has one (x, y) row per top; values holds the matching depths
points = np.array(list(zip(x, y)))
values = z

In [None]:
#Perform interpolation of data in to the specified grid using scipy
from scipy.interpolate import griddata
#gridded has the same shape as grid_x / grid_y, i.e. it is indexed [x index, y index]
gridded = griddata(points, values, (grid_x, grid_y), method='cubic')

In [None]:
#Initial plot, Pre-Residuals. Can probs delete later
import matplotlib.pyplot as plt
%matplotlib inline
plt.figure(figsize=(20,10))
#gridded is indexed [x, y]; transpose it so that image rows correspond to y
plt.imshow(gridded.T, origin='lower', extent=extent, cmap='jet')
plt.colorbar()
#Overlay the input tops, coloured by their depth value
plt.scatter(x, y, s=20, c=z)

#NOTE(review): the calls below switch to a gray colormap and add a second colorbar - confirm both are wanted
plt.gray()
plt.colorbar()

### Residuals Extraction

In [None]:
import bruges #Utilise the coordinate transformation functions

In [None]:
# UTM coords of 3 unique inline, crossline locations. Taken from grid extents (realworld)
corners_xy = np.array([[596000, 6640000],
                       [596000, 6700000],
                       [628500, 6640000]])

# The inline, crossline locations you just provided. Also Grid extent, but as indices given by grid.shape
corners_ix = np.array([[0,  0],
                       [0, 600],
                       [325, 0]])

In [None]:
transform = bruges.transform.CoordTransform(corners_ix, corners_xy)

In [None]:
#Convert well-top coordinates (numpy arrays) to grid coordinates, one top at a time
def get_top_coords(xIn, yIn):
    """
    Map each (x, y) pair through the reverse of the notebook-level `transform`.

    xIn, yIn : equal-length sequences providing .tolist() (e.g. numpy arrays)

    Returns two numpy arrays holding the first and the second component of every
    transformed pair.
    """
    pairs = zip(xIn.tolist(), yIn.tolist())
    converted = [transform.reverse([east, north]) for east, north in pairs]
    xcors = np.array([entry[0] for entry in converted])
    ycors = np.array([entry[1] for entry in converted])
    return xcors , ycors

In [None]:
#Grid coordinates of every top, obtained through the reverse transform
xCor, yCor = get_top_coords(x, y)

In [None]:
#Extract the value of the grid at the well top locations
#NOTE(review): this indexing needs xCor / yCor to be integer arrays - confirm transform.reverse returns integers
z_gridded = gridded[(xCor, yCor)]

In [None]:
#Calculate the residuals by subtracting gridded value from input top value
residuals = z - z_gridded

In [None]:
#Put the residuals in to a dataframe for visualisation
d = {'x': x, 'y': y, 'z':z, 'z_gridded': z_gridded, 'residual': residuals}
res_df = pd.DataFrame(data=d)
res_df.head()

In [None]:
#Plot image of grid, this time with residual annotations
plt.figure(figsize=(20,12))
#gridded is indexed [x, y]; transpose it so that image rows correspond to y
plt.imshow(gridded.T, origin='lower', extent=extent, cmap='jet')
plt.colorbar()

#Plot well tops and residuals
plt.scatter(x, y, s=20, c=z)

#Residuals as text labels rounded to 2 decimals
res_list = residuals.tolist()
res_list_txt = [str(round(i, 2)) for i in res_list]

#Write each residual next to its top; residuals, x and y share the same ordering
for i, txt in enumerate(res_list_txt):
    plt.annotate(txt, (x[i], y[i]) , size=10)

### Residual Histogram

In [None]:
#Histogram range from the residuals, ignoring nan entries
rng = np.nanmin(res_list), np.nanmax(res_list)
#n = counts per bin, bins = bin edges
n, bins, _ = plt.hist(res_list, bins='auto',  range=rng)

In [None]:
#Replace the bin edges by the bin centres (mean of neighbouring edges)
bins = (bins[1:] + bins[:-1]) / 2

In [None]:
#Redraw the counts as a bar chart positioned at the bin centres
plt.bar(bins, n, width=0.5, color='g')

### Regrid using data with outliers kicked out

In [None]:
#Filter the input data on the results of the first gridding
#condition1 keeps tops whose residual lies strictly between -5 and 5
condition1 = ( (res_df.residual > -5) & (res_df.residual < 5) )
#condition2 keeps tops whose residual is nan
condition2 = res_df.residual.isna()

filteredDf = res_df[condition1 | condition2]

In [None]:
#Pull data from the filtered pandas data frame into 1d pandas Series
#(they keep the row labels of res_df, so use positional access when indexing by position)
xFilt, yFilt , zFilt = filteredDf.x , filteredDf.y , filteredDf.z

In [None]:
#Create numpy arrays of point co-ordinates and then values from input data
pointsFilt = np.array(list(zip(xFilt, yFilt)))
valuesFilt = zFilt

In [None]:
#Perform interpolation of data in to the specified grid using scipy
reGridded = griddata(pointsFilt, valuesFilt, (grid_x, grid_y), method='cubic')

In [None]:
#Determine array value limits for contour plotting
contourInt = 5
#Truncate the nan-ignoring min / max to an integer, then round to the nearest 10
contourMin = round(int(np.nanmin(reGridded)), -1)
contourMax = round(int(np.nanmax(reGridded)), -1)
#Contour levels every contourInt; np.arange does not include contourMax itself
contours = np.arange(contourMin, contourMax, contourInt)

In [None]:
#Re-extract residuals
newXcor, newYcor = get_top_coords(xFilt, yFilt)

In [None]:
#Extract the value of the grid at the well top locations
z_reGridded = reGridded[(newXcor, newYcor)]
#Residual = input top value minus regridded value
newResiduals = zFilt - z_reGridded

In [None]:
#Put the residuals in to a dataframe for visualisation
d_regrid = {'x': xFilt, 'y': yFilt, 'z':zFilt, 'z_regridded': z_reGridded, 'residual': newResiduals}
reGrid_df = pd.DataFrame(data=d_regrid)
reGrid_df

In [None]:
#Plot image of grid, this time with residual annotations
plt.figure(figsize=(30,12))
plt.imshow(reGridded.T, origin='lower', extent=extent, cmap='jet')
plt.colorbar()

#ContourLevels
plt.contour(reGridded.T, extent=extent, levels=contours, colors='k', linestyles='-', linewidths=1)

#Plot well tops and residuals
plt.scatter(xFilt, yFilt, s=20, c='gray')


#Residuals as text labels rounded to 2 decimals
new_res_list = newResiduals.tolist()
new_res_list_txt = [str(round(i, 2)) for i in new_res_list]


#Label every filtered top with its own residual. The residuals follow the order of
#xFilt / yFilt, so pair them up directly; indexing the unfiltered x / y by position
#would put the labels on the wrong tops once rows have been filtered out
for txt, xi, yi in zip(new_res_list_txt, xFilt, yFilt):
    plt.annotate(txt, (xi, yi) , size=10)

In [None]:
#Flatten the regridded surface to 1d for inspection
zg=reGridded.ravel()
print(zg)

In [None]:
#NOTE(review): xg and yg are not used in the cells below - confirm whether they are still needed
xg=grid_x.tolist()
yg=grid_y.tolist()
#One row per grid node; note that the sign of the gridded values is flipped for export
datagrid = pd.DataFrame({'Easting': grid_x.ravel(), 'Northing': grid_y.ravel(), 'Values': -reGridded.ravel()}, columns=['Easting', 'Northing','Values'])

In [None]:
#Replace nan entries with 0 before export
datagrid[np.isnan(datagrid)] = 0

In [None]:
#Write the grid to a csv file in the working directory
datagrid.to_csv('11_2_regridded_c.csv')

In [None]:
#Update histogram
rng2 = np.nanmin(new_res_list), np.nanmax(new_res_list)
n, bins, _ = plt.hist(new_res_list, bins='auto',  range=rng2)

### KDTree

In [None]:
from scipy import spatial

In [None]:
#Create sensible grid outline of integers. Now hardcoded...
#NOTE: this overwrites extent, grid_int, x_points and y_points from the first gridding
extent = [596000, 628500, 6640000, 6700000] #Sensible numbers from observed Xmin and Ymin
grid_int = 2000 #Hard coded grid spacing
#Create arrays of the x and y axis
x_points = int((extent[1] - extent[0]) / grid_int)
y_points = int((extent[3] - extent[2]) / grid_int)

In [None]:
#Create the grid. interval expressed by complex numbers
#NOTE: this overwrites grid_x and grid_y from the first gridding with the coarser grid
grid_x, grid_y = np.mgrid[extent[0]:extent[1]:complex(x_points), extent[2]:extent[3]:complex(y_points)]

In [None]:
#Create numpy arrays of point co-ordinates and then values from input data
points = np.array(list(zip(x, y)))
values = z

In [None]:
#create kd index
tree = spatial.KDTree(points.tolist())

In [None]:
#For every node of the coarse grid find the nearest top; [1] is the index part of the query result.
#set() removes duplicates, so keep lists each selected top once (in no particular order)
keep=list(set(tree.query(list(zip(grid_x.ravel(), grid_y.ravel())))[1]))
xk,yk,zk = x[keep],y[keep],z[keep]

In [None]:
#Indices of all tops, and of the tops NOT selected by the KD-tree query (the blind test set)
inds = list(range(x.size))
#Membership tests against a set are O(1); testing against the keep list made this loop quadratic
keep_set = set(keep)
blinds = [i for i in inds if i not in keep_set]
xb, yb, zb = x[blinds] , y[blinds] , z[blinds]

In [None]:
#Map view of the split: tops held back for the blind test vs. tops kept for gridding
plt.scatter(xb, yb, color = 'r', s=40, label='blind test')
plt.scatter(xk, yk, color='blue', s=20, label='keep')
plt.legend()

In [None]:
#Unique nearest-neighbour distances from the same query; [0] is the distance part of the result
#NOTE(review): keepdist is not used in the cells below - confirm whether it is still needed
keepdist=list(set(tree.query(list(zip(grid_x.ravel(), grid_y.ravel())))[0]))

In [None]:
#Create numpy arrays of point co-ordinates and then values from input data
#(only the kept tops are used this time)
points = np.array(list(zip(xk, yk)))
values = zk

In [None]:
#Perform interpolation of data in to the specified grid using scipy
#NOTE: this overwrites gridded from the first gridding
gridded = griddata(points, values, (grid_x, grid_y), method='cubic')

In [None]:
plt.figure(figsize=(20,10))
#gridded is indexed [x, y]; transpose it so that image rows correspond to y
plt.imshow(gridded.T, origin='lower', extent=extent, cmap='jet')
plt.colorbar()


plt.scatter(xk, yk, s=10, c='k', label = 'Keep')
plt.scatter(xb, yb, s=10, c = 'r', label='Blind Test')
plt.legend()

### KD Residuals

In [None]:
#Show the grid dimensions; the corner indices below have to match them
gridded.shape

In [None]:
# The inline, crossline locations you just provided. Also Grid extent, but as indices given by grid.shape
# NOTE(review): hard coded; these should equal (gridded.shape - 1) on each axis - confirm against the shape printed above
corners_ix = np.array([[0,  0],
                       [0, 29],
                       [15, 0]])

In [None]:
#Rebuild the index <-> coordinate mapping for the coarse grid (corners_xy is unchanged).
#get_top_coords reads this notebook-level transform, so it now maps to the coarse grid
transform = bruges.transform.CoordTransform(corners_ix, corners_xy)

In [None]:
#Re-extract residuals
kdXcor, kdYcor = get_top_coords(xk, yk)

In [None]:
#Extract the value of the grid at the well top locations
kdGridPoint = gridded[(kdXcor, kdYcor)]
#Residual = kept top value minus gridded value
kdResiduals = zk - kdGridPoint

In [None]:
#Collect the kept tops, the gridded value at each top and the residual
kd_dict = {'x' : xk,
          'y' : yk,
          'z' : zk,
          'zkd' : kdGridPoint,
          'kdResidual' : kdResiduals} 

In [None]:
kd_df = pd.DataFrame(data=kd_dict)

In [None]:
#Update histogram
kd_res_list = kdResiduals.tolist()
#Histogram range from the residuals, ignoring nan entries
rng3 = np.nanmin(kd_res_list), np.nanmax(kd_res_list)
n, bins, _ = plt.hist(kd_res_list, bins='auto',  range=rng3)

In [None]:
#Replace the bin edges by the bin centres (mean of neighbouring edges)
bins = (bins[1:] + bins[:-1]) / 2

In [None]:
#Redraw the counts as a bar chart positioned at the bin centres
plt.bar(bins, n, width=1, color='g')

In [None]:
#Blind Test Residuals
#(grid coordinates of the tops that were NOT used to build the grid)
BkdXcor, BkdYcor = get_top_coords(xb, yb)

In [None]:
#Extract the value of the grid at the well top locations
BkdGridPoint = gridded[(BkdXcor, BkdYcor)]
#Residual = blind top value minus gridded value
BkdResiduals = zb - BkdGridPoint

In [None]:
#Update histogram
Bkd_res_list = BkdResiduals.tolist()
#Histogram range from the residuals, ignoring nan entries
rng4 = np.nanmin(Bkd_res_list), np.nanmax(Bkd_res_list)
n, bins, _ = plt.hist(Bkd_res_list, bins='auto',  range=rng4)

In [None]:
#Replace the bin edges by the bin centres (mean of neighbouring edges)
bins = (bins[1:] + bins[:-1]) / 2

In [None]:
#Redraw the counts as a bar chart positioned at the bin centres
plt.bar(bins, n, width=1, color='g')

### 2d Fourier Transform

In [None]:
#Subset data to rectangle. In final version, will have already selected rectangle in ipyvolume phase

In [None]:
gridded.shape

In [None]:
#2d discrete Fourier transform of the gridded surface
#NOTE(review): griddata presumably leaves nan at nodes outside the hull of the input points,
#and nan values would propagate through the FFT - confirm the grid is nan-free
#(e.g. after the rectangle subset mentioned above)
ft = np.fft.fft2(gridded)