In [3]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cm
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from mpl_toolkits.mplot3d import Axes3D
from itertools import product
from scipy import interpolate, stats
import pandas as pd
from scipy.spatial import ConvexHull, convex_hull_plot_2d
from mpl_toolkits.mplot3d import Axes3D


In [50]:
# Location of the exported spectra table and the column/threshold used for filtering.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
DATA_PATH = r"C:\Users\Edwin\Downloads\test_df"
ABSORBANCE_COLUMN = '400.0nm'
MAX_ABSORBANCE = 0.2

data = pd.read_csv(DATA_PATH)
data = data.drop(data.index[0])  # drop dumb header

# Coerce to numbers before thresholding: if the extra header row caused the
# column to be parsed as text, comparing it with a float directly would fail.
# Entries that cannot be parsed become NaN, and NaN < threshold is False, so
# such rows are dropped by the filter.
_absorbance_400 = pd.to_numeric(data[ABSORBANCE_COLUMN], errors='coerce')
data = data[_absorbance_400 < MAX_ABSORBANCE]
# Optional extra filters (disabled):
# data = data[data['400.0nm'] > 0.05]
# data = data.iloc[::3]

In [51]:
# pull data
def _numeric_column(frame, column):
    """Return ``frame[column]`` as a NumPy array; unparseable entries become NaN."""
    return np.asarray(pd.to_numeric(frame[column], errors='coerce'))


# The first and last remaining rows are skipped via [1:-1], exactly as before.
# NOTE(review): confirm that excluding those two rows is intentional.
pfh = _numeric_column(data, 'Component 3 wtf')[1:-1]
ethanol = _numeric_column(data, 'Component 4 wtf')[1:-1]
absorbance = _numeric_column(data, '400.0nm')[1:-1]

# errors='coerce' turns bad cells into NaN. Keep only rows where all three
# values are finite so the arrays stay aligned and no NaN reaches the
# min/max calculations, the model fit or the interpolator further down.
_finite_rows = np.isfinite(pfh) & np.isfinite(ethanol) & np.isfinite(absorbance)
pfh = pfh[_finite_rows]
ethanol = ethanol[_finite_rows]
absorbance = absorbance[_finite_rows]


In [60]:
# collected data: two input coordinates and the measured response
# Toy data previously used for testing (note that it contains negative values):
#   x1     = np.array([-8.0, -6.00, -7.0, -4.0, 2.0, 5.0, 1.0, 3.0, 7.0])
#   x2     = np.array([-8.0, -3.0, 2.0, 4.0, 3.0, 7.0, -1.0, -4.0, -7.0])
#   y_data = np.array([-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0])
x1 = ethanol
x2 = pfh
y_data = absorbance


def _padded_bounds(values, pad_low, pad_high):
    """Return (low, high) limits that extend slightly beyond the data.

    Each limit is moved outward by a fraction of its own magnitude.  Using
    ``abs`` keeps the padding pointing outward for negative values as well;
    ``low - low * pad`` would move a negative lower limit *into* the data.
    For non-negative data the result is identical to the previous formula.
    """
    lowest, highest = min(values), max(values)
    return lowest - abs(lowest) * pad_low, highest + abs(highest) * pad_high


x1_min, x1_max = _padded_bounds(x1, 0.2, 0.2)
x2_min, x2_max = _padded_bounds(x2, 0.2, 0.4)  # upper x2 margin is wider (0.4), as before

y_min = min(y_data)
y_max = max(y_data)

# test data to make mesh
# TODO: automate the resolution; np.arange(min, max, step) is the alternative to linspace.
GRID_POINTS = 100
x1t = np.linspace(x1_min, x1_max, GRID_POINTS)
x2t = np.linspace(x2_min, x2_max, GRID_POINTS)

# Training inputs as an (n_samples, 2) array with columns (x1, x2).
x1x2 = np.array([x1, x2]).T

In [61]:
# Kernel = constant term * RBF term; each term is given an initial value and
# (lower, upper) bounds for the hyperparameter search.
amplitude_term = C(constant_value=1, constant_value_bounds=(1e-1, 1e3))
rbf_term = RBF(length_scale=1, length_scale_bounds=(1e-3, 1e3))
kernal = amplitude_term * rbf_term

# NOTE(review): random_state is left unset (a "random_state=42" option was
# commented out), so the optimizer restarts are presumably not reproducible
# from run to run - confirm whether a fixed seed is wanted.
gp_model = GaussianProcessRegressor(
    kernel=kernal,
    n_restarts_optimizer=10,
    alpha=0.075,
    normalize_y=True,
)
gp_model.fit(x1x2, y_data)



GaussianProcessRegressor(alpha=0.075, kernel=1**2 * RBF(length_scale=1),
                         n_restarts_optimizer=10, normalize_y=True)

In [62]:
# Every (x1, x2) combination of the two axes, one pair per row.  product()
# iterates x1t in the outer (slow) loop and x2t in the inner (fast) loop, so
# the rows are ordered as a d1-by-d2 grid flattened row by row.
x1x2t = np.array(list(product(x1t, x2t)))
d1 = x1t.shape[0]
d2 = x2t.shape[0]

# With return_std=True the second value is requested as a standard deviation;
# the name MSE is kept only so existing references keep working.
y_pred, MSE = gp_model.predict(x1x2t, return_std=True)

# Reshape the flat (n, ...) results back onto the grid.  The shape must follow
# the product() ordering, i.e. (d1, d2); (d2, d1) only gave the right layout
# because both axes happen to have the same length.
grid_shape = (d1, d2)
X1 = x1x2t[:, 0].reshape(grid_shape)
X2 = x1x2t[:, 1].reshape(grid_shape)
Y = np.reshape(y_pred, grid_shape)

In [63]:
fig, ax = plt.subplots(1)

# One shared normalisation, spanning the measured data range, is applied to
# BOTH the predicted mesh and the measured points.  Without passing it to
# pcolormesh the mesh (and therefore the colour bar) is scaled by its own
# values, so equal colours would not mean equal absorbance across the two.
# Predictions outside [y_min, y_max] are drawn with the end colours.
norm = colors.Normalize(vmin=y_min, vmax=y_max)
mappable = ax.pcolormesh(X1, X2, Y, norm=norm)
fig.colorbar(mappable)
ax.scatter(x1, x2, norm=norm, c=y_data, edgecolors='k')

<matplotlib.collections.PathCollection at 0x20fd3c82518>

In [64]:
%matplotlib qt
# Make sure the colour bar normalisation is applied appropriately.
# NOTE (author's observation - TODO confirm against the installed matplotlib):
# vmin/vmax supersede norm, and vmin/vmax on their own default to the linear
# colors.Normalize normalisation.

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Same colour scale and colour map for the measured points and the surface.
norm = colors.Normalize(vmin=y_min, vmax=y_max)
shared_colouring = dict(norm=norm, cmap='jet')

# Measured samples.
ax.scatter(x1, x2, y_data, c=y_data, **shared_colouring)

# Predicted surface, drawn nearly transparent so the samples remain visible.
surf = ax.plot_surface(
    X=X1,
    Y=X2,
    Z=Y,
    rstride=1,
    cstride=1,
    alpha=0.1,
    linewidth=0,
    antialiased=False,
    **shared_colouring,
)
fig.colorbar(surf, shrink=0.5, aspect=5)

ax.set_xlabel('Ethanol')
ax.set_ylabel('PFH')
ax.set_zlabel('AU')

Text(0.5, 0, 'AU')

In [65]:
"""why is infinately going in the same color in one direction? is it because the sampling is not 
cubic as in its weighing in one direction? It seems to only be modeling in one direction rather than two?
But this seems to be an artifact of the data and not the modeling since test data works fine, but that is sparse 

I wonder if data is just random if it will just make that same gradient"""

'why is infinately going in the same color in one direction? is it because the sampling is not \ncubic as in its weighing in one direction? It seems to only be modeling in one direction rather than two?\nBut this seems to be an artifact of the data and not the modeling since test data works fine, but that is sparse \n\nI wonder if data is just random if it will just make that same gradient'

In [58]:
# TESTING
%matplotlib qt
# Author's notes on colour handling (observations, not verified here):
#  - vmin/vmax can be given either through a norm or directly to the plot
#    call, but they always take precedence even if a different norm is fed in.
#  - The norm is fed into a mappable, and the mappable is what the colour bar
#    is built from.
#  - Do not forget to feed the norm into both the pcolormesh AND the
#    figure/plot (scatter) call.
#
# NOTE: a colormap is NOT a mappable; it is just a collection of colours.
# TODO: learn what edge colours are.

# Dense regular grid over the padded data range.
x_space = np.linspace(x1_min, x1_max, 1000)
y_space = np.linspace(x2_min, x2_max, 1000)
xx, yy = np.meshgrid(x_space, y_space)

# Piecewise-linear interpolation of the measured absorbance over (ethanol, pfh).
cartcoord = np.asarray([ethanol, pfh]).T
interp = interpolate.LinearNDInterpolator(cartcoord, absorbance)
Z0 = interp(xx, yy)

# Colour limits are taken from the measured data.
# Open question: should the normalisation follow the data or the mesh?
vmin, vmax = min(absorbance), max(absorbance)
norm = colors.Normalize(vmin, vmax)

fig, ax = plt.subplots(1)
mappable = ax.pcolormesh(xx, yy, Z0, norm=norm)
cbar = fig.colorbar(mappable)

# A different norm could theoretically be used here, but keep it matched.
plt.scatter(ethanol, pfh, c=absorbance, norm=norm)
plt.show()