In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import commonmodules as cm

# Verbose switch: when True, later cells dump raw vs. scaled rows.
debug = False

# Load the spreadsheet into a DataFrame and list its column names.
filename = "N2H2_VVdata_3variables.xlsx"
df = pd.read_excel(filename)
print(df.columns)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Inputs are the three columns (v, w, T); the single target column is k.
x = df[['v', 'w', 'T(K)']].values
y = df[['k(cm^3/s)']].values

# Rescale each input column to [0, 1].
# NOTE(review): the scaler is fitted on the full data set, i.e. before any
# train/test split is made further down.
scalerx = MinMaxScaler()
x_s = scalerx.fit(x).transform(x)

# Distinct scaled values of each input column; used to hold out whole slices.
vset, wset, tset = (set(x_s[:, col]) for col in range(3))

# Same rescaling for the target.
scalery = MinMaxScaler()
y_s = scalery.fit(y).transform(y)

if debug:
    # Side-by-side dump of scaled and raw values.
    for ys, yraw in zip(y_s, y):
        print(ys, yraw)
    for xs, xraw in zip(x_s, x):
        print(xs, xraw)

In [None]:
from tensorflow import keras
import tensorflow as tf

import tensorflow.keras.optimizers as tko
import tensorflow.keras.activations as tka
import tensorflow.keras.losses as tkl
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

from sklearn.model_selection import train_test_split

def test_train_split (column, valuestotest, x, y):
    
    xtest = []
    ytest = []
    xtrain = []
    ytrain = []
    
    for v in valuestotest:
        for i, xv in enumerate(x[:,column]):
            if xv == v:
                xtest.append(x[i,:])
                ytest.append(y[i])
            else:
                xtrain.append(x[i,:])
                ytrain.append(y[i])   

    return np.asarray(xtrain), np.asarray(xtest), \
        np.asarray(ytrain), np.asarray(ytest)

def buildmodel(modelshape):
    """Build and compile a fully connected regression network.

    Parameters
    ----------
    modelshape : iterable of int
        Number of units of each hidden ReLU layer, in order.

    Returns
    -------
    keras.Sequential
        Compiled model: 3 inputs -> linear Dense(3) -> one ReLU Dense layer
        per entry of ``modelshape`` -> linear Dense(1). Loss and reported
        metric are both mean squared error, optimizer is Adam.
    """
    model = keras.Sequential()
    # Declare the input explicitly rather than via ``input_shape=`` on a layer.
    model.add(keras.Input(shape=(3,)))
    model.add(keras.layers.Dense(units = 3, activation = 'linear'))

    for n in modelshape:
        model.add(keras.layers.Dense(units = n, activation = 'relu'))

    model.add(keras.layers.Dense(units = 1, activation = 'linear'))
    # ``metrics`` is passed as a list: a bare string is not accepted by every
    # Keras version.
    model.compile(loss='mse', optimizer="adam", metrics=['mse'])

    return model

def plotfull3dcurve (columntorm, x, y):
    """Plot ``y`` as a 3D surface over two columns of ``x``.

    Column ``columntorm`` is dropped from ``x``; the first two remaining
    columns span the grid and ``y`` supplies the surface height.

    Parameters
    ----------
    columntorm : int
        Index of the column of ``x`` to leave out.
    x : array-like, 2D
        Input rows; at least two columns must remain after the drop.
    y : array-like
        One target value per row of ``x`` (shape ``(n,)`` or ``(n, 1)``).

    Raises
    ------
    ValueError
        If ``y`` does not hold exactly one value per row of ``x``.

    Notes
    -----
    Grid points with no matching row are drawn as NaN instead of aborting
    the plot. If a grid point occurs more than once, its first row is used.
    """
    # Keep every column except the one being removed.
    X = np.array([[val for j, val in enumerate(row) if j != columntorm]
                  for row in x])
    # Flatten so each height is a plain scalar; storing a 1-element array
    # into a single array cell is deprecated in recent NumPy.
    Y = np.asarray(y, dtype=float).reshape(-1)
    if Y.shape[0] != X.shape[0]:
        raise ValueError("y must hold exactly one value per row of x")

    x1set = sorted(set(X[:,0]))
    x2set = sorted(set(X[:,1]))

    x1dim = len(x1set)
    x2dim = len(x2set)

    # (x1, x2) -> height; setdefault keeps the first occurrence.
    zlookup = {}
    for x1, x2, z in zip(X[:,0], X[:,1], Y):
        zlookup.setdefault((x1, x2), z)

    Xp = np.zeros((x1dim, x2dim), dtype=float)
    Yp = np.zeros((x1dim, x2dim), dtype=float)
    Zp = np.zeros((x1dim, x2dim), dtype=float)
    for x1idx, x1 in enumerate(x1set):
        for x2idx, x2 in enumerate(x2set):
            Xp[x1idx, x2idx] = float(x1)
            Yp[x1idx, x2idx] = float(x2)
            # NaN marks a grid point that has no data row.
            Zp[x1idx, x2idx] = zlookup.get((x1, x2), np.nan)

    fig = plt.figure(figsize=plt.figaspect(2.))
    fig.set_size_inches(40, 30)
    ax = fig.add_subplot(2,1,1, projection='3d')
    ax.plot_surface(Xp, Yp, Zp, rstride=1, cstride=1, cmap='jet', linewidth=0, antialiased=False)
    plt.show()

In [None]:
# Check data: for every distinct w, plot the k surface of that slice over
# the two remaining inputs (v and T).
# sorted() makes the figure order deterministic instead of following set order.
for w in sorted(wset):
    train_x, test_x, train_y, test_y = test_train_split (1, [w], x_s, y_s)

    # Say which slice the next figure shows, in original (unscaled) units.
    w_orig = scalerx.inverse_transform(test_x[:1])[0, 1]
    print("w = %g (scaled %.4f)" % (w_orig, w))

    plotfull3dcurve (1, test_x, test_y)

In [None]:
from sklearn import metrics

# Random hold-out study: train a fresh network for several test fractions
# and report MSE and R2 on the test and training parts (scaled units).
# Results go to stdout and to perc.csv.

# Hyperparameters are the same for every run, so set them once.
modelshape = [32, 32, 32, 32]
epochs = 10
batch_size = 50

header = " Perc. Split , Test MSE , Test R2 , Train MSE , Train R2"

# The context manager closes the file even if a fit/predict call raises.
with open("perc.csv", "w") as ofp:
    print (header)
    print (header, file=ofp)
    for perc in [0.05, 0.10, 0.25, 0.30, 0.50]:
        train_x, test_x, train_y, test_y = train_test_split(x_s, y_s, \
                        test_size=perc, random_state=42)

        model = buildmodel(modelshape)
        history = model.fit(train_x, train_y, epochs=epochs,  batch_size=batch_size, \
            verbose=0)

        # Predictions stay in scaled units; apply scalery.inverse_transform
        # to pred_y and test_y to inspect them in original units.
        pred_y = model.predict(test_x, verbose=0)
        # The columns are labelled MSE, so compute the mean *squared* error
        # (mean_absolute_error was used here before).
        testmse = metrics.mean_squared_error(test_y, pred_y)
        testr2 = metrics.r2_score(test_y, pred_y)

        pred_y = model.predict(train_x, verbose=0)
        trainmse = metrics.mean_squared_error(train_y, pred_y)
        trainr2 = metrics.r2_score(train_y, pred_y)

        # Format once, emit to both destinations.
        row = "%5.2f , %10.6f , %10.6f , %10.6f , %10.6f"%(perc, testmse, testr2, \
                                                           trainmse,  trainr2)
        print(row)
        print(row, file=ofp)

In [None]:
# Leave-one-v-out study: for each distinct v, train on all other rows and
# test on the rows with that v. Reports MSE and R2 (scaled units) to stdout
# and to vremoved.csv.

# Hyperparameters are the same for every run, so set them once.
modelshape = [32, 32, 32, 32]
epochs = 10
batch_size = 50

header = " v Removed , Test MSE , Test R2 , Train MSE , Train R2"

# The context manager closes the file even if a fit/predict call raises.
with open("vremoved.csv", "w") as ofp:
    print (header)
    print (header, file=ofp)
    # sorted() gives a deterministic, ascending row order instead of set order.
    # NOTE: v is the MinMax-scaled value (vset is built from x_s).
    for v in sorted(vset):
        train_x, test_x, train_y, test_y = test_train_split (0, [v], x_s, y_s)

        # One fresh model per held-out value. The former extra fit on the
        # first iteration was discarded immediately and has been dropped.
        model = buildmodel(modelshape)
        history = model.fit(train_x, train_y, epochs=epochs,  batch_size=batch_size, \
            verbose=0)

        # Predictions stay in scaled units; apply scalery.inverse_transform
        # to pred_y and test_y to inspect them in original units.
        pred_y = model.predict(test_x, verbose=0)
        # The columns are labelled MSE, so compute the mean *squared* error
        # (mean_absolute_error was used here before).
        testmse = metrics.mean_squared_error(test_y, pred_y)
        testr2 = metrics.r2_score(test_y, pred_y)

        pred_y = model.predict(train_x, verbose=0)
        trainmse = metrics.mean_squared_error(train_y, pred_y)
        trainr2 = metrics.r2_score(train_y, pred_y)

        # Format once, emit to both destinations.
        row = "%5.2f , %10.6f , %10.6f , %10.6f , %10.6f"%(v, testmse, testr2, \
                                                           trainmse,  trainr2)
        print(row)
        print(row, file=ofp)

In [None]:
# Leave-one-w-out study: for each distinct w, train on all other rows and
# test on the rows with that w. Reports MSE and R2 (scaled units) to stdout
# and to wremoved.csv.

# Hyperparameters are the same for every run, so set them once.
modelshape = [32, 32, 32, 32]
epochs = 10
batch_size = 50

header = " w Removed , Test MSE , Test R2 , Train MSE , Train R2"

# The context manager closes the file even if a fit/predict call raises.
with open("wremoved.csv", "w") as ofp:
    print (header)
    print (header, file=ofp)
    # sorted() gives a deterministic, ascending row order instead of set order.
    # NOTE: w is the MinMax-scaled value (wset is built from x_s).
    for w in sorted(wset):
        train_x, test_x, train_y, test_y = test_train_split (1, [w], x_s, y_s)

        # One fresh model per held-out value. The former extra fit on the
        # first iteration was discarded immediately and has been dropped.
        model = buildmodel(modelshape)
        history = model.fit(train_x, train_y, epochs=epochs,  batch_size=batch_size, \
            verbose=0)

        # Predictions stay in scaled units; apply scalery.inverse_transform
        # to pred_y and test_y to inspect them in original units.
        pred_y = model.predict(test_x, verbose=0)
        # The columns are labelled MSE, so compute the mean *squared* error
        # (mean_absolute_error was used here before).
        testmse = metrics.mean_squared_error(test_y, pred_y)
        testr2 = metrics.r2_score(test_y, pred_y)

        pred_y = model.predict(train_x, verbose=0)
        trainmse = metrics.mean_squared_error(train_y, pred_y)
        trainr2 = metrics.r2_score(train_y, pred_y)

        # Format once, emit to both destinations.
        row = "%5.2f , %10.6f , %10.6f , %10.6f , %10.6f"%(w, testmse, testr2, \
                                                           trainmse,  trainr2)
        print(row)
        print(row, file=ofp)

In [None]:
# Leave-one-T-out study: for each distinct T, train on all other rows and
# test on the rows with that T. Reports MSE and R2 (scaled units) to stdout
# and to tremoved.csv.

# Hyperparameters are the same for every run, so set them once.
modelshape = [32, 32, 32, 32]
epochs = 10
batch_size = 50

header = " T Removed , Test MSE , Test R2 , Train MSE , Train R2"

# The context manager closes the file even if a fit/predict call raises.
with open("tremoved.csv", "w") as ofp:
    print (header)
    print (header, file=ofp)
    # sorted() gives a deterministic, ascending row order instead of set order.
    # NOTE: t is the MinMax-scaled value (tset is built from x_s).
    for t in sorted(tset):
        train_x, test_x, train_y, test_y = test_train_split (2, [t], x_s, y_s)

        # One fresh model per held-out value. The former extra fit on the
        # first iteration was discarded immediately and has been dropped.
        model = buildmodel(modelshape)
        history = model.fit(train_x, train_y, epochs=epochs,  batch_size=batch_size, \
            verbose=0)

        # Predictions stay in scaled units; apply scalery.inverse_transform
        # to pred_y and test_y to inspect them in original units.
        pred_y = model.predict(test_x, verbose=0)
        # The columns are labelled MSE, so compute the mean *squared* error
        # (mean_absolute_error was used here before).
        testmse = metrics.mean_squared_error(test_y, pred_y)
        testr2 = metrics.r2_score(test_y, pred_y)

        pred_y = model.predict(train_x, verbose=0)
        trainmse = metrics.mean_squared_error(train_y, pred_y)
        trainr2 = metrics.r2_score(train_y, pred_y)

        # Format once, emit to both destinations.
        row = "%5.2f , %10.6f , %10.6f , %10.6f , %10.6f"%(t, testmse, testr2, \
                                                           trainmse,  trainr2)
        print(row)
        print(row, file=ofp)