### Introduction
This notebook generates samples of a known one-dimensional function, then trains neural networks to approximate that function, using K-fold cross-validation to choose the best model.

In [None]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import initializers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Precision
from tensorflow.keras.metrics import Recall


from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import KFold


import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

### Generating Data
Two target functions are available, selected with the `function` argument (1 selects the first function; any other value selects the second).
The data is then split into training (80%) and test (20%) sets.

In [None]:
#Generate data
def generate_data(size, function):
    """Sample a 1-D target function and split the samples into train/test sets.

    Args:
        size: Number of (x, y) samples to generate.
        function: Target selector. ``1`` draws x uniformly from [-1, 1) and
            uses ``x * sin(6*pi*x) * exp(-x**2)``. Any other value draws x
            uniformly from [-2, 2) and uses
            ``exp(-x**2) * atan(x) * sin(4*pi*x)``.

    Returns:
        ``(x_train, y_train), (x_test, y_test)``. The samples are generated
        as ``(size, 1)`` arrays and split 80% / 20% by ``train_test_split``.
    """
    dimensions = (size, 1)

    # NumPy ufuncs evaluate the whole array in one call; this replaces the
    # per-element Python loop that np.vectorize ran over scalar math.* calls.
    if function == 1:
        x = np.random.uniform(low=-1, high=1, size=dimensions)
        y = x * np.sin(6 * np.pi * x) * np.exp(-(x ** 2))
    else:
        x = np.random.uniform(low=-2, high=2, size=dimensions)
        y = np.exp(-(x ** 2)) * np.arctan(x) * np.sin(4 * np.pi * x)

    #Split into Train/Validate (80%) and Test (20%)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    return (x_train, y_train), (x_test, y_test)


# Generate 10000 samples of target function 1; the resulting splits are
# kept as module-level arrays for the cells that follow.
(x_train, y_train), (x_test, y_test) = generate_data(size=10000, function=1)

### Neural Network
The neural network is defined here and evaluated with K-fold cross-validation, using early stopping on the validation loss.

In [None]:
#Define Neural Network:
#one hidden layer with variable nodes; sigmoid activation function
def compile_model(no_hidden_nodes):
    """Build and compile a one-hidden-layer regression network.

    The network takes a single scalar input, has one sigmoid hidden layer of
    ``no_hidden_nodes`` units and a single linear output unit. It is compiled
    with mean-squared-error loss and the Adam optimizer (learning rate 0.01).

    Args:
        no_hidden_nodes: Number of units in the hidden layer.

    Returns:
        The compiled ``keras`` Sequential model. The same object is also
        bound to the module-level name ``model``.
    """
    # Call sites in this file invoke compile_model(...) and then use `model`
    # directly, so publish the fresh network under that name as well as
    # returning it.
    global model

    model = keras.models.Sequential()

    #hidden layer
    # Weights and biases start from wide zero-mean normal distributions
    # (stddev 30 and 10 respectively).
    ki = keras.initializers.RandomNormal(mean=0., stddev=30)
    bi = keras.initializers.RandomNormal(mean=0., stddev=10)
    model.add(Dense(
        no_hidden_nodes,
        activation='sigmoid',
        input_shape=(1,),
        kernel_initializer=ki,
        bias_initializer=bi))

    #output layer
    model.add(Dense(1, activation='linear'))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = Adam(learning_rate=0.01)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model

# Result grids indexed as [dataset-size index, hidden-node index]. Errors are
# stored as MSE * 100.
# NOTE(review): the 4x4 shape is hard-coded; confirm size_data and
# hidden_nodes hold at most four entries each.
model_best_train = np.zeros((4,4))
model_best_validate = np.zeros((4,4))
model_avg_train = np.zeros((4,4))
model_avg_validate = np.zeros((4,4))

kfold = KFold(n_splits=10,shuffle=True)
for i,size in enumerate(size_data):
    # Use the first `size` training samples for this row of the grid.
    inputs = x_train[:size]
    targets = y_train[:size]

    for j,node_num in enumerate(hidden_nodes):
        print(f'Model {i},{j}')
        model_training_error = []
        model_validation_error = []

        # enumerate(..., start=1) supplies the 1-based fold number that is
        # printed below.
        for fold_no, (train_idx, validate_idx) in enumerate(kfold.split(inputs), start=1):
            inputs_train, inputs_val = inputs[train_idx], inputs[validate_idx]
            targets_train, targets_val = targets[train_idx], targets[validate_idx]

            # NOTE(review): `model` below is read as a module-level name right
            # after this call; confirm compile_model binds a fresh network to
            # it for every fold.
            compile_model(node_num)
            print('------------------------------------------------------------------------')
            print(f'Training for fold {fold_no} ...')

            #Fit to model
            # NOTE(review): patience=200 is larger than epochs=10, so this
            # callback cannot stop training early with the current settings.
            es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=200)
            h = model.fit(
                inputs_train, targets_train,
                validation_data=(inputs_val,targets_val),
                epochs=10,
                batch_size=10,
                verbose=0,
                callbacks=[es])

            #Metrics
            training_score = model.evaluate(inputs_train, targets_train, verbose=0)
            validation_score = model.evaluate(inputs_val, targets_val, verbose=0)

            model_training_error.append(training_score * 100)
            model_validation_error.append(validation_score * 100)

        # Summarise the folds: best (lowest) and mean error for this
        # (dataset size, hidden nodes) combination.
        model_best_train[i, j] = min(model_training_error)
        model_best_validate[i, j] = min(model_validation_error)
        model_avg_train[i, j] = np.mean(model_training_error)
        model_avg_validate[i, j] = np.mean(model_validation_error)



### Selected Model
In this case, model M32 is selected as the most generalized (least overfit) model, based on its validation error. The model is then trained on the full training set and evaluated on the test data.

In [None]:
#Generate validation/training graph for Model M32

# M32 uses the first 200 training samples and 40 hidden nodes.
n_samples = 200 #M32
inputs = x_train[:n_samples]
# Slice the targets here as well so they always line up with `inputs`,
# instead of reusing whatever `targets` an earlier cell left behind.
targets = y_train[:n_samples]

for train_idx, validate_idx in kfold.split(inputs):
    inputs_train, inputs_val = inputs[train_idx], inputs[validate_idx]
    targets_train, targets_val = targets[train_idx], targets[validate_idx]

    # NOTE(review): `model` below is read as a module-level name right after
    # this call; confirm compile_model binds a fresh network to it.
    compile_model(40) #M32

    #Fit to model
    # NOTE(review): patience=200 is larger than epochs=100, so this callback
    # cannot stop training early with the current settings.
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=200)
    h = model.fit(
        inputs_train, targets_train,
        validation_data=(inputs_val,targets_val),
        epochs=100,
        batch_size=10,
        verbose=0,
        callbacks=[es])

# `h` is rebound on every fold, so the curves below come from the last fold
# only. Losses are shown on a log10 scale.
plt.plot(np.log10(h.history['loss']))
plt.plot(np.log10(h.history['val_loss']), 'r')
plt.legend(['train loss', 'val loss'])

In [None]:
#Apply the full training set to the selected architecture (40 hidden nodes)

# Same architecture as compile_model builds: one sigmoid hidden layer and a
# linear output unit, with wide random-normal initial weights and biases.
model = keras.models.Sequential()
#hidden layer
ki=keras.initializers.RandomNormal(mean=0., stddev=30)
bi=keras.initializers.RandomNormal(mean=0., stddev=10)
model.add(Dense(
    40,
    activation='sigmoid',
    input_shape=(1,),
    kernel_initializer=ki,
    bias_initializer=bi))

#output layer
model.add(Dense(1, activation='linear'))

# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
optimizer = Adam(learning_rate=0.01)
model.compile(loss='mean_squared_error', optimizer=optimizer)

# Train on every training sample; no validation split is used here.
h = model.fit(
    x_train, y_train,
    epochs=10,
    batch_size=10,
    verbose=0,
)


In [None]:
#Check test data
# Run the trained network on the held-out inputs.
y_pred = model(x_test)

# Draw the true values first and the predictions second, each against the
# same test inputs, then label the two series.
for series, caption in (
        (y_test, "Original function"),
        (y_pred, 'Model prediction with test data')):
    plt.scatter(x_test, series, label=caption)
plt.legend()