# GEP (Gene Expression Programming)

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import random

In [2]:
# Name of the dataset. loadCombinedArray uses it both as the sub-directory of
# ../data/ and as the prefix of every file name it reads.
dataset = 'kepsilon'

# Cases to load for the training/validation set. Entries that are commented out
# are not loaded at all, i.e. they are held out from both training and validation.
# Arrays are concatenated in the order of this list.
cases = ['DUCT_1100',
         'DUCT_1150',
         'DUCT_1250',
         'DUCT_1300',
         'DUCT_1350',
         'DUCT_1400',
         'DUCT_1500',
         'DUCT_1600',
         'DUCT_1800',
         #'DUCT_2000',
         'DUCT_2205',
         'DUCT_2400',
         'DUCT_2600',
         'DUCT_2900',
         'DUCT_3200',
         #'DUCT_3500',
         'PHLL_case_0p5',
         'PHLL_case_0p8',
         'PHLL_case_1p0',
         #'PHLL_case_1p2',
         'PHLL_case_1p5',
         'BUMP_h20',
         'BUMP_h26',
         'BUMP_h31',
         #'BUMP_h38',
         'BUMP_h42',
         'CNDV_12600',
         'CNDV_20580',
         'CBFS_13700'
         ]

In [3]:
#Convenient functions for loading dataset
def loadCombinedArray(cases,field):
    """Load one field for every case and join the arrays along the first axis.

    For each entry of ``cases`` the file
    ``../data/<dataset>/<dataset>_<case>_<field>.npy`` is read, where
    ``dataset`` is the module-level variable of that name. The per-case
    arrays are concatenated in the order in which ``cases`` lists them.
    """
    per_case = []
    for case in cases:
        path = '../data/'+dataset+'/'+dataset+'_'+case+'_'+field + '.npy'
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # Python objects, so only point this at trusted files.
        per_case.append(np.load(path, allow_pickle=True))
    data = np.concatenate(per_case)
    return data

In [4]:
def loadLabels(cases,field):
    """Load the label arrays of all cases and join them along the first axis.

    For each entry of ``cases`` the file ``../data/labels/<case>_<field>.npy``
    is read; the arrays are concatenated in the order given by ``cases``.
    """
    label_arrays = []
    for case in cases:
        label_path = '../data/'+'labels/'+case+'_'+field + '.npy'
        label_arrays.append(np.load(label_path))
    return np.concatenate(label_arrays)

In [5]:
# Report which dataset the features and labels below are read from.
print('Loading features and labels from the dataset: '+ dataset)

Loading features and labels from the dataset: kepsilon


In [6]:
#Load the set of ten basis tensors (N,10,3,3), from Pope "A more general effective-viscosity hypothesis" (1975).
#Presumably row-aligned with the feature array loaded from the same `cases` list: the
#train/validation split later indexes Tensors with row indices taken from Features.
Tensors = loadCombinedArray(cases,'Tensors')
print('Shape of basis tensor array: '+str(Tensors.shape))

Shape of basis tensor array: (791490, 10, 3, 3)


In [7]:
#Load the 47 invariants (N,47) used by Wu et al. "Physics-informed machine learning approach for augmenting turbulence models: A comprehensive framework" (2018)
#They are stored under the field name 'I1' in the dataset files.
Invariants = loadCombinedArray(cases,'I1')
print('Shape of invariant features array: '+str(Invariants.shape))

Shape of invariant features array: (791490, 47)


In [8]:
# #Load the additional scalars (N,5): 
# Scalars = loadCombinedArray(cases,'q')
# print('Shape of scalar features array: '+str(Scalars.shape))

In [9]:
# The feature set is currently the invariants only (the additional-scalars cell is commented out).
# A copy is taken so that later changes to Features leave Invariants untouched.
Features = Invariants.copy()
print('Shape of combined features array: '+str(Features.shape))

Shape of combined features array: (791490, 47)


# Data Processing

In [10]:
def remove_outliers(Features, n_std=5):
    """Return the indices of rows that contain at least one outlying feature.

    A value is an outlier when it lies strictly more than ``n_std`` standard
    deviations above or below the mean of its column.

    Parameters
    ----------
    Features : array-like, 2-D
        One row per sample, one column per feature.
    n_std : float, optional
        Half-width of the accepted band in column standard deviations
        (default 5, the value that used to be hard-coded).

    Returns
    -------
    numpy.ndarray of int
        Sorted row indices, each listed once. A row that is an outlier in
        several columns is no longer repeated, so ``len`` of the result is
        the number of rows that will be dropped. ``np.delete`` with these
        indices removes the same rows as before.
    """
    Features = np.asarray(Features)
    stdev = np.std(Features, axis=0)
    means = np.mean(Features, axis=0)
    # Column-wise comparison via broadcasting; same strict inequalities as the per-column loop.
    outside = (Features > means + n_std*stdev) | (Features < means - n_std*stdev)
    return np.flatnonzero(outside.any(axis=1))

# Set to 1 to drop the outlier rows from Features, Tensors and Labels together.
# NOTE(review): in the visible notebook Labels is first assigned in a later cell
# (Labels = loadLabels(cases,'b')). On a fresh kernel, enabling this switch therefore
# raises NameError at the Labels line below, and that later cell would afterwards
# reload the unfiltered labels. Load the labels before this cell before turning it on.
outlier_removal_switch = 0
if outlier_removal_switch == 1:
    outlier_index = remove_outliers(Features)
    print('Found '+str(len(outlier_index))+' outliers in the input feature set')
    # The same row indices are removed from every array so that they stay row-aligned.
    Features = np.delete(Features,outlier_index,axis=0)
    Tensors = np.delete(Tensors,outlier_index,axis=0)
    Labels = np.delete(Labels,outlier_index,axis=0)


In [11]:
#Load the label set from DNS/LES (field 'b', read from ../data/labels/<case>_b.npy):
Labels = loadLabels(cases,'b')
#If desired, reshape the 3x3 symmetric anisotropy tensor into a 1x6 vector
#(flattens each tensor to 9 entries and drops entries 3, 6 and 7, the lower-triangle duplicates)
# Labels = np.delete(Labels.reshape((len(Labels),9)),[3,6,7],axis=1)
print('Shape of DNS/LES labels array: '+str(Labels.shape))

Shape of DNS/LES labels array: (791490, 3, 3)


In [12]:
# Row numbers of Features; split together with the data so the chosen rows can be looked up in Tensors.
indices = np.arange(Features.shape[0])

In [13]:
# Shuffled 80/20 train/validation split with a fixed seed (random_state=10).
# Features, Labels and the row indices are split consistently in a single call.
x_train, x_val, y_train, y_val, ind_train, ind_val = train_test_split(Features, Labels, indices, test_size=0.2, random_state=10, shuffle=True)

In [14]:
# Pick the basis tensors of the same rows that went into the training / validation features.
basis_train = Tensors[ind_train]
basis_val = Tensors[ind_val]

In [15]:
# scaler = StandardScaler()
# x_train = scaler.fit_transform(x_train)
# x_val = scaler.transform(x_val)

In [16]:
# Print the shape of every array in the training split, then in the validation split.
# Each split is preceded and followed by a line holding a single space.
print(' ')
for split_report in (
    (('Training features:', x_train),
     ('Training tensor basis:', basis_train),
     ('Training labels:', y_train)),
    (('Validation features:', x_val),
     ('Validation tensor basis:', basis_val),
     ('Validation labels:', y_val)),
):
    for heading, array in split_report:
        print(heading)
        print(array.shape)
    print(' ')

 
Training features:
(633192, 47)
Training tensor basis:
(633192, 10, 3, 3)
Training labels:
(633192, 3, 3)
 
Validation features:
(158298, 47)
Validation tensor basis:
(158298, 10, 3, 3)
Validation labels:
(158298, 3, 3)
 


# GEP

In [17]:
class Operator:
    """A function symbol of an expression: a display name, a callable and an arity.

    Parameters
    ----------
    name : str
        Symbol used when a chromosome is printed.
    func : callable
        Function applied when the operator is called.
    nargs : int, optional
        Number of arguments ``func`` takes (1 or 2). When omitted, the arity
        is derived from ``name`` as before: '+', '-' and '*' are binary,
        every other name is unary. Pass it explicitly for any other binary
        operator, which would otherwise silently be treated as unary.

    Raises
    ------
    ValueError
        If ``nargs`` is given and is neither 1 nor 2.
    """
    def __init__(self, name, func, nargs=None):
        self.name = name
        self.func = func
        if nargs is None:
            # Backward-compatible default: arity inferred from the symbol.
            nargs = 2 if self.name in ['+', '-', '*'] else 1
        elif nargs not in (1, 2):
            raise ValueError('nargs must be 1 or 2, got ' + repr(nargs))
        self.nargs = nargs
    def __call__(self, l, r=None):
        """Apply the operator to ``l`` (unary) or to ``l`` and ``r`` (binary)."""
        if self.nargs == 2:
            return self.func(l, r)
        else:
            # r is ignored for unary operators.
            return self.func(l)

In [18]:
class Terminal:
    """A leaf symbol of an expression: a display name together with the value it stands for."""
    def __init__(self, name, value):
        self.name, self.value = name, value

In [19]:
# Binary arithmetic operators (their names are the ones Operator treats as two-argument).
plus, minus, star = Operator('+', np.add), Operator('-', np.subtract), Operator('*', np.multiply)
# Unary operators.
sin, cos, exp = Operator('s', np.sin), Operator('c', np.cos), Operator('e', np.exp)

In [20]:
# All operators a chromosome head may draw from.
functional_set = [plus, minus, star, sin, cos, exp]

In [25]:
# h is the size budget handed to generate_head and generate_tail below.
h = 4
# x and y are the variable terminals (with fixed sample values here).
x = Terminal('x', 10)
y = Terminal('y', 20)
terminal_set = [x, y]
# h+1 random constants, drawn from a standard normal, are added to the terminal set as well.
for i in range(h+1):
    constant = Terminal(f'const{i}', np.random.randn())
    terminal_set.append(constant)

In [26]:
def generate_head(h, functional_set, terminal_set):
    """Randomly build the head of a chromosome.

    Genes are drawn uniformly, with replacement, from
    ``functional_set + terminal_set`` until a budget of ``h`` is used up.
    An Operator costs ``nargs`` and a Terminal costs 1; the last gene drawn
    may overshoot the budget.

    Parameters
    ----------
    h : int
        Size budget of the head. ``h <= 0`` gives an empty head.
    functional_set : list of Operator
    terminal_set : list of Terminal

    Returns
    -------
    list
        The drawn genes in order.

    Raises
    ------
    TypeError
        If a drawn gene is neither an Operator nor a Terminal. Such a gene
        used to be appended without reducing the budget, which could loop
        forever.
    """
    # Loop-invariant: build the candidate pool once instead of on every draw.
    pool = functional_set + terminal_set
    h_res = h
    head = []
    while h_res > 0:
        gene = np.random.choice(pool)
        # isinstance (not an exact type match) so that subclasses are charged too.
        if isinstance(gene, Operator):
            h_res -= gene.nargs
        elif isinstance(gene, Terminal):
            h_res -= 1
        else:
            raise TypeError('unsupported gene type: ' + type(gene).__name__)
        head.append(gene)
    return head
# Sample head, used by the cells below to demonstrate printing and evaluation.
head = generate_head(h, functional_set, terminal_set)

In [27]:
def generate_tail(h, functional_set, terminal_set):
    """Randomly build the tail of a chromosome.

    The tail holds ``h * (2 - 1) + 1`` genes, each drawn uniformly, with
    replacement, from ``terminal_set``. ``functional_set`` is accepted for
    symmetry with generate_head but is not used.
    """
    tail_length = h * (2 - 1) + 1
    return [np.random.choice(terminal_set) for _ in range(tail_length)]
# Sample tail, used by the cells below to demonstrate printing and evaluation.
tail = generate_tail(h, functional_set, terminal_set)

In [28]:
def print_chromosome(chromosome):
    """Print the names of the genes in ``chromosome`` as a Python list."""
    names = []
    for gene in chromosome:
        names.append(gene.name)
    print(names)

In [29]:
# Show the symbols of the sample head.
print_chromosome(head)

['const3', 'const3', 'const3', '-']


In [30]:
# Show the symbols of the sample tail.
print_chromosome(tail)

['const4', 'const0', 'const0', 'const0', 'x']


In [31]:
def calc(head, tail):
    """Evaluate the expression at the front of a chromosome, read in prefix order.

    The next gene is taken from ``head``; once ``head`` is empty it is taken
    from ``tail``. A Terminal evaluates to its ``value``. An Operator first
    evaluates ``nargs`` sub-expressions recursively from the remaining genes
    and is then applied to their results.

    Parameters
    ----------
    head, tail : sequence of Operator / Terminal
        The two parts of the chromosome. Neither is modified.

    Returns
    -------
    tuple
        ``(value, remaining_head, remaining_tail)`` where the last two hold
        the genes that were not consumed by the evaluated expression.

    Raises
    ------
    IndexError
        If an operand is needed but both ``head`` and ``tail`` are empty.
    TypeError
        If a gene is neither an Operator nor a Terminal (this used to return
        ``None`` silently).
    ValueError
        If an Operator has an arity other than 1 or 2 (this used to return
        ``None`` silently).
    """
    # Select the next gene and what is left of head/tail after consuming it.
    if len(head) == 0:
        if len(tail) == 0:
            raise IndexError('chromosome exhausted: no gene left in head or tail')
        gene, rest_head, rest_tail = tail[0], head, tail[1:]
    else:
        gene, rest_head, rest_tail = head[0], head[1:], tail

    if isinstance(gene, Terminal):
        return gene.value, rest_head, rest_tail
    if not isinstance(gene, Operator):
        raise TypeError('unsupported gene type: ' + type(gene).__name__)
    if gene.nargs not in (1, 2):
        raise ValueError('unsupported operator arity: ' + repr(gene.nargs))

    # Evaluate the operands left to right; each one consumes genes from what the previous left over.
    operands = []
    for _ in range(gene.nargs):
        operand, rest_head, rest_tail = calc(rest_head, rest_tail)
        operands.append(operand)
    return gene(*operands), rest_head, rest_tail
            

# Evaluate the sample chromosome; as the last expression of the cell, the returned
# (value, remaining head, remaining tail) tuple is displayed.
calc(head, tail)

(-0.8083687470701534,
 [<__main__.Terminal at 0x7fe1e95edcd0>,
  <__main__.Terminal at 0x7fe1e95edcd0>,
  <__main__.Operator at 0x7fe1f09d0460>],
 [<__main__.Terminal at 0x7fe1e95ed520>,
  <__main__.Terminal at 0x7fe1e95e9670>,
  <__main__.Terminal at 0x7fe1e95e9670>,
  <__main__.Terminal at 0x7fe1e95e9670>,
  <__main__.Terminal at 0x7fe1e95e9460>])

In [32]:
# Number of individuals in the initial population.
generation_size = 20
# One (head, tail) pair per individual. The size budget is h, the same value the
# terminal set was built from, instead of a repeated literal 4 that could drift from it.
generation = [(generate_head(h, functional_set, terminal_set),
               generate_tail(h, functional_set, terminal_set)) for _ in range(generation_size)]