In [28]:
import os
import csv
import torch
import pandas as pd
import matplotlib.pyplot as plt

from gpolnel.utils.datasets import load_boston
from torch.utils.data import TensorDataset, DataLoader

from gpolnel.problems.inductive_programming import SML
from gpolnel.utils.utils import train_test_split
from gpolnel.utils.ffunctions import Ffunctions
from gpolnel.utils.inductive_programming import function_map
from gpolnel.algorithms.genetic_algorithm import GeneticAlgorithm
from gpolnel.operators.initializers import grow, prm_grow, ERC, rhh
from gpolnel.operators.variators import swap_xo, prm_subtree_mtn, prm_gs_xo, prm_gs_mtn, prm_point_mtn, prm_efficient_gs_xo, prm_efficient_gs_mtn
from gpolnel.operators.selectors import prm_tournament, roulette_wheel, double_tournament
from sklearn.preprocessing import MinMaxScaler

In [29]:
# Feature table: read the raw CSV, then replace every missing entry with 0.
X = pd.read_csv('datamart/data_project_nel.csv')
X = X.fillna(0)

# Regression target (the displayed column is `lactose_percent`).
y = pd.read_csv('datamart/y_lactose.csv')

# Echo the target frame as the cell output for a quick visual check.
y

Unnamed: 0,lactose_percent
0,4.953503
1,4.983128
2,4.889104
3,4.868969
4,4.845402
...,...
319,4.891620
320,4.800946
321,4.916133
322,5.024776


In [30]:
# Convert the pandas objects to float32 tensors.
# NOTE: X and y are rebound from DataFrames to tensors here, so this cell
# cannot be re-run without first re-running the loading cell.
X = torch.tensor(X.values, dtype=torch.float32)

# Flatten the target into a 1-D vector holding all of its values.
y = torch.tensor(y.values, dtype=torch.float32).reshape(-1)

# Report both shapes on a single line.
print(*(t.shape for t in (X, y)))

torch.Size([324, 14]) torch.Size([324])


In [31]:
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------
seed = 1
# Use the 'mps' backend when torch reports it as available, otherwise the CPU.
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
shuffle = True
p_test = .3
p_val = .3
n_batches_pct = 1  # not used anywhere in this cell

# ---------------------------------------------------------------------------
# Data split
# ---------------------------------------------------------------------------
# Step 1: hold out the test partition from the full data set.
X_train, X_test, y_train, y_test = train_test_split(X, y, p_test=p_test, seed=seed)

# Step 2: carve the validation partition out of what is left for training.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, p_test=p_val, seed=seed)

# ---------------------------------------------------------------------------
# Feature scaling
# ---------------------------------------------------------------------------
# The scaler is fitted on the training partition only; validation and test
# are transformed with those fitted statistics. Targets are left unscaled.
scaler = MinMaxScaler()
X_train = torch.tensor(scaler.fit_transform(X_train), dtype=torch.float32)
X_val, X_test = (
    torch.tensor(scaler.transform(part), dtype=torch.float32)
    for part in (X_val, X_test)
)

# ---------------------------------------------------------------------------
# Data description
# ---------------------------------------------------------------------------
n_train, n_val, n_test = (part.shape[0] for part in (X_train, X_val, X_test))

# NOTE(review): batch_size equals the whole training partition, so dl_train
# yields a single batch per pass, while total_batches is fixed at 3 and later
# handed to the search space as 'n_batches' — confirm how SML interprets it.
total_batches = 3
batch_size = n_train
print(f'Batch size: {batch_size}\nTotal batches: {total_batches}\n')

print(f'Train:\t{n_train}\nVal:\t{n_val}\nTest:\t{n_test}')
print(f'Total:\t{n_train + n_val + n_test}')

# Training and validation data sets
ds_train = TensorDataset(X_train, y_train)
ds_val = TensorDataset(X_val, y_val)

# Training and validation data loaders (both use the same batch size and
# the same shuffle flag)
dl_train = DataLoader(ds_train, batch_size=batch_size, shuffle=shuffle)
dl_val = DataLoader(ds_val, batch_size=batch_size, shuffle=shuffle)


Batch size: 159
Total batches: 3

Train:	159
Val:	68
Test:	97
Total:	324


In [5]:
# Sanity check: show which type the training target has after the split.
type(y_train)

torch.Tensor

In [6]:
# Inspect the training target; the bare expression echoes the tensor's values.
y_train

tensor([4.7904, 4.8916, 4.9784, 4.9259, 4.7225, 5.0039, 4.8479, 4.8490, 4.7730,
        4.8473, 4.8589, 4.8838, 4.8986, 4.8768, 4.9678, 4.9228, 4.8739, 4.8839,
        4.9045, 4.9271, 4.8631, 4.8672, 4.9949, 4.9535, 4.8955, 4.8183, 4.8664,
        5.0020, 4.8861, 4.9711, 4.8549, 4.8118, 4.8280, 4.8668, 4.9138, 4.8009,
        4.8888, 4.9296, 4.8431, 4.8615, 4.8632, 4.8632, 4.8026, 4.8982, 4.8711,
        4.9086, 4.8717, 4.8387, 4.8649, 5.0403, 4.9555, 4.9563, 4.8480, 4.8391,
        4.8125, 4.8317, 4.8587, 4.8824, 4.8894, 4.9064, 4.9003, 4.9917, 4.9800,
        4.9573, 4.8454, 4.9126, 4.8432, 4.8322, 4.9707, 4.8353, 4.8816, 4.9234,
        4.9728, 4.8702, 4.8349, 4.9922, 4.8665, 5.0011, 4.8746, 4.8741, 4.9706,
        4.9275, 4.9488, 4.7475, 4.9439, 4.8494, 4.9054, 4.8136, 4.9132, 4.9583,
        5.0106, 4.7435, 4.8423, 4.8367, 4.8956, 4.7809, 4.9028, 4.9514, 4.8942,
        4.8089, 4.7972, 4.8893, 4.9367, 5.0590, 4.8803, 5.0311, 4.9639, 4.8493,
        4.8423, 4.9313, 4.9669, 4.8828, 

In [32]:
# Primitive functions the evolved expressions may use, looked up by name.
fset = [function_map[op] for op in ('add', 'sub', 'mul', 'div')]

# Search-space description shared by the problem instance and the operators.
sspace_sml = dict(
    n_dims=X_train.shape[1],       # one input per feature column
    function_set=fset,
    constant_set=ERC(-1., 1.),     # presumably random constants drawn from [-1, 1] — confirm
    p_constants=0.1,               # presumably the chance of picking a constant terminal — confirm
    max_init_depth=3,
    max_depth=10,
    n_batches=total_batches,
    device=device,
)

# Supervised-learning problem scored with the 'mape' fitness function.
# The validation loader is passed as dl_test: from the algorithm's point of
# view the "unseen" data is our validation partition.
pi_sml = SML(
    sspace=sspace_sml,
    ffunction=Ffunctions('mape'),
    dl_train=dl_train,
    dl_test=dl_val,
    n_jobs=8,
)

In [36]:
# Evolution hyper-parameters
# NOTE(review): pressure .05 on a population of 10 is half an individual —
# confirm how double_tournament turns the pressure into a tournament size.
ps, selection_pressure = 10, .05
xo_prob, mutation_prob = .9, .1
has_elitism, allow_reproduction = True, False
# Value handed to the geometric semantic mutation as `ms`, on the device.
ms = torch.tensor([.05], dtype=torch.float32).to(device)

# All constructor arguments are collected first and unpacked in one call.
ga_settings = dict(
    pi=pi_sml,
    initializer=rhh,
    selector=double_tournament(pressure=selection_pressure),
    # Each geometric semantic operator gets its own grow-based initializer.
    crossover=prm_gs_xo(initializer=prm_grow(sspace_sml), device=device),
    mutator=prm_gs_mtn(initializer=prm_grow(sspace_sml), ms=ms),
    pop_size=ps,
    p_m=mutation_prob,
    p_c=xo_prob,
    elitism=has_elitism,
    # reproduction=False: an offspring comes from either crossover or mutation.
    reproduction=allow_reproduction,
    device=device,
    seed=seed,
)
mheuristic = GeneticAlgorithm(**ga_settings)

In [34]:
# Presumably creates (and evaluates) the initial population — confirm.
# NOTE(review): the output recorded for this cell is a TypeError raised by
# grow() (missing required argument 'n_sols'), so this run failed — check that
# the initializer passed to GeneticAlgorithm calls grow() with its full signature.
mheuristic._initialize()

TypeError: grow() missing 1 required positional argument: 'n_sols'

In [25]:
# Print the population: one line per individual with its representation and fitness.
print(mheuristic.pop)

0)	[sub, mul, div, 6, tensor(-0.9433, device='mps:0'), 12, div, 3, mul, tensor(0.2991, device='mps:0'), 6] (fit: tensor(12.7297, device='mps:0'))
1)	[sub, sub, 8, 10, div, 6, 11] (fit: tensor(12.7297, device='mps:0'))
2)	[mul, 11, 10] (fit: tensor(12.7297, device='mps:0'))
3)	[sub, 7, 0] (fit: tensor(12.7297, device='mps:0'))
4)	[div, 2, 11] (fit: tensor(12.7297, device='mps:0'))
5)	[mul, 1, 6] (fit: tensor(12.7297, device='mps:0'))
6)	[mul, div, 13, 2, add, 13, 5] (fit: tensor(12.7297, device='mps:0'))
7)	[mul, 9, div, 8, 0] (fit: tensor(12.7297, device='mps:0'))
8)	[div, div, 6, tensor(0.0802, device='mps:0'), 0] (fit: tensor(12.7297, device='mps:0'))
9)	[sub, sub, mul, 10, tensor(-0.9666, device='mps:0'), mul, 12, 1, sub, 2, 4] (fit: tensor(12.7297, device='mps:0'))



In [26]:
# Short aliases for the objects inspected below.
pop = mheuristic.pop
best = mheuristic.best_sol

# Population container type and size.
print(f'\nGP population: {pop.__class__} ({len(pop)} individuals)')

# Fitness and validity flags of the first three individuals.
print(f'\nPoppulation fitness {pop.fit[:3]}...')
print(f'Poppulation valid {pop.valid[:3]}...\n')

# Representation and fitness of the first three individuals, one per line.
for i in range(3):
    ind = pop.individuals[i]
    print(f'Individual {i}: {ind.repr_}, fitness {ind.fit}')

# Best solution found so far, rendered as a readable expression string.
print(f'\nGP best individual {best.printTree(out="string")}, fitness {best.fit}\n\n')


GP population: <class 'gpolnel.utils.population.PopulationTree'> (10 individuals)

Poppulation fitness tensor([12.7297, 12.7297, 12.7297], device='mps:0')...
Poppulation valid [True, True, True]...

Individual 0: [sub, mul, div, 6, tensor(-0.9433, device='mps:0'), 12, div, 3, mul, tensor(0.2991, device='mps:0'), 6], fitness 12.729732513427734
Individual 1: [sub, sub, 8, 10, div, 6, 11], fitness 12.729732513427734
Individual 2: [mul, 11, 10], fitness 12.729732513427734

GP best individual mul( x_9, div( x_8, x_0 ) ), fitness 12.729732513427734




In [27]:
# Logging is switched off for this run. To enable it: make sure the './log/'
# directory exists, delete any previous './log/intro.log', and pass
# log=3 together with log_path='./log/intro.log' to solve().

# Learning
# Run the evolution for n_iter iterations; the progress table printed below
# also reports the best solution's fitness on the test (validation) loader.
n_iter = 50
mheuristic.solve(n_iter, test_elite=True, verbose=3)

-------------------------------------------------------------------------------------------------------
           |                    Best solution                      |            Population            |
-------------------------------------------------------------------------------------------------------
Generation | Length   Fitness          Test Fitness         Timing | AVG Fitness           STD Fitness
-------------------------------------------------------------------------------------------------------
0          | 7        10.7887          1.09076               0.024 | -1                             -1
1          | 31       8.80373          1.63287               0.044 | -1                             -1
2          | 67       8.10672          0.993637              0.070 | -1                             -1
3          | 149      7.19125          0.888846              0.136 | -1                             -1
4          | 149      7.19125          0.888846              0.232 | 

KeyboardInterrupt: 

#### List of exercises

<hr />

##### **Exercise 1.a - Done**
**Implement the full and the ramped half-and-half initializers.**

\>> *The rhh initialization implementation will be evaluated in the final project. Its solution will be provided before the project delivery, so it is a basic implementation exercise.*


<hr />

<br />

<hr />

##### **Exercise 1.b - Done**
**Implement the Hoist mutation.**

\>> *The hoist mutation implementation will be evaluated in the final project. Its solution will be provided before the project delivery, so it is a basic implementation exercise.*


<hr />

<br />

<hr />

##### **Exercise 2 - Done**
**Implement the double tournament selection algorithm.**

\>> *The Double tournament selection algorithm implementation will be evaluated in the final project. Its solution will be provided only after the project delivery, so it is an advanced implementation exercise.*

**Double tournament selection algorithm**

It performs two sequential tournament selections, one per objective (e.g., RMSE and tree size).

<hr />

<br />

<hr />

##### **Exercise 1 - Done**
**Implement the Geometric Semantic Crossover and Mutation proposed by Moraglio et al. 2012.**

\>> *The GSXO and GSM implementation will be evaluated in the final project. Its solution will be provided before the project delivery, so it is a basic implementation exercise.*


<hr />

<br />

<hr />

##### **Exercise 2 - Done**
**Implement the Efficient Geometric Semantic Crossover and Mutation proposed by Vanneschi et al. 2013.**

\>> *The Efficient GSXO and GSM implementation will be evaluated in the final project. Its solution will be provided before the project delivery, so it is a basic implementation exercise.*


<hr />

<br />