Skip to content

Commit

Permalink
Start documenting
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt Roberts committed Jul 31, 2016
1 parent fd3f069 commit d6bd528
Show file tree
Hide file tree
Showing 16 changed files with 219 additions and 7 deletions.
11 changes: 11 additions & 0 deletions .gitattributes
@@ -0,0 +1,11 @@
FullTreeDepthOne.png filter=lfs diff=lfs merge=lfs -text
FullTreeDepthThree.png filter=lfs diff=lfs merge=lfs -text
FullTreeDepthTwo.png filter=lfs diff=lfs merge=lfs -text
FullTrees.png filter=lfs diff=lfs merge=lfs -text
GrowTrees.png filter=lfs diff=lfs merge=lfs -text
SecondDegreePolynomial.png filter=lfs diff=lfs merge=lfs -text
ThirdDegreePolynomial.png filter=lfs diff=lfs merge=lfs -text
SecondDegreePolynomial.pdf filter=lfs diff=lfs merge=lfs -text
ThirdDegreePolynomial.pdf filter=lfs diff=lfs merge=lfs -text
Trees.graffle filter=lfs diff=lfs merge=lfs -text
Polynomials.numbers filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -2,3 +2,4 @@
project/project
project/target
target
*.txt
3 changes: 3 additions & 0 deletions FullTreeDepthOne.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions FullTreeDepthThree.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions FullTreeDepthTwo.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions FullTrees.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions GrowTrees.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
123 changes: 123 additions & 0 deletions PolynomialOfDegree3.md
@@ -0,0 +1,123 @@
07:41:53.810 [run-main-5] DEBUG GP$ - run=43, minFitness=0.48753065, distinct=10000 crossovers.length=10000, replicas.length=1900
07:41:56.277 [run-main-5] DEBUG Main$ - Fittest tree: Div(Sub(Sub(Sub(Div(Add(Div(Var('x),Con(-4.0)),Con(-3.0)),Sub(Con(2.0),Div(Con(-2.0),Con(2.0)))),Mul(Div(Var('x),Con(-4.0)),Mul(Var('x),Con(1.0)))),Mul(Div(Var('x),Con(-4.0)),Mul(Var('x),Con(1.0)))),Mul(Div(Var('x),Con(-3.0)),Div(Add(Con(-1.0),Con(-4.0)),Div(Con(-4.0),Con(-1.0))))),Div(Add(Div(Div(Con(0.0),Con(-3.0)),Sub(Sub(Mul(Div(Con(1.0),Con(3.0)),Con(-5.0)),Mul(Mul(Sub(Con(3.0),Con(-4.0)),Con(2.0)),Con(3.0))),Div(Sub(Sub(Div(Add(Sub(Div(Con(1.0),Con(3.0)),Div(Var('x),Con(-5.0))),Con(-3.0)),Mul(Con(0.0),Div(Con(-1.0),Sub(Div(Add(Con(0.0),Con(-3.0)),Div(Mul(Div(Con(4.0),Con(-5.0)),Div(Con(4.0),Mul(Div(Con(4.0),Con(-3.0)),Add(Var('x),Add(Mul(Con(0.0),Mul(Con(2.0),Con(0.0))),Add(Sub(Con(5.0),Con(1.0)),Div(Con(-3.0),Add(Con(2.0),Con(4.0))))))))),Sub(Add(Mul(Con(1.0),Con(2.0)),Con(0.0)),Div(Con(1.0),Con(2.0))))),Mul(Div(Div(Add(Var('x),Con(-3.0)),Div(Mul(Mul(Con(3.0),Con(-5.0)),Div(Div(Con(-5.0),Con(1.0)),Div(Con(-4.0),Con(-1.0)))),Con(2.0))),Div(Add(Con(3.0),Mul(Con(-1.0),Con(-3.0))),Mul(Div(Sub(Con(-3.0),Con(-1.0)),Div(Var('x),Con(-2.0))),Add(Div(Con(3.0),Con(4.0)),Mul(Con(2.0),Con(2.0)))))),Div(Var('x),Div(Con(-5.0),Con(-1.0)))))))),Mul(Div(Var('x),Con(-4.0)),Mul(Var('x),Con(1.0)))),Mul(Div(Mul(Con(1.0),Con(3.0)),Add(Con(-1.0),Con(-4.0))),Div(Sub(Con(-5.0),Con(4.0)),Div(Con(-4.0),Con(-1.0))))),Div(Div(Con(1.0),Con(3.0)),Add(Mul(Div(Con(1.0),Con(3.0)),Sub(Con(-4.0),Con(0.0))),Sub(Div(Mul(Con(1.0),Con(3.0)),Con(4.0)),Add(Con(-5.0),Con(-5.0)))))))),Add(Sub(Con(5.0),Con(1.0)),Mul(Con(0.0),Con(3.0)))),Add(Var('x),Sub(Add(Mul(Mul(Add(Mul(Mul(Sub(Add(Con(-2.0),Con(5.0)),Add(Con(-2.0),Con(4.0))),Add(Add(Con(-5.0),Con(4.0)),Sub(Var('x),Con(0.0)))),Mul(Sub(Add(Con(5.0),Con(4.0)),Sub(Con(0.0),Con(-1.0))),Div(Add(Div(Con(-4.0),Con(4.0)),Add(Con(5.0),Con(-2.0))),Sub(Con(-5.0),Add(Con(5.0),Con(-1.0)))))),Sub(Div(Con(2.0),Con(1.0)),Div(Con(3.0),Con(5.0)))),Mul(Con(1.0),Con(1.0))),Div(Con(0.0),Con(-1.0))),Sub(Var('x),Con(2.0))
),Add(Con(-5.0),Con(-5.0))))))
07:41:56.277 [run-main-5] DEBUG Main$ - expected actual
07:41:56.278 [run-main-5] DEBUG Main$ - 2.5 2.5
07:41:56.278 [run-main-5] DEBUG Main$ - 2.5337815 2.5337815
07:41:56.278 [run-main-5] DEBUG Main$ - 2.5602508 2.5602498
07:41:56.278 [run-main-5] DEBUG Main$ - 2.5795932 2.5795932
07:41:56.278 [run-main-5] DEBUG Main$ - 2.592 2.592
07:41:56.278 [run-main-5] DEBUG Main$ - 2.5976562 2.5976567
07:41:56.278 [run-main-5] DEBUG Main$ - 2.5967498 2.5967498
07:41:56.278 [run-main-5] DEBUG Main$ - 2.5894685 2.5894685
07:41:56.278 [run-main-5] DEBUG Main$ - 2.5760002 2.5760002
07:41:56.278 [run-main-5] DEBUG Main$ - 2.5565314 2.5565317
07:41:56.278 [run-main-5] DEBUG Main$ - 2.5312505 2.5312502
07:41:56.278 [run-main-5] DEBUG Main$ - 2.5003438 2.5003438
07:41:56.278 [run-main-5] DEBUG Main$ - 2.4640007 2.4640002
07:41:56.278 [run-main-5] DEBUG Main$ - 2.4224072 2.422407
07:41:56.278 [run-main-5] DEBUG Main$ - 2.375751 2.3757508
07:41:56.278 [run-main-5] DEBUG Main$ - 2.3242197 2.3242192
07:41:56.279 [run-main-5] DEBUG Main$ - 2.2680006 2.2680008
07:41:56.279 [run-main-5] DEBUG Main$ - 2.207282 2.2072823
07:41:56.279 [run-main-5] DEBUG Main$ - 2.142251 2.1422508
07:41:56.279 [run-main-5] DEBUG Main$ - 2.0730953 2.0730948
07:41:56.279 [run-main-5] DEBUG Main$ - 2.0000014 2.0000014
07:41:56.279 [run-main-5] DEBUG Main$ - 1.9231579 1.9231579
07:41:56.279 [run-main-5] DEBUG Main$ - 1.8427515 1.8427517
07:41:56.279 [run-main-5] DEBUG Main$ - 1.7589703 1.7589704
07:41:56.279 [run-main-5] DEBUG Main$ - 1.6720021 1.6720021
07:41:56.279 [run-main-5] DEBUG Main$ - 1.5820336 1.5820336
07:41:56.279 [run-main-5] DEBUG Main$ - 1.4892523 1.4892523
07:41:56.279 [run-main-5] DEBUG Main$ - 1.393846 1.393846
07:41:56.279 [run-main-5] DEBUG Main$ - 1.2960026 1.2960026
07:41:56.279 [run-main-5] DEBUG Main$ - 1.195909 1.1959091
07:41:56.280 [run-main-5] DEBUG Main$ - 1.0937529 1.0937531
07:41:56.280 [run-main-5] DEBUG Main$ - 0.9897218 0.9897217
07:41:56.280 [run-main-5] DEBUG Main$ - 0.88400316 0.8840033
07:41:56.280 [run-main-5] DEBUG Main$ - 0.7767849 0.7767848
07:41:56.280 [run-main-5] DEBUG Main$ - 0.6682534 0.6682535
07:41:56.280 [run-main-5] DEBUG Main$ - 0.5585973 0.5585973
07:41:56.280 [run-main-5] DEBUG Main$ - 0.44800377 0.4480038
07:41:56.280 [run-main-5] DEBUG Main$ - 0.33666015 0.3366602
07:41:56.280 [run-main-5] DEBUG Main$ - 0.2247541 0.22475418
07:41:56.280 [run-main-5] DEBUG Main$ - 0.11247301 0.11247296
07:41:56.280 [run-main-5] DEBUG Main$ - 4.2915344E-6 4.2915317E-6
07:41:56.280 [run-main-5] DEBUG Main$ - -0.11246443 -0.11246445
07:41:56.280 [run-main-5] DEBUG Main$ - -0.22474575 -0.22474581
07:41:56.288 [run-main-5] DEBUG Main$ - -0.33665204 -0.336652
07:41:56.288 [run-main-5] DEBUG Main$ - -0.4479959 -0.44799584
07:41:56.288 [run-main-5] DEBUG Main$ - -0.5585897 -0.5585895
07:41:56.288 [run-main-5] DEBUG Main$ - -0.6682459 -0.66824603
07:41:56.288 [run-main-5] DEBUG Main$ - -0.77677727 -0.77677745
07:41:56.288 [run-main-5] DEBUG Main$ - -0.8839961 -0.88399625
07:41:56.288 [run-main-5] DEBUG Main$ - -0.989715 -0.9897149
07:41:56.288 [run-main-5] DEBUG Main$ - -1.0937463 -1.0937463
07:41:56.289 [run-main-5] DEBUG Main$ - -1.1959026 -1.1959028
07:41:56.289 [run-main-5] DEBUG Main$ - -1.2959964 -1.2959964
07:41:56.289 [run-main-5] DEBUG Main$ - -1.3938403 -1.3938406
07:41:56.289 [run-main-5] DEBUG Main$ - -1.4892467 -1.4892467
07:41:56.289 [run-main-5] DEBUG Main$ - -1.5820282 -1.5820279
07:41:56.289 [run-main-5] DEBUG Main$ - -1.671997 -1.671997
07:41:56.289 [run-main-5] DEBUG Main$ - -1.7589657 -1.7589661
07:41:56.289 [run-main-5] DEBUG Main$ - -1.8427472 -1.8427472
07:41:56.289 [run-main-5] DEBUG Main$ - -1.9231535 -1.9231538
07:41:56.289 [run-main-5] DEBUG Main$ - -1.9999974 -1.9999973
07:41:56.289 [run-main-5] DEBUG Main$ - -2.0730913 -2.073091
07:41:56.289 [run-main-5] DEBUG Main$ - -2.1422477 -2.1422477
07:41:56.289 [run-main-5] DEBUG Main$ - -2.207279 -2.2072794
07:41:56.289 [run-main-5] DEBUG Main$ - -2.267998 -2.2679977
07:41:56.289 [run-main-5] DEBUG Main$ - -2.3242168 -2.3242166
07:41:56.289 [run-main-5] DEBUG Main$ - -2.3757482 -2.3757484
07:41:56.289 [run-main-5] DEBUG Main$ - -2.4224048 -2.4224052
07:41:56.289 [run-main-5] DEBUG Main$ - -2.4639986 -2.4639988
07:41:56.289 [run-main-5] DEBUG Main$ - -2.5003426 -2.5003421
07:41:56.289 [run-main-5] DEBUG Main$ - -2.531249 -2.531249
07:41:56.289 [run-main-5] DEBUG Main$ - -2.5565305 -2.5565305
07:41:56.290 [run-main-5] DEBUG Main$ - -2.5759995 -2.5759995
07:41:56.290 [run-main-5] DEBUG Main$ - -2.5894685 -2.5894687
07:41:56.290 [run-main-5] DEBUG Main$ - -2.5967498 -2.5967495
07:41:56.290 [run-main-5] DEBUG Main$ - -2.5976562 -2.597656
07:41:56.290 [run-main-5] DEBUG Main$ - -2.5920002 -2.592
07:41:56.290 [run-main-5] DEBUG Main$ - -2.5795941 -2.5795944
07:41:56.290 [run-main-5] DEBUG Main$ - -2.5602508 -2.560251
07:41:56.290 [run-main-5] DEBUG Main$ - -2.5337822 -2.5337827
07:41:56.290 [run-main-5] DEBUG Main$ - -2.5000012 -2.5000012
07:41:56.290 [run-main-5] DEBUG Main$ - -2.4587202 -2.4587202
07:41:56.290 [run-main-5] DEBUG Main$ - -2.4097517 -2.409752
07:41:56.290 [run-main-5] DEBUG Main$ - -2.3529084 -2.3529086
07:41:56.290 [run-main-5] DEBUG Main$ - -2.2880025 -2.2880023
07:41:56.290 [run-main-5] DEBUG Main$ - -2.2148466 -2.2148464
07:41:56.290 [run-main-5] DEBUG Main$ - -2.133253 -2.1332533
07:41:56.290 [run-main-5] DEBUG Main$ - -2.043035 -2.043035
07:41:56.290 [run-main-5] DEBUG Main$ - -1.9440038 -1.9440037
07:41:56.290 [run-main-5] DEBUG Main$ - -1.8359733 -1.8359731
07:41:56.290 [run-main-5] DEBUG Main$ - -1.7187548 -1.718755
07:41:56.291 [run-main-5] DEBUG Main$ - -1.5921619 -1.5921619
07:41:56.291 [run-main-5] DEBUG Main$ - -1.456006 -1.4560057
07:41:56.291 [run-main-5] DEBUG Main$ - -1.3101003 -1.3101001
07:41:56.291 [run-main-5] DEBUG Main$ - -1.154257 -1.1542573
07:41:56.291 [run-main-5] DEBUG Main$ - -0.9882891 -0.98828894
07:41:56.291 [run-main-5] DEBUG Main$ - -0.8120084 -0.8120082
07:41:56.291 [run-main-5] DEBUG Main$ - -0.6252277 -0.6252277
07:41:56.291 [run-main-5] DEBUG Main$ - -0.4277599 -0.42775998
07:41:56.291 [run-main-5] DEBUG Main$ - -0.21941662 -0.21941668
07:41:56.291 [run-main-5] DEBUG Main$ - -1.1444092E-5 -1.10864585E-5
07:41:56.291 [run-main-5] DEBUG Main$ - 0.2306447 0.23064466
07:41:56.291 [run-main-5] DEBUG Main$ - 0.4727378 0.47273734
07:41:56.291 [run-main-5] DEBUG Main$ - 0.7264557 0.7264556
07:41:56.291 [run-main-5] DEBUG Main$ - 0.99198604 0.99198633
07:41:56.291 [run-main-5] DEBUG Main$ - 1.269516 1.2695163
07:41:56.291 [run-main-5] DEBUG Main$ - 1.5592341 1.559234
07:41:56.291 [run-main-5] DEBUG Main$ - 1.8613272 1.8613272
07:41:56.291 [run-main-5] DEBUG Main$ - 2.1759825 2.1759827
07:41:56.291 [run-main-5] DEBUG Main$ - 2.503387 2.5033875
07:41:56.291 [run-main-5] DEBUG Main$ - 2.8437304 2.8437302
07:41:56.291 [run-main-5] DEBUG Main$ - 3.1971984 3.1971986
07:41:56.291 [run-main-5] DEBUG Main$ - 3.5639772 3.5639775
07:41:56.291 [run-main-5] DEBUG Main$ - 3.9442587 3.9442585
07:41:56.291 [run-main-5] DEBUG Main$ - 4.3382263 4.3382254
07:41:56.291 [run-main-5] DEBUG Main$ - 4.746068 4.7460675
07:41:56.291 [run-main-5] DEBUG Main$ - 5.1679726 5.167974
07:41:56.291 [run-main-5] DEBUG Main$ - 5.6041274 5.6041274
07:41:56.291 [run-main-5] DEBUG Main$ - 6.05472 6.05472
07:41:56.291 [run-main-5] DEBUG Main$ - 6.5199375 6.5199375
3 changes: 3 additions & 0 deletions Polynomials.numbers
Git LFS file not shown
55 changes: 52 additions & 3 deletions README.md
Expand Up @@ -127,14 +127,14 @@ From looking at both the AST and the chart you can hopefully see that the third

### Generating a population of programs

In GP, the leafs of an AST, whether they be constants or variables, are referred to as the terminal set. The branches whether they be unary, binary or some other arity function are referred to as the function set. So, using the AST defined above we can create the following terminal and functional sets:
In GP, the leaves of an AST, whether they be constants or variables, are referred to as the terminal set. The branches, whether they be unary, binary or functions of some other arity, are referred to as the function set. Using the AST defined above, the following terminal and function sets can be created:

```scala
// Terminal set: the variable x plus the constants 1.0 through 5.0 (step 1.0), each wrapped in Con.
val terminalSet = IndexedSeq(Var('x)) ++ 1f.to(5f, 1f).map(Con)
// Function set: the Add, Sub, Div and Mul node constructors used for the branches of a tree.
val functionSet = IndexedSeq(Add, Sub, Div, Mul)
```

With these sets it's possible to grow an AST of some arbitrary depth `depth`. The simplest way to do this is to create a `full` tree. The algorithm is reasonably simple. If the depth has been reached then return a random terminal from the terminal set. Otherwise return a random function passing the result of the next recursion as it's argument(s). Here is a method that does that:
With these sets it's possible to generate an AST of some arbitrary depth `depth`. The simplest way to do this is to create a `full` tree. The algorithm is reasonably simple. If the depth has been reached then return a random terminal from the terminal set. Otherwise return a random function, passing the result of the next recursion as its argument(s). Here is a method that does that:

```scala
def full(
Expand All @@ -156,7 +156,7 @@ def random[T](elements: IndexedSeq[T]): T = {
}
```

Here are some example ASTs with depths `1`, `2` and `3` generated with by the `full` method:
Here are some example ASTs with depths `1`, `2` and `3` generated with the `full` method:

![Full trees](FullTrees.png "Full trees")

Expand Down Expand Up @@ -185,3 +185,52 @@ def grow(
// Returns the next pseudo-random Float from Random (presumably scala.util.Random — confirm against the file's imports).
def random(): Float = Random.nextFloat()
```

Here are three example ASTs, all of depth `3`, generated with the `grow` method:

![Grow trees](GrowTrees.png "Grow trees")

These two methods can be composed together to create the method `rampHalfHalf`. The idea behind this is to use `grow` for one half of the population and `full` for the other. Further, the method starts at `depth` 1 and ramps up to some `maxDepth` (as opposed to all trees being the same `depth`):

```scala
/**
  * Builds a population of distinct trees using the "ramped half-and-half" scheme.
  *
  * Candidate trees are produced alternately by `full` (even `i`) and `grow` (odd `i`),
  * while the requested depth cycles through 1, 2, ..., `maxDepth` and then wraps to 1.
  *
  * @param count     number of distinct trees to produce; zero or a negative value yields
  *                  an empty set
  * @param maxDepth  largest depth handed to `full`/`grow`; values below 1 behave like 1
  * @param functions function set forwarded to `full` and `grow`
  * @param terminals terminal set forwarded to `full` and `grow`
  * @return a set containing `count` distinct trees
  *
  * NOTE: the recursion only stops once `count` distinct trees have been collected, so it
  * does not terminate if `full`/`grow` cannot produce that many different trees for the
  * given sets and `maxDepth`.
  */
def rampHalfHalf(
    count: Int,
    maxDepth: Int,
    functions: IndexedSeq[(Exp, Exp) => Exp],
    terminals: IndexedSeq[Exp]): Set[Exp] = {
  // `i` counts the trees accepted so far (it always equals the size of `acc`);
  // `depth` is the depth requested for the next candidate.
  @tailrec
  def loop(acc: Set[Exp], i: Int, depth: Int): Set[Exp] = {
    // `>=` rather than `==` so that a negative `count` terminates immediately.
    if (i >= count) {
      acc
    } else {
      val tree =
        if (i % 2 == 0) full(depth, functions, terminals)
        else grow(depth, functions, terminals)
      // Advance the depth on every attempt, even when the candidate is a duplicate, so the
      // search is not stuck at a depth whose trees have all been generated already.
      // `>=` keeps the depth from running past `maxDepth` when `maxDepth` is below 1.
      val nextDepth = if (depth >= maxDepth) 1 else depth + 1
      if (acc.contains(tree)) loop(acc, i, nextDepth) // duplicate: retry without counting it
      else loop(acc + tree, i + 1, nextDepth)
    }
  }
  loop(Set.empty, 0, 1)
}
```

The method above is a little finicky for several reasons. First, instead of comparing `acc.size` to `count`, the counter `i` is used. This is because calling `size` on a set can be inefficient, depending on the set implementation. Second, if the `acc` set already contains a tree then `i` is not incremented. Third, and in contrast to `i`, `depth` is always incremented unless `maxDepth` has been reached, in which case it is reset to `1`. This is because, especially for large values of `count`, it is possible that all permutations of trees at a certain `depth` have already been generated. If `depth` were not incremented then the recursion could be infinite.

With the above, it is now possible to create an initial population:

```scala
// Number of distinct trees requested for the initial population.
val count = 10000
// Upper bound of the depth ramp used by rampHalfHalf.
val maxDepth = 10
// Terminal set: the variable x plus the constants 1.0 through 5.0 (step 1.0), each wrapped in Con.
val terminalSet = IndexedSeq(Var('x)) ++ 1f.to(5f, 1f).map(Con)
// Function set: the Add, Sub, Div and Mul node constructors used for the branches of a tree.
val functionSet = IndexedSeq(Add, Sub, Div, Mul)
// Generate the population and convert the resulting Set to a Vector.
val initial = rampHalfHalf(count, maxDepth, functionSet, terminalSet).toVector
```

# Evaluating fitness

Evaluating fitness is easier than you might think. Given some input there is an expected output. The trick is to select the right inputs. Once that is done, however, evaluating fitness is just a matter of measuring the difference between the expected and actual outputs. For anyone who has done unit testing with multiple test cases this should feel familiar. The only part that might feel alien is measuring the difference and then reducing that measure into one figure for all test cases. For anyone with a machine learning background this will feel quite comfortable.
Binary file added SecondDegreePolynomial.pdf
Binary file not shown.
3 changes: 3 additions & 0 deletions SecondDegreePolynomial.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added ThirdDegreePolynomial.pdf
Binary file not shown.
3 changes: 3 additions & 0 deletions ThirdDegreePolynomial.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions Trees.graffle
Git LFS file not shown
9 changes: 5 additions & 4 deletions src/main/scala/Main.scala
Expand Up @@ -9,17 +9,18 @@ import scala.util.Random
import model._

object Main extends App with Logging {
val population = 10000
import GP._
val count = 10000
val maxDepth = 10
val terminalSet = IndexedSeq(Var('x)) ++ 1f.to(5f, 1f).map(Con)
val functionSet = IndexedSeq(Add, Sub, Div, Mul)
val initial = rampHalfHalf(count, maxDepth, functionSet, terminalSet).toVector

def pow(a: Float, b: Float): Float = Math.pow(a, b).toFloat
val expected = (-1f).to(1f, 0.05f).map(x => (Map('x -> x), pow(x, 2) - x - 2))

// val expected = (-3f).to(3f, 0.05f).map(x => (Map('x -> x), pow(x,3) / 4 + 3 * pow(x, 2) / 4 - 3 * x / 2 - 2))
val trees = GP.rampHalfHalf(population, maxDepth, functionSet, terminalSet).toVector
def criteria(fitness: Float): Boolean = fitness < 0.01f
val fitTree = GP.run(trees, expected, criteria)
val fitTree = run(initial, expected, criteria)
log.debug(s"Fittest tree: ${fitTree}")
log.debug("expected\t\tactual")
expected.foreach { case (symbols, expected) =>
Expand Down

0 comments on commit d6bd528

Please sign in to comment.