# Section 2 

Start looking in to a more realistic case -- evolving the full set of coefficients


In [10]:
# Boilerplate python imports
import sys
import csv
from math import *
import matplotlib
import matplotlib.pyplot as plt
import numpy as np


The code being shown here is heavily towards the evolutionary side. 

The key physical science and mathematics are in the imported module evolution1.
Keys being:
how to translate the parameters in to a prediction
how to score a prediction

In this case, the full set of linear coefficients are being evolved, bias and coefficients for GFS -- t2m, td, thickness (1000-850 mb), rh, and wind speed.

Though not shown yet, a good thing to do is to plot the predictions vs. their target. You can add that yourself, matplotlib is already imported.

In [19]:
# basic1 from the github

# Some global parameters:
nobs = 579
nparameters = 6

npopulation = 10
per_second = 60     # estimate of number of generations per second
genmax = int(60*per_second)

train_start = int(0)
train_end   = int(364)
np.random.seed(0)      # for reproducibility

from evolution1 import *

######################## ######################## ########################
# Now bring in the data for real work:
matchup_set = []

with open('testin.csv') as csvfile:
    k = 0
    sreader = csv.reader(csvfile, delimiter=",")
    for line in sreader:
        day     = float(line[0])
        t2m_gfs = float(line[1])
        td_gfs  = float(line[2])
        thick_gfs = float(line[3])
        rh_gfs  = float(line[4])
        speed   = float(line[5])
        obs_t2m = float(line[6])
        obs_td  = float(line[7])
        terr    = float(line[8])
        tderr  = float(line[9])

        #Note that obs_td, obs_t2m, tderr are being ignored. They can be
        #       added to the list.
        #  n.b.: note that it is terr that is used, not t2m itself.
        #Model and observation are well-enough correlated that it is the increment
        #which makes more sense to predict [Krasnopolsky,20NNN]
        m = matchup((day,t2m_gfs,td_gfs,thick_gfs,rh_gfs,speed,terr))
        matchup_set.append(m)
        k += 1

csvfile.close()
######################## ######################## ########################

Initialize and seed the population

Note the python structure used for initializing and adding to a list of things. Population and bests can be added to at will via the .append operation. We'll use this later (section 3) to collect all the parameter suites which are good in some respect (we'll decide what constitutes 'good').


In [12]:
# basic1 

#Initialize and seed the population
population = []
bests      = []       # Save all then-best versions
for k in range (0,npopulation):
    population.append(critter(nparameters))

weights = np.zeros((nparameters))
sdevs   = np.zeros((nparameters))
bests.append(critter(nparameters))
bests[0].init(weights, sdevs)
nbests = 1

#for reference, take the raw gfs output's score:
population[0].init(weights, sdevs)
score_gfs = population[0].skill(matchup_set, train_start, train_end)

print("uncorrected score in training period: ",
         population[0].skill(matchup_set, train_start, train_end) )
print("uncorrected score in evaluation period: ",
         population[0].skill(matchup_set, train_end+1, nobs), flush=True )
population[0].show_fcst(matchup_set, train_start, train_end)

population[0].weights[0] = -2.0
population[0].show_fcst(matchup_set, train_start, train_end)
population[0].weights[0] = 0.0

print("\n",flush=True)

uncorrected score in training period:  15.619538461538465
uncorrected score in evaluation period:  11.043857868020302
0.0 26.35 23.15 -9.0 78.0 3.8 3.57  zzz  0.0 3.57 False
1.0 26.35 22.65 -8.5 75.0 3.9 3.02  zzz  0.0 3.02 False
2.0 24.95 22.45 -7.6 85.0 7.8 8.84  zzz  0.0 8.84 False
3.0 24.65 22.05 -7.9 83.0 8.1 6.32  zzz  0.0 6.32 False
4.0 24.05 21.85 -7.6 86.0 13.7 5.16  zzz  0.0 5.16 False
5.0 26.85 22.95 -8.7 81.0 21.9 6.85  zzz  0.0 6.85 False
6.0 26.65 23.15 -8.9 78.0 7.9 9.98  zzz  0.0 9.98 False
7.0 26.85 23.25 -8.1 68.0 2.3 11.29  zzz  0.0 11.29 False
8.0 26.85 23.55 -9.4 79.0 5.6 6.29  zzz  0.0 6.29 False
9.0 27.45 23.55 -9.6 76.0 3.6 5.23  zzz  0.0 5.23 False
10.0 28.05 23.85 -9.7 73.0 2.6 8.61  zzz  0.0 8.61 False
11.0 28.55 23.85 -10.0 70.0 2.6 2.99  zzz  0.0 2.99 False
12.0 28.05 22.85 -9.2 70.0 1.7 6.38  zzz  0.0 6.38 False
13.0 27.55 23.25 -8.0 68.0 5.6 4.22  zzz  0.0 4.22 False
14.0 27.75 22.85 -8.7 75.0 2.6 4.97  zzz  0.0 4.97 False
15.0 28.35 23.25 -9.4 74.0 1.7 5

220.0 9.65 -1.25 1.8 37.0 8.3 29.09  zzz  0.0 29.09 False
221.0 17.55 14.35 -5.5 91.0 11.3 36.99  zzz  0.0 36.99 False
222.0 21.05 19.45 -7.3 84.0 8.0 31.05  zzz  0.0 31.05 False
223.0 22.15 19.45 -7.3 75.0 8.8 29.37  zzz  0.0 29.37 False
224.0 21.85 19.55 -8.5 78.0 12.4 19.63  zzz  0.0 19.63 False
225.0 18.85 9.65 -4.5 35.0 4.6 17.18  zzz  0.0 17.18 False
226.0 21.95 19.45 -8.2 85.0 8.2 21.39  zzz  0.0 21.39 False
227.0 18.25 15.05 -3.3 62.0 5.2 12.69  zzz  0.0 12.69 False
228.0 17.15 9.75 -11.2 69.0 7.7 16.04  zzz  0.0 16.04 False
229.0 11.75 2.75 -5.4 47.0 7.4 15.64  zzz  0.0 15.64 False
230.0 14.45 4.55 -8.0 47.0 4.8 17.23  zzz  0.0 17.23 False
231.0 11.95 1.95 -3.9 38.0 5.6 15.28  zzz  0.0 15.28 False
232.0 17.05 11.95 -10.4 78.0 8.8 24.27  zzz  0.0 24.27 False
233.0 17.55 13.05 -6.1 58.0 6.6 20.88  zzz  0.0 20.88 False
234.0 18.45 15.15 -9.4 85.0 15.4 27.89  zzz  0.0 27.89 False
235.0 11.45 2.75 -1.1 41.0 5.8 25.89  zzz  0.0 25.89 False
236.0 14.55 2.05 -5.1 33.0 4.4 26.77  zzz  

71.0 29.45 23.85 -10.2 68.0 5.5 6.12  zzz  -2.0 8.120000000000001 False
72.0 29.85 23.45 -11.1 65.0 6.5 5.41  zzz  -2.0 7.41 False
73.0 29.25 23.25 -10.0 72.0 2.7 7.58  zzz  -2.0 9.58 False
74.0 28.95 23.05 -10.0 72.0 2.7 10.06  zzz  -2.0 12.06 False
75.0 28.25 22.25 -9.3 70.0 5.0 7.14  zzz  -2.0 9.14 False
76.0 29.45 23.45 -10.8 70.0 5.4 6.67  zzz  -2.0 8.67 False
77.0 29.45 24.25 -10.4 75.0 6.1 4.45  zzz  -2.0 6.45 False
78.0 28.95 23.85 -10.1 78.0 4.8 3.95  zzz  -2.0 5.95 False
79.0 29.15 23.45 -10.8 76.0 5.9 4.15  zzz  -2.0 6.15 False
80.0 28.85 22.95 -10.8 75.0 6.3 1.63  zzz  -2.0 3.63 False
81.0 29.05 24.45 -10.2 78.0 9.4 0.72  zzz  -2.0 2.7199999999999998 False
82.0 29.15 24.55 -10.4 77.0 8.5 0.82  zzz  -2.0 2.82 False
83.0 28.65 23.35 -10.6 74.0 4.8 1.98  zzz  -2.0 3.98 False
84.0 27.65 23.15 -9.5 77.0 3.8 3.21  zzz  -2.0 5.21 False
85.0 27.75 22.95 -9.5 73.0 5.6 2.75  zzz  -2.0 4.75 False
86.0 28.15 23.15 -11.1 77.0 6.1 -1.29  zzz  -2.0 0.71 True
87.0 27.75 22.05 -11.2 75.0 4.

264.0 22.95 18.95 -9.6 75.0 10.8 18.51  zzz  -2.0 20.51 False
265.0 22.05 19.55 -8.7 75.0 4.6 20.38  zzz  -2.0 22.38 False
266.0 22.25 19.55 -8.0 72.0 2.9 22.25  zzz  -2.0 24.25 False
267.0 22.45 19.45 -7.2 59.0 4.2 21.89  zzz  -2.0 23.89 False
268.0 22.05 19.25 -7.4 74.0 3.9 25.38  zzz  -2.0 27.38 False
269.0 21.05 15.65 -7.4 58.0 3.4 26.61  zzz  -2.0 28.61 False
270.0 21.85 19.25 -8.7 83.0 13.0 27.96  zzz  -2.0 29.96 False
271.0 15.65 11.05 -3.8 76.0 5.6 26.21  zzz  -2.0 28.21 False
272.0 17.85 8.35 -5.7 37.0 4.4 31.74  zzz  -2.0 33.739999999999995 False
273.0 19.65 10.95 -7.5 39.0 4.6 28.54  zzz  -2.0 30.54 False
274.0 21.45 15.25 -9.8 58.0 5.3 27.56  zzz  -2.0 29.56 False
275.0 21.05 17.45 -7.3 59.0 7.7 32.16  zzz  -2.0 34.16 False
276.0 20.75 16.95 -7.8 74.0 1.9 31.31  zzz  -2.0 33.31 False
277.0 22.25 18.35 -9.0 73.0 3.3 29.47  zzz  -2.0 31.47 False
278.0 21.55 19.05 -7.0 74.0 10.9 29.88  zzz  -2.0 31.88 False
279.0 16.45 11.75 -4.7 74.0 10.4 22.01  zzz  -2.0 24.01 False
280.0 18

Initialize the population and find our first best. 

In [13]:
#Initializing the standard deviations for evolution ----------
#For the bias
sdevs[0] = 1.0
#For linear terms
for k in range (1,int(6)):
    sdevs[k] = 1.0

#For quadratic terms
#for k in range (int(6), nparameters):
#  sdevs[k] = 0.0125

#Initialize the population itself now -------------------------
for k in range (0,npopulation):
  weights[0] = np.random.normal(0,sdevs[0])
  for l in range (1, int(6) ):     #initialize only the linear part
    weights[l] = np.random.normal(0,sdevs[l])
  population[k].init(weights,sdevs)

#recall that the matchup_set is holding the matchups
#Find our first 'best' -- noting that we aren't saving raw gfs as an example
smin = 9999.
kbest = int(npopulation)
for k in range (0,npopulation):
    population[k].skill(matchup_set, train_start, train_end)
    if (population[k].score < smin):
        kbest = k
        smin = population[k].score

#Start accumulating our best critters
bests.append(critter(nparameters))
bests[nbests].init(population[kbest].weights, population[kbest].sdevs)
nbests += 1

population[kbest].show()
print("initial kbest, smin = ",kbest, smin, flush=True)


0 0.9500884175255894 1.0
1 -0.1513572082976979 1.0
2 -0.10321885179355784 1.0
3 0.41059850193837233 1.0
4 0.144043571160878 1.0
5 1.454273506962975 1.0
initial kbest, smin =  1 9.775825339684086


For this evolution, we are using only mutation -- as would happen with bacteria (haploid).

As an analogy to diploids (plants, animals, people), one could also have 'crossover' mutations. Namely, to select two parents and take the first M genes from the first parent, and the remainder from the second. 

In [14]:
######################## ######################## ########################
#      Now carry out the (mutation-only) evolution
#swap best in to all slots
#then evolve a new raft of critters from that
#evaluate them
#repeat until limit of generations or happy

for gen in range(0,genmax):
    #print("generation ", gen, flush=True)

    population[0].copy(population[kbest])
    population[0].score = population[kbest].score
    score_best = float(population[0].score)
    smin = score_best
    kbest = 0
    for k in range (1, npopulation):
        population[k].copy(population[0])
        population[k].evolve()
        population[k].skill(matchup_set, train_start, train_end)
        if (population[k].score < score_best):
            kbest = k
            smin = population[k].score
            bests.append(critter(nparameters))
            bests[nbests].init(population[kbest].weights, population[kbest].sdevs)
            nbests += 1
    if (kbest != 0):
        if (score_gfs != 0):
          print("new best ",gen, kbest, smin, score_best, smin/score_gfs, flush=True)
        else:
          print("new best ",gen, kbest, smin, score_best, flush=True)
        population[kbest].show()


new best  2 8 9.18612267636147 9.775825339684086 0.5881174209456552
0 0.5667513171053311 2.0106481828332865
1 -0.5476984668883029 1.0037780078337095
2 -0.7830473750545375 2.5391982310016687
3 -1.1741428077506328 1.4048983955008632
4 0.4201216631199747 0.9844402124417343
5 0.28774992541344924 1.174600592224003
new best  7 4 8.99856052530168 9.18612267636147 0.576109245958818
0 0.3008453533481656 0.49676963405803537
1 -0.5411681711223134 0.25704555014951447
2 -0.6991945341402486 0.07265118793785233
3 -1.0807479954877337 0.5414053400004487
4 0.4243006564176645 0.19841368640833443
5 0.34947730887724393 0.620659800543008
new best  24 6 8.915433328045856 8.99856052530168 0.5707872450904493
0 1.2938766377537154 1.2224427223428291
1 0.08071736381312078 1.1384176498319518
2 -1.7097175147179318 0.9202484815143807
3 -0.7337689771944809 0.6773405339892832
4 0.49154709750139935 0.7833644975385049
5 -0.011123149348269856 0.7300687592962771
new best  124 9 8.704159177795512 8.915433328045856 0.557260

Now consider what we found along the way

In [16]:
######################## ######################## ########################
if (score_gfs != 0):
  print("best score in training period ",gen, kbest, smin, score_best, smin/score_gfs, flush=True)
else:
  print("best score in training period ",gen, kbest, smin, score_best, flush=True)
print("score in the untrained period: ",population[kbest].skill(matchup_set, train_end+1, nobs))

print("found ",nbests,"new bests along the way\n")
for k in range (0, nbests):
  bests[k].show()
  print("\n")



best score in training period  3599 0 7.9351120592361255 7.9351120592361255 0.508024745979245
score in the untrained period:  5.917958099935862
found  13 new bests along the way

0 0.0 0.0
1 0.0 0.0
2 0.0 0.0
3 0.0 0.0
4 0.0 0.0
5 0.0 0.0


0 0.9500884175255894 1.0
1 -0.1513572082976979 1.0
2 -0.10321885179355784 1.0
3 0.41059850193837233 1.0
4 0.144043571160878 1.0
5 1.454273506962975 1.0


0 0.5667513171053311 2.0106481828332865
1 -0.5476984668883029 1.0037780078337095
2 -0.7830473750545375 2.5391982310016687
3 -1.1741428077506328 1.4048983955008632
4 0.4201216631199747 0.9844402124417343
5 0.28774992541344924 1.174600592224003


0 0.3008453533481656 0.49676963405803537
1 -0.5411681711223134 0.25704555014951447
2 -0.6991945341402486 0.07265118793785233
3 -1.0807479954877337 0.5414053400004487
4 0.4243006564176645 0.19841368640833443
5 0.34947730887724393 0.620659800543008


0 1.2938766377537154 1.2224427223428291
1 0.08071736381312078 1.1384176498319518
2 -1.7097175147179318 0.920248

In [18]:
print("Forecasts in the training period:")
population[0].show_fcst(matchup_set, train_start, train_end)
print("Untrained forecasts:")
population[0].show_fcst(matchup_set, train_end, nobs)


Forecasts in the training period:
0.0 26.35 23.15 -9.0 78.0 3.8 3.57  zzz  13.027270104872372 -9.457270104872372 False
1.0 26.35 22.65 -8.5 75.0 3.9 3.02  zzz  12.457754694686097 -9.437754694686097 False
2.0 24.95 22.45 -7.6 85.0 7.8 8.84  zzz  15.373705285066437 -6.533705285066437 True
3.0 24.65 22.05 -7.9 83.0 8.1 6.32  zzz  15.187956023182927 -8.867956023182927 False
4.0 24.05 21.85 -7.6 86.0 13.7 5.16  zzz  15.686429840628467 -10.526429840628467 False
5.0 26.85 22.95 -8.7 81.0 21.9 6.85  zzz  11.847554257021901 -4.997554257021902 True
6.0 26.65 23.15 -8.9 78.0 7.9 9.98  zzz  12.469231383683699 -2.4892313836836983 True
7.0 26.85 23.25 -8.1 68.0 2.3 11.29  zzz  9.74872676412773 1.5412732358722696 True
8.0 26.85 23.55 -9.4 79.0 5.6 6.29  zzz  12.738383932559719 -6.448383932559719 False
9.0 27.45 23.55 -9.6 76.0 3.6 5.23  zzz  11.977752928094752 -6.7477529280947515 False
10.0 28.05 23.85 -9.7 73.0 2.6 8.61  zzz  10.799045080772554 -2.189045080772555 True
11.0 28.55 23.85 -10.0 70.0 2.6

150.0 25.25 19.05 -11.9 72.0 10.3 18.58  zzz  14.893597261359261 3.686402738640737 True
151.0 25.45 19.95 -11.5 77.0 10.7 17.67  zzz  15.46427407468245 2.2057259253175516 True
152.0 25.65 20.25 -10.8 69.0 7.5 11.76  zzz  12.91153809155567 -1.15153809155567 True
153.0 25.75 22.55 -8.9 73.0 6.3 10.75  zzz  11.805775999577573 -1.0557759995775733 True
154.0 23.15 18.85 -5.5 53.0 7.7 13.71  zzz  8.675435133349808 5.034564866650193 True
155.0 22.05 12.95 -12.5 63.0 13.5 15.94  zzz  18.0284146945738 -2.0884146945738014 True
156.0 23.25 17.05 -10.8 77.0 16.5 19.92  zzz  17.717174949477144 2.2028250505228577 True
157.0 24.75 20.35 -8.9 85.0 15.6 17.53  zzz  16.662612959718018 0.867387040281983 True
158.0 25.65 20.15 -11.4 77.0 6.6 13.43  zzz  15.718891130763398 -2.288891130763398 True
159.0 24.45 19.15 -9.2 70.0 9.0 10.56  zzz  13.988802076030389 -3.4288020760303883 True
160.0 22.65 18.55 -7.5 90.0 14.4 15.98  zzz  20.150442483622772 -4.170442483622772 True
161.0 24.65 20.85 -8.8 87.0 11.2 20.2

359.0 27.25 21.15 -10.7 69.0 5.4 0.58  zzz  12.043640765484664 -11.463640765484664 False
360.0 27.05 20.65 -10.7 68.0 5.0 -0.73  zzz  12.279294464455196 -13.009294464455197 False
361.0 27.15 21.65 -9.9 70.0 5.4 6.04  zzz  11.764894985266876 -5.724894985266876 True
362.0 27.35 21.45 -10.0 71.0 4.2 9.57  zzz  12.393486823829164 -2.823486823829164 True
363.0 27.05 21.85 -9.6 71.0 3.7 5.38  zzz  12.058120822245773 -6.678120822245773 False
mean rms  -0.06321366929473529 7.751678288089013
Untrained forecasts:
364.0 27.45 21.65 -10.6 74.0 5.0 3.01  zzz  13.136927609741111 -10.126927609741111 False
365.0 27.65 22.25 -11.0 76.0 10.7 4.32  zzz  12.554609861326897 -8.234609861326897 False
366.0 26.05 21.45 -8.6 83.0 11.2 -0.62  zzz  15.272072191592 -15.892072191592 False
367.0 26.15 19.45 -11.4 79.0 6.6 6.71  zzz  16.917639869368298 -10.207639869368297 False
368.0 26.95 19.55 -10.3 67.0 4.7 4.17  zzz  12.978055244568306 -8.808055244568306 False
369.0 27.55 20.55 -9.7 68.0 2.4 9.77  zzz  12.428488

513.0 24.55 14.45 -11.1 54.0 7.1 12.33  zzz  13.92764272057356 -1.5976427205735604 True
514.0 25.35 18.15 -12.0 69.0 5.7 5.91  zzz  15.350632733333097 -9.440632733333096 True
515.0 25.75 18.85 -11.1 70.0 1.8 11.31  zzz  15.253706822364814 -3.9437068223648133 True
516.0 24.75 19.05 -10.3 72.0 4.6 15.86  zzz  15.378837824284084 0.4811621757159159 True
517.0 21.65 13.55 -8.5 52.0 9.7 12.21  zzz  13.849757987597124 -1.6397579875971235 True
518.0 15.95 7.85 -11.1 61.0 14.2 13.17  zzz  22.838026974172898 -9.668026974172898 True
519.0 16.55 4.85 -3.2 31.0 7.9 14.33  zzz  15.54271474035097 -1.21271474035097 True
520.0 20.45 14.35 -9.0 68.0 11.1 14.34  zzz  18.217994685452055 -3.8779946854520553 True
521.0 22.05 16.45 -11.3 77.0 11.0 8.72  zzz  19.220802907212892 -10.500802907212892 True
522.0 24.05 18.75 -11.5 78.0 8.7 12.94  zzz  17.36716481947781 -4.427164819477811 True
523.0 23.25 18.05 -10.0 73.0 5.2 13.25  zzz  16.746603651115024 -3.4966036511150236 True
524.0 21.15 13.75 -7.7 63.0 2.2 13