# Section 2 

Start looking in to a more realistic case -- evolving the full set of coefficients


In [1]:
# Boilerplate python imports
import sys
import csv
from math import *
import matplotlib
import matplotlib.pyplot as plt
import numpy as np


The code being shown here is heavily towards the evolutionary side. 

The key physical science and mathematics are in the imported module evolution1.
Keys being:
how to translate the parameters in to a prediction
how to score a prediction

In this case, the full set of linear coefficients are being evolved, bias and coefficients for GFS -- t2m, td, thickness (1000-850 mb), rh, and wind speed.

Though not shown yet, a good thing to do is to plot the predictions vs. their target. You can add that yourself, matplotlib is already imported.

In [2]:
# basic1 from the github

# Some global parameters:
nobs = 579
nparameters = 6

npopulation = 10
per_second = 60     # estimate of number of generations per second
genmax = int(60*per_second)

train_start = int(0)
train_end   = int(364)
np.random.seed(0)      # for reproducibility

from scores import *
from evolution2 import *

######################## ######################## ########################
# Now bring in the data for real work:
matchup_set = []

with open('testin.csv') as csvfile:
    k = 0
    sreader = csv.reader(csvfile, delimiter=",")
    for line in sreader:
        day     = float(line[0])
        t2m_gfs = float(line[1])
        td_gfs  = float(line[2])
        thick_gfs = float(line[3])
        rh_gfs  = float(line[4])
        speed   = float(line[5])
        obs_t2m = float(line[6])
        obs_td  = float(line[7])
        terr    = float(line[8])
        tderr  = float(line[9])

        #Note that obs_td, obs_t2m, tderr are being ignored. They can be
        #       added to the list.
        #  n.b.: note that it is terr that is used, not t2m itself.
        #Model and observation are well-enough correlated that it is the increment
        #which makes more sense to predict [Krasnopolsky,20NNN]
        m = matchup((day,t2m_gfs,td_gfs,thick_gfs,rh_gfs,speed,terr))
        matchup_set.append(m)
        k += 1

csvfile.close()
######################## ######################## ########################

Initialize and seed the population

Note the python structure used for initializing and adding to a list of things. Population and bests can be added to at will via the .append operation. We'll use this later (section 3) to collect all the parameter suites which are good in some respect (we'll decide what constitutes 'good').


In [3]:
# basic1 

#Initialize and seed the population
population = []
bests      = []       # Save all then-best versions
for k in range (0,npopulation):
    population.append(critter(nparameters))

weights = np.zeros((nparameters))
sdevs   = np.zeros((nparameters))
bests.append(critter(nparameters))
bests[0].init(weights, sdevs)
nbests = 1

#for reference, take the raw gfs output's score:
population[0].init(weights, sdevs)
score_gfs = population[0].skill(matchup_set, train_start, train_end, metric = RMS)

print("uncorrected score in training period: ",
         population[0].skill(matchup_set, train_start, train_end) )
print("uncorrected score in evaluation period: ",
         population[0].skill(matchup_set, train_end+1, nobs), flush=True )
population[0].show_fcst(matchup_set, train_start, train_end)

population[0].weights[0] = 0.0

print("\n",flush=True)

uncorrected score in training period:  16.771062382110962
uncorrected score in evaluation period:  11.899786904482491
0.0 26.35 23.15 -9.0 78.0 3.8 3.57  zzz  0.0 3.57 False
1.0 26.35 22.65 -8.5 75.0 3.9 3.02  zzz  0.0 3.02 False
2.0 24.95 22.45 -7.6 85.0 7.8 8.84  zzz  0.0 8.84 False
3.0 24.65 22.05 -7.9 83.0 8.1 6.32  zzz  0.0 6.32 False
4.0 24.05 21.85 -7.6 86.0 13.7 5.16  zzz  0.0 5.16 False
5.0 26.85 22.95 -8.7 81.0 21.9 6.85  zzz  0.0 6.85 False
6.0 26.65 23.15 -8.9 78.0 7.9 9.98  zzz  0.0 9.98 False
7.0 26.85 23.25 -8.1 68.0 2.3 11.29  zzz  0.0 11.29 False
8.0 26.85 23.55 -9.4 79.0 5.6 6.29  zzz  0.0 6.29 False
9.0 27.45 23.55 -9.6 76.0 3.6 5.23  zzz  0.0 5.23 False
10.0 28.05 23.85 -9.7 73.0 2.6 8.61  zzz  0.0 8.61 False
11.0 28.55 23.85 -10.0 70.0 2.6 2.99  zzz  0.0 2.99 False
12.0 28.05 22.85 -9.2 70.0 1.7 6.38  zzz  0.0 6.38 False
13.0 27.55 23.25 -8.0 68.0 5.6 4.22  zzz  0.0 4.22 False
14.0 27.75 22.85 -8.7 75.0 2.6 4.97  zzz  0.0 4.97 False
15.0 28.35 23.25 -9.4 74.0 1.7 5

224.0 21.85 19.55 -8.5 78.0 12.4 19.63  zzz  0.0 19.63 False
225.0 18.85 9.65 -4.5 35.0 4.6 17.18  zzz  0.0 17.18 False
226.0 21.95 19.45 -8.2 85.0 8.2 21.39  zzz  0.0 21.39 False
227.0 18.25 15.05 -3.3 62.0 5.2 12.69  zzz  0.0 12.69 False
228.0 17.15 9.75 -11.2 69.0 7.7 16.04  zzz  0.0 16.04 False
229.0 11.75 2.75 -5.4 47.0 7.4 15.64  zzz  0.0 15.64 False
230.0 14.45 4.55 -8.0 47.0 4.8 17.23  zzz  0.0 17.23 False
231.0 11.95 1.95 -3.9 38.0 5.6 15.28  zzz  0.0 15.28 False
232.0 17.05 11.95 -10.4 78.0 8.8 24.27  zzz  0.0 24.27 False
233.0 17.55 13.05 -6.1 58.0 6.6 20.88  zzz  0.0 20.88 False
234.0 18.45 15.15 -9.4 85.0 15.4 27.89  zzz  0.0 27.89 False
235.0 11.45 2.75 -1.1 41.0 5.8 25.89  zzz  0.0 25.89 False
236.0 14.55 2.05 -5.1 33.0 4.4 26.77  zzz  0.0 26.77 False
237.0 12.85 3.05 -4.7 46.0 4.5 25.63  zzz  0.0 25.63 False
238.0 16.25 12.05 -5.9 62.0 6.7 26.81  zzz  0.0 26.81 False
239.0 18.95 14.95 -8.3 78.0 7.0 30.06  zzz  0.0 30.06 False
240.0 19.15 16.85 -7.0 79.0 6.2 26.37  zzz  

Initialize the population and find our first best. 

In [5]:
#Initializing the standard deviations for evolution ----------
#For the bias
sdevs[0] = 1.0
#For linear terms
for k in range (1,int(6)):
    sdevs[k] = 1.0

#For quadratic terms
#for k in range (int(6), nparameters):
#  sdevs[k] = 0.0125

#Initialize the population itself now -------------------------
for k in range (0,npopulation):
  weights[0] = np.random.normal(0,sdevs[0])
  for l in range (1, int(6) ):     #initialize only the linear part
    weights[l] = np.random.normal(0,sdevs[l])
  population[k].init(weights,sdevs)

#recall that the matchup_set is holding the matchups
#Find our first 'best' -- noting that we aren't saving raw gfs as an example
smin = 9999.
kbest = int(npopulation)
for k in range (0,npopulation):
    population[k].skill(matchup_set, train_start, train_end)
    if (population[k].score < smin):
        kbest = k
        smin = population[k].score

#Start accumulating our best critters
bests.append(critter(nparameters))
bests[nbests].init(population[kbest].weights, population[kbest].sdevs)
nbests += 1

population[kbest].show()
print("initial kbest, smin = ",kbest, smin, flush=True)


0 -0.672  1.000  1 -0.360  1.000  2 -0.813  1.000  3 -1.726  1.000  4 0.177  1.000  5 -0.402  1.000  score  16.965  
initial kbest, smin =  0 16.964669848958508


## Type of evolution

For this evolution, we are using only mutation -- as would happen with bacteria (haploid).

As an analogy to diploids (plants, animals, people), one could also have 'crossover' mutations. Namely, to select two parents and take the first M genes from the first parent, and the remainder from the second. 

In [6]:
######################## ######################## ########################
#      Now carry out the (mutation-only) evolution
#swap best in to all slots
#then evolve a new raft of critters from that
#evaluate them
#repeat until limit of generations or happy

for gen in range(0,genmax):
    #print("generation ", gen, flush=True)

    population[0].copy(population[kbest])
    population[0].score = population[kbest].score
    score_best = float(population[0].score)
    smin = score_best
    kbest = 0
    for k in range (1, npopulation):
        population[k].copy(population[0])
        population[k].evolve()
        population[k].skill(matchup_set, train_start, train_end, metric = VICKIE)
        if (population[k].score < score_best):
            kbest = k
            smin = population[k].score
            bests.append(critter(nparameters))
            bests[nbests].init(population[kbest].weights, population[kbest].sdevs)
            nbests += 1
    if (kbest != 0):
        if (score_gfs != 0):
          print("new best ",gen, kbest, smin, score_best, smin/score_gfs, flush=True)
        else:
          print("new best ",gen, kbest, smin, score_best, flush=True)
        population[kbest].show()


new best  1 8 10.96228641382222 16.964669848958508 0.653642933527888
0 -0.692  0.324  1 -0.411  0.482  2 -1.303  0.681  3 -2.620  1.099  4 0.441  0.959  5 -1.070  0.751  score  10.962  
new best  8 7 10.31321686260833 10.96228641382222 0.6149411783006088
0 -0.549  0.646  1 -0.574  1.839  2 -1.223  0.815  3 -2.435  0.484  4 0.523  1.223  5 -1.064  0.394  score  10.313  
new best  37 9 10.019256547045327 10.31321686260833 0.5974133491824869
0 0.467  0.735  1 -1.107  1.369  2 -0.876  0.426  3 -2.650  0.361  4 0.516  0.126  5 -0.254  1.017  score  10.019  
new best  106 5 9.761512697393432 10.019256547045327 0.5820449817064455
0 1.022  0.382  1 -1.205  3.077  2 -1.102  0.954  3 -2.317  0.412  4 0.630  1.092  5 -0.060  0.186  score  9.762  
new best  206 1 9.361061365122277 9.761512697393432 0.5581674644003087
0 0.721  0.467  1 0.042  1.058  2 -1.252  0.186  3 -1.650  1.147  4 0.327  0.211  5 0.056  0.986  score  9.361  
new best  297 9 9.205604067619817 9.361061365122277 0.54889808754388
0

Now consider what we found along the way

In [7]:
######################## ######################## ########################
if (score_gfs != 0):
  print("best score in training period ",gen, kbest, smin, score_best, smin/score_gfs, flush=True)
else:
  print("best score in training period ",gen, kbest, smin, score_best, flush=True)
print("score in the untrained period: ",population[kbest].skill(matchup_set, train_end+1, nobs))

print("found ",nbests,"new bests along the way\n")
for k in range (0, nbests):
  bests[k].show()
  print("\n")



best score in training period  3599 0 7.5682642403068145 7.5682642403068145 0.45126922003340625
score in the untrained period:  4.825696882199806
found  14 new bests along the way

0 0.000  0.000  1 0.000  0.000  2 0.000  0.000  3 0.000  0.000  4 0.000  0.000  5 0.000  0.000  score  99.000  


0 0.950  1.000  1 -0.151  1.000  2 -0.103  1.000  3 0.411  1.000  4 0.144  1.000  5 1.454  1.000  score  99.000  


0 -0.672  1.000  1 -0.360  1.000  2 -0.813  1.000  3 -1.726  1.000  4 0.177  1.000  5 -0.402  1.000  score  99.000  


0 -0.692  0.324  1 -0.411  0.482  2 -1.303  0.681  3 -2.620  1.099  4 0.441  0.959  5 -1.070  0.751  score  99.000  


0 -0.549  0.646  1 -0.574  1.839  2 -1.223  0.815  3 -2.435  0.484  4 0.523  1.223  5 -1.064  0.394  score  99.000  


0 0.467  0.735  1 -1.107  1.369  2 -0.876  0.426  3 -2.650  0.361  4 0.516  0.126  5 -0.254  1.017  score  99.000  


0 1.022  0.382  1 -1.205  3.077  2 -1.102  0.954  3 -2.317  0.412  4 0.630  1.092  5 -0.060  0.186  score  99.000 

In [None]:
print("Forecasts in the training period:")
population[0].show_fcst(matchup_set, train_start, train_end)
print("Untrained forecasts:")
population[0].show_fcst(matchup_set, train_end, nobs)
