In [143]:
import numpy as np
import matplotlib.pyplot as plt
import numpy.random as rnd

# ---- Essentials ----

#plt.title("GDP x Lifespan correlation")
#plt.ylabel("average lifespan (years)") 
#plt.xlabel("GDP per capita in $")

# Load the data set as a float matrix; the header row of the CSV is skipped.
cars = np.genfromtxt("cars.csv", delimiter=",", skip_header=True)

# Columns 1-6 are used as input features and column 7 (mpg) is the value the
# rest of the script tries to predict. Column 0 is not used.
cyl, displ, hp, weight, acc, year, mpg = (cars[:, col] for col in range(1, 8))


# ---- Normalizing ----

def normalizeMinMax(values):
    """Rescale ``values`` linearly so the smallest entry becomes 0 and the largest 1.

    Parameters
    ----------
    values : array_like
        Numeric data to rescale.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``values``. If every entry of
        ``values`` is identical, an array of zeros is returned instead of the
        NaNs that the 0/0 division would otherwise produce.
    """
    values = np.asarray(values, dtype=float)
    lowest = np.amin(values)
    span = np.amax(values) - lowest

    if span == 0:
        # Constant input: there is no range to scale by, so map everything to 0.
        return np.zeros_like(values)

    return (values - lowest) / span

def deNormalize(values, origin):
    """Map min-max normalized ``values`` back onto the value range of ``origin``.

    ``origin`` supplies the minimum and maximum of the original scale, so a
    normalized 0 maps to ``min(origin)`` and a normalized 1 to ``max(origin)``.
    """
    lower, upper = np.amin(origin), np.amax(origin)
    return lower + values * (upper - lower)



# ---- Approximation function ----

def rndApprox(xValues, k):
    """Evaluate a linear model with randomly drawn coefficients.

    Parameters
    ----------
    xValues : numpy.ndarray
        Feature matrix with one column per feature. The number of
        coefficients drawn equals the size of its last axis.
    k : sequence of float or None
        If ``None``, every coefficient is drawn uniformly from [-1, 1).
        Otherwise each coefficient is drawn uniformly from within +/-0.1 of
        the corresponding entry of ``k`` (a random mutation of ``k``).

    Returns
    -------
    tuple
        ``(y, coeffs)`` where ``y = xValues @ coeffs`` is the prediction and
        ``coeffs`` is the coefficient vector that was drawn.
    """
    featureCount = np.shape(xValues)[-1]

    if k is None:
        coeffs = rnd.uniform(-1, 1, size=featureCount)
    else:
        # Only the first ``featureCount`` entries of k are used.
        center = np.asarray(k, dtype=float)[:featureCount]
        coeffs = rnd.uniform(center - 0.1, center + 0.1)

    y = np.matmul(xValues, coeffs)
    return y, coeffs


# ---- RMSE calculation function ----

def getRMSE(y, y1):
    """Return the root-mean-square error between ``y`` and ``y1``.

    Parameters
    ----------
    y, y1 : array_like
        The two value sequences to compare. The result is symmetric in its
        arguments.

    Returns
    -------
    numpy.floating
        ``sqrt(mean((y1 - y) ** 2))``.
    """
    # Flatten both inputs before subtracting. Subtracting a column vector
    # of shape (n, 1) from a flat vector of shape (n,) would otherwise
    # broadcast to an n x n matrix and silently yield a wrong error value;
    # this is what the former one-sided transpose() worked around.
    first = np.ravel(y)
    second = np.ravel(y1)

    return np.sqrt(np.mean((second - first) ** 2))



# ---- MAIN ----

# min-max normalize every feature column and the target column
cylNorm, displNorm, hpNorm, weightNorm, accNorm, yearNorm, mpgNorm = map(
    normalizeMinMax, (cyl, displ, hp, weight, acc, year, mpg)
)

# stack the normalized feature columns side by side into one matrix
# (one matrix column per feature; the target mpgNorm is kept separate)
xVals = np.column_stack(
    [cylNorm, displNorm, hpNorm, weightNorm, accNorm, yearNorm]
)








# ---- Random Approximation ----

# Pure random search: draw coefficient vectors independently and keep the one
# with the lowest RMSE against the normalized target.
bestResult = rndApprox(xVals, None)
bestRMSE = getRMSE(mpgNorm, bestResult[0])  # cached so it is not recomputed per iteration
print("initial RMSE: ", '{:.4f}'.format(getRMSE(deNormalize(bestResult[0], mpg), mpg)))

for _ in range(10000):
    result = rndApprox(xVals, None)  # fresh, independent draw
    resultRMSE = getRMSE(mpgNorm, result[0])
    if resultRMSE < bestRMSE:  # keep only strict improvements
        bestResult, bestRMSE = result, resultRMSE


# Convert the winning prediction back to the original mpg scale;
# the coefficients in bestResult[1] stay as they were drawn.
bestResult = [deNormalize(bestResult[0], mpg), bestResult[1]]

# print results
# NOTE(review): the first three labels use index = line - 1, the last one
# uses index 219 for "Line 219" - confirm which row is intended.
print("10.000 runs of random approximation...")
print("Best RMSE: ", '{:.4f}'.format(getRMSE(bestResult[0], mpg)))
print("Line 4:   mpg is", mpg[3], ", prediction was ", '{:.2f}'.format(bestResult[0][3]))
print("Line 57:  mpg is", mpg[56], ", prediction was ", '{:.2f}'.format(bestResult[0][56]))
print("Line 117: mpg is", mpg[116], ", prediction was ", '{:.2f}'.format(bestResult[0][116]))
print("Line 219: mpg is", mpg[219], ", prediction was ", '{:.2f}'.format(bestResult[0][219]))
print("\n\n----------------------------------\n")







# ---- Evolutionary Strategy ----

# Settings for the evolutionary strategy below.
rounds = 300       # number of generations to run
childcount = 4     # children created in every generation
parentcount = 1    # size of the parent population

# One slot per parent; the slots are filled with random parents further down.
parents = [None for _ in range(parentcount)]


# function returns the weakest parent in parents
def getWeakestParent():
    """Return the parent with the highest RMSE and its position in ``parents``.

    Reads the module-level ``parents`` list, ``parentcount`` and ``mpgNorm``.
    Each parent is a ``(prediction, coefficients)`` pair as returned by
    ``rndApprox``; the prediction is scored against ``mpgNorm``.

    Returns
    -------
    tuple
        ``(weakest, index)`` where ``weakest`` is the parent object and
        ``index`` its index in ``parents``. On ties the first such parent
        is returned.
    """
    weakest = parents[0]  # parent object
    index = 0  # index of parent in parents
    worstRMSE = getRMSE(mpgNorm, weakest[0])

    for i in range(1, parentcount):
        candidateRMSE = getRMSE(mpgNorm, parents[i][0])
        # A *larger* error means a weaker parent. The index is tracked
        # together with the object so the caller replaces the right slot.
        if candidateRMSE > worstRMSE:
            weakest, index, worstRMSE = parents[i], i, candidateRMSE

    return weakest, index




# initialize random parents
for i in range(parentcount):
    parents[i] = rndApprox(xVals, None)

print("initial RMSE: ", '{:.4f}'.format(getRMSE(deNormalize(parents[0][0], mpg), mpg)))

# generation loop
for i in range(rounds): 
    children = [None]*childcount
    
    for j in range(childcount):
        randomParent = 0 if parentcount == 1 else rnd.randint(0,parentcount-1)
        
        children[j] = rndApprox(xVals, parents[randomParent][1]) # take coefficients from random parent and feed them into rndApprox()
        
        if getRMSE(mpgNorm, children[j][0]) < getRMSE(mpgNorm, getWeakestParent()[0][0]): # compare child to weakest parent
                parents[getWeakestParent()[1]] = children[j] # replace weakest parent with child
        
    
# sorry for triple loop :( 
# technically it is double loop, since I chose parentcount = 1 anyway.

# determine fittest parent
bestResult = parents[0]

for i in range(1, parentcount, 1):
    if getRMSE(mpgNorm, parents[i][0]) < getRMSE(mpgNorm, bestResult[0]):
        bestResult = parents[i]
        

bestResult = [deNormalize(bestResult[0], mpg), bestResult[1]] # denormalize

print("Run ", rounds, " rounds ES(children:", childcount, " parents:", parentcount, ") ...")
print("Best RMSE: ", '{:.4f}'.format(getRMSE(bestResult[0], mpg)))
print("Line 4:   mpg is", mpg[3], ", prediction was ", '{:.2f}'.format(bestResult[0][3]))
print("Line 57:  mpg is", mpg[56], ", prediction was ", '{:.2f}'.format(bestResult[0][56]))
print("Line 117: mpg is", mpg[116], ", prediction was ", '{:.2f}'.format(bestResult[0][116]))
print("Line 219: mpg is", mpg[219], ", prediction was ", '{:.2f}'.format(bestResult[0][219]))



initial RMSE:  22.5487
10.000 runs of random approximation...
Best RMSE:  5.0209
Line 4:   mpg is 16.0 , prediction was  10.75
Line 57:  mpg is 24.0 , prediction was  21.85
Line 117: mpg is 29.0 , prediction was  25.39
Line 219: mpg is 17.5 , prediction was  16.85


----------------------------------

initial RMSE:  42.1950
Run  300  rounds ES(children: 4  parents: 1 ) ...
Best RMSE:  4.3981
Line 4:   mpg is 16.0 , prediction was  14.10
Line 57:  mpg is 24.0 , prediction was  23.01
Line 117: mpg is 29.0 , prediction was  27.86
Line 219: mpg is 17.5 , prediction was  17.66
