In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

cars = np.genfromtxt("cars.csv", delimiter=",", skip_header=True)

car_col_min = np.min(cars, axis=0)
car_col_max = np.max(cars, axis=0)
car_nrm = (cars - car_col_min) / (car_col_max - car_col_min)

cars_num_rows = cars[:, 1:7]
cars_num_rows_min = np.min(cars_num_rows, axis=0)
cars_num_rows_max = np.max(cars_num_rows, axis=0)

cars_column_mpg = cars[:, 7]
cars_column_mpg_min = np.min(cars_column_mpg)
cars_column_mpg_max = np.max(cars_column_mpg)

cars_num_rows_normalized = (cars_num_rows - cars_num_rows_min) / (cars_num_rows_max - cars_num_rows_min)
cars_mpg_normalized = (cars_column_mpg - cars_column_mpg_min) / (cars_column_mpg_max - cars_column_mpg_min)


In [3]:
#TODO 1: create weighted function with coefficient for each column parameter

def weighted_function (x, coefficients):
    return np.dot(x, coefficients)

def offspring(parent):
    return parent + np.random.uniform(low= -0.1, high=0.1, size=parent.shape)

def coeffients(number):
    cV = np.random.uniform(low= -1.0, high=1.0, size=number)
    return cV

def fitness_function (data, coefficients):
    return mean_squared_error(cars_mpg_normalized,weighted_function(data, coefficients))

num_parents = 1
num_children = 3
num_generations = 300
mutation_std = 0.05

parents = np.random.uniform(low=-1, high=1, size=(num_parents, cars_num_rows.shape[1]))

best_fitness = None
best_coeffs = None

for i in range(num_generations):
    # generate children
    children = np.tile(parents, (num_children, 1))
    children += np.random.normal(scale=mutation_std, size=children.shape)
    # compute fitness for parents and children
    pop = np.concatenate((parents, children))
    # fitness = np.empty((0,3), float)

    fitness = np.array([fitness_function(cars_num_rows_normalized, individual) for individual in pop])
    #print(fitness)
    # select parents
    parents = pop[np.argsort(fitness)[:num_parents]]
    # store best individual
    best_idx = np.argmin(fitness)
    if i == 0:
        initial_rmse = fitness[best_idx] * (cars_column_mpg_max - cars_column_mpg_min) + cars_column_mpg_min
        print(f"Initial RMSE: {initial_rmse:.2f}")
    if best_fitness is None or fitness[best_idx] < best_fitness:
        print(best_fitness)
        best_fitness = fitness[best_idx]
        best_coeffs = pop[best_idx]
        parents = [best_coeffs]

best_fitness_denormalized = best_fitness * (cars_column_mpg_max - cars_column_mpg_min) + cars_column_mpg_min


cars_Tcolumns_mpg = cars_column_mpg[[3,56,116,218]]
given_lines = cars[[3,56,116,218],1:7]
given_lines_nrm = car_nrm[[3,56,116,218],1:7]

#predict only mpg
indx = 0
for car in given_lines_nrm:
    print(f"mpg is {cars_Tcolumns_mpg[indx]}, predicted is "
          f"{np.dot(car,best_coeffs)* (cars_column_mpg_max - cars_column_mpg_min) + cars_column_mpg_min}")
    indx += 1


print(f"Final RMSE: {best_fitness_denormalized:.5f}")

Initial RMSE: 11.50
None
0.06652565575835286
0.048308540858601444
0.04740876076886138
0.04622762530624212
0.038437600061429586
0.034287672084644505
0.03193102315229202
0.03127296014946411
0.027621515907896525
0.02593987828509203
0.025389826843859084
0.02450419479895155
0.02369335657270537
0.023575802784466356
0.022280201902519198
0.020829102838142967
0.02066948665773839
0.020426033369439846
0.01883435720367807
0.018236087436785217
0.018128469121916416
0.01803542059593532
0.017941107993476358
0.017426455087772165
0.017058771132541395
0.01629785168254477
0.015741391138378993
0.015227388185250821
0.015056210995589752
0.014863855245070525
0.014820690956484882
0.014512598053694023
0.014178180646112927
0.014071917308995728
0.01405617504908642
0.013912850966431065
0.013742235172599116
0.013711137052943252
mpg is 16.0, predicted is 14.803760042428209
mpg is 24.0, predicted is 23.759380828817307
mpg is 29.0, predicted is 27.57028420417005
mpg is 33.5, predicted is 30.862242388609083
Final RMSE: