In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

cars = np.genfromtxt("cars.csv", delimiter=",", skip_header=True)

car_col_min = np.min(cars, axis=0)
car_col_max = np.max(cars, axis=0)
car_nrm = (cars - car_col_min) / (car_col_max - car_col_min)

cars_num_rows = cars[:, 1:7]
cars_num_rows_min = np.min(cars_num_rows, axis=0)
cars_num_rows_max = np.max(cars_num_rows, axis=0)

cars_column_mpg = cars[:, 7]
cars_column_mpg_min = np.min(cars_column_mpg)
cars_column_mpg_max = np.max(cars_column_mpg)

cars_num_rows_normalized = (cars_num_rows - cars_num_rows_min) / (cars_num_rows_max - cars_num_rows_min)
cars_mpg_normalized = (cars_column_mpg - cars_column_mpg_min) / (cars_column_mpg_max - cars_column_mpg_min)


In [2]:

def weighted_function (x, coefficients):
    return np.dot(x, coefficients)

def offspring(parent):
    return parent + np.random.uniform(low= -0.1, high=0.1, size=parent.shape)

def coeffients(number):
    cV = np.random.uniform(low= -1.0, high=1.0, size=number)
    return cV

def fitness_function (data, coefficients):
    return mean_squared_error(cars_mpg_normalized,weighted_function(data, coefficients))

num_parents = 1
num_children = 3
num_generations = 300
mutation_std = 0.05

parents = np.random.uniform(low=-1, high=1, size=(num_parents, cars_num_rows.shape[1]))

best_fitness = None
best_coeffs = None

for i in range(num_generations):
    # generate children
    children = np.tile(parents, (num_children, 1))
    children += np.random.normal(scale=mutation_std, size=children.shape)
    # compute fitness for parents and children
    pop = np.concatenate((parents, children))
    # fitness = np.empty((0,3), float)

    fitness = np.array([fitness_function(cars_num_rows_normalized, individual) for individual in pop])
    #print(fitness)
    # select parents
    parents = pop[np.argsort(fitness)[:num_parents]]
    # store best individual
    best_idx = np.argmin(fitness)
    if i == 0:
        initial_rmse = fitness[best_idx] * (cars_column_mpg_max - cars_column_mpg_min) + cars_column_mpg_min
        print(f"Initial RMSE: {initial_rmse:.2f}")
    if best_fitness is None or fitness[best_idx] < best_fitness:
        print(best_fitness)
        best_fitness = fitness[best_idx]
        best_coeffs = pop[best_idx]
        parents = [best_coeffs]

best_fitness_denormalized = best_fitness * (cars_column_mpg_max - cars_column_mpg_min) + cars_column_mpg_min


cars_Tcolumns_mpg = cars_column_mpg[[3,56,116,218]]
given_lines = cars[[3,56,116,218],1:7]
given_lines_nrm = car_nrm[[3,56,116,218],1:7]

#predict only mpg
indx = 0
for car in given_lines_nrm:
    print(f"mpg is {cars_Tcolumns_mpg[indx]}, predicted is "
          f"{np.dot(car,best_coeffs)* (cars_column_mpg_max - cars_column_mpg_min) + cars_column_mpg_min}")
    indx += 1


print(f"Final RMSE: {best_fitness_denormalized:.5f}")

Initial RMSE: 11.57
None
0.06839156244912412
0.06207664930637187
0.06087626989917101
0.05172735422561516
0.044868819083648585
0.041814066572922894
0.04070540485829589
0.040554544497871915
0.03537362889180738
0.033567870470943825
0.03260173523564325
0.029890035762180498
0.029836547823577895
0.0258320309542536
0.024223157263103114
0.023507355713807913
0.022372049714645587
0.020722930861833105
0.020577014825027287
0.01929092515133431
0.01913037553198809
0.018907952086186796
0.01843495835089312
0.018209995182471565
0.01737466225346286
0.01708587688740369
0.017040560044929536
0.016944734266780146
0.016885323922947854
0.016849541825543508
0.01650263387348474
0.016308228274564577
0.015974195987817023
0.015799055915093698
0.015580035090302655
0.015187845933183246
0.014715787945766784
0.014535603918226414
0.014019145689384052
0.01398655299902823
0.013962702250786989
0.013955011461271672
0.013856437899109737
mpg is 16.0, predicted is 15.694799246678011
mpg is 24.0, predicted is 23.20138461656657