In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

cars = np.genfromtxt("cars.csv", delimiter=",", skip_header=True)

car_col_min = np.min(cars, axis=0)
car_col_max = np.max(cars, axis=0)
car_nrm = (cars - car_col_min) / (car_col_max - car_col_min)

cars_num_rows = cars[:, 1:7]
cars_num_rows_min = np.min(cars_num_rows, axis=0)
cars_num_rows_max = np.max(cars_num_rows, axis=0)

cars_column_mpg = cars[:, 7]
cars_column_mpg_min = np.min(cars_column_mpg)
cars_column_mpg_max = np.max(cars_column_mpg)

cars_num_rows_normalized = (cars_num_rows - cars_num_rows_min) / (cars_num_rows_max - cars_num_rows_min)
cars_mpg_normalized = (cars_column_mpg - cars_column_mpg_min) / (cars_column_mpg_max - cars_column_mpg_min)


In [32]:
#TODO 1: create weighted function with coefficient for each column parameter

def weighted_function (x, coefficients):
    return np.dot(x, coefficients)

def offspring(parent):
    return parent + np.random.uniform(low= -0.1, high=0.1, size=parent.shape)

def coeffients(number):
    cV = np.random.uniform(low= -1.0, high=1.0, size=number)
    return cV

def fitness_function (data, coefficients):
    return mean_squared_error(cars_mpg_normalized, weighted_function(data, coefficients))

num_parents = 1
num_children = 3
num_generations = 300
mutation_std = 0.05

parents = np.random.uniform(low=-1, high=1, size=(num_parents, cars_num_rows.shape[1]))

best_fitness = None
best_coeffs = None

for i in range(num_generations):
    # generate children
    children = np.tile(parents, (num_children, 1))
    children += np.random.normal(scale=mutation_std, size=children.shape)
    # compute fitness for parents and children
    pop = np.concatenate((parents, children))
    # fitness = np.empty((0,3), float)
    # for individual in pop:
    #     fitness.append(fitness_function(cars_num_rows_normalized, individual))

    fitness = np.array([fitness_function(cars_num_rows_normalized, individual) for individual in pop])
    #print(fitness)
    # select parents
    parents = pop[np.argsort(fitness)[:num_parents]]
    # store best individual
    best_idx = np.argmin(fitness)
    if i == 0:
        initial_rmse = fitness[best_idx] * (cars_column_mpg_max - cars_column_mpg_min) + cars_column_mpg_min
        print(f"Initial RMSE: {initial_rmse:.2f}")
    if best_fitness is None or fitness[best_idx] < best_fitness:
        best_fitness = fitness[best_idx]
        best_coeffs = pop[best_idx]

best_fitness_denormalized = best_fitness * (cars_column_mpg_max - cars_column_mpg_min) + cars_column_mpg_min


cars_Tcolumns_mpg = cars_column_mpg[[3,56,116,218]]
given_lines = cars[[3,56,116,218],1:7]
given_lines_nrm = car_nrm[[3,56,116,218],1:7]

# TODO correct!
predict = np.dot(given_lines_nrm[0],best_coeffs) * (cars_column_mpg_max - cars_column_mpg_min) + cars_column_mpg_min

#predict only mpg
indx = 0
for car in given_lines:
    print(f"mpg is {cars_Tcolumns_mpg[indx]}, predicted is "
          f"{weighted_function(car, best_coeffs)}")
    indx += 1


print(f"Final RMSE: {best_fitness_denormalized:.2f}")

Initial RMSE: 35.85
mpg is 16.0, predicted is -3254.809619897512
mpg is 24.0, predicted is -2140.510317793978
mpg is 29.0, predicted is -1763.96518779036
mpg is 33.5, predicted is -1826.8751715167239
Final RMSE: 9.51
