In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random

# Seed Python's `random` module so the values drawn from it are repeatable.
random.seed(0)


def load_data_from_file(fileName="../Linear Regression/advertising.csv"):
    """Load a CSV file and build the regression design matrix.

    The first line of the file is skipped as a header. The first three
    columns are used as features and the fourth column as the target.

    Args:
        fileName: Path to a comma-separated file with at least four numeric
            columns and more than one data row.

    Returns:
        A tuple ``(features_X, sales_Y)``. ``features_X`` has shape
        ``(n_rows, 4)``: a leading column of ones (the bias term) followed by
        the three feature columns. ``sales_Y`` has shape ``(n_rows,)``.
    """
    # dtype=float forces one homogeneous 2-D array. With dtype=None numpy
    # infers a type per column, and a file that mixes integer-looking and
    # decimal columns comes back as a 1-D structured array that cannot be
    # sliced by column.
    data = np.genfromtxt(fileName, dtype=float, delimiter=",", skip_header=1)
    features_X = data[:, :3]
    sales_Y = data[:, 3]
    bias_column = np.ones((features_X.shape[0], 1))
    features_X = np.hstack([bias_column, features_X])
    return features_X, sales_Y

In [None]:
# Load the design matrix and targets, then preview the first rows and the target shape.
features_X, sales_Y = load_data_from_file()
print(features_X[:5])
print(sales_Y.shape)

In [None]:
def generate_random_value(bound=10):
    """Draw one random value centred on zero and scaled by ``bound``.

    For a positive ``bound`` the result lies in ``[-bound / 2, bound / 2)``.
    """
    centered = random.random() - 0.5  # uniform in [-0.5, 0.5)
    return centered * bound


def create_individual(n=4, bound=10):
    """Create a random chromosome of ``n`` genes.

    Each gene is drawn, in order, with ``generate_random_value(bound)``.

    Returns:
        A 1-D float numpy array of length ``n``.
    """
    genes = [generate_random_value(bound) for _ in range(n)]
    return np.array(genes, dtype=float)

In [None]:
# Sample one chromosome with the default size and bound, and show it.
individual = create_individual()
print(individual)

In [None]:
def compute_loss(individual, X=None, y=None):
    """Mean squared error of the linear model encoded by ``individual``.

    Args:
        individual: Sequence of coefficients, one per column of ``X``.
        X: Optional design matrix. Defaults to the notebook-level
            ``features_X``.
        y: Optional target vector. Defaults to the notebook-level
            ``sales_Y``.

    Returns:
        ``mean((X @ theta - y) ** 2)`` as a numpy float scalar.
    """
    # Fall back to the notebook globals so existing one-argument calls
    # (including ``key=compute_fitness`` sorts) keep working.
    if X is None:
        X = features_X
    if y is None:
        y = sales_Y

    theta = np.asarray(individual, dtype=float)
    residuals = np.asarray(X).dot(theta) - np.asarray(y)
    return np.mean(residuals ** 2)


def compute_fitness(individual):
    """Turn an individual's loss into a fitness score, ``1 / (1 + loss)``.

    The lower the loss, the higher the fitness.
    """
    return 1 / (1 + compute_loss(individual))

In [None]:
# Score a hand-written chromosome and show its fitness.
individual = [4.09, 4.82, 3.10, 4.02]
fitness_score = compute_fitness(individual)
print(fitness_score)

In [None]:
def crossover(individual1, individual2, crossover_rate=0.9):
    """Uniform crossover: exchange genes between two parents, position by position.

    One random number is drawn for every gene index of ``individual1``; when
    it is below ``crossover_rate`` the two children exchange that gene. The
    parents are left untouched because the children start as ``.copy()``
    copies.

    Returns:
        ``(child1, child2)``.
    """
    child_a = individual1.copy()
    child_b = individual2.copy()

    for position in range(len(individual1)):
        swap_here = random.random() < crossover_rate
        if swap_here:
            # Each position is visited once, so the parents still hold the
            # pre-swap values of both children.
            child_a[position] = individual2[position]
            child_b[position] = individual1[position]

    return child_a, child_b

In [None]:
# A rate above 1 is always greater than the random draw, so every gene is swapped.
individual1 = [4.09, 4.82, 3.10, 4.02]
individual2 = [3.44, 2.57, -0.79, -2.41]
individual1, individual2 = crossover(individual1, individual2, crossover_rate=2.0)
print(" individual1 : ", individual1)
print(" individual2 : ", individual2)

In [None]:
def mutate(individual, mutation_rate=0.05):
    """Return a mutated copy of ``individual``.

    One random number is drawn per gene; when it falls below
    ``mutation_rate`` the gene is replaced by a fresh
    ``generate_random_value()`` (default bound). The input is not modified.
    """
    mutant = individual.copy()
    for position, _ in enumerate(individual):
        if random.random() < mutation_rate:
            mutant[position] = generate_random_value()
    return mutant

In [None]:
# Mutate a known chromosome with rate 1 so that every gene is redrawn.
before_individual = [4.09, 4.82, 3.10, 4.02]
# Pass `before_individual` itself so the comparison below is meaningful and
# the cell does not depend on a variable defined in another cell.
after_individual = mutate(before_individual, mutation_rate=1)
print(before_individual == after_individual)

In [None]:
def initializePopulation(m):
    """Build a list of ``m`` individuals, each produced by ``create_individual()``."""
    population = []
    for _ in range(m):
        population.append(create_individual())
    return population

In [None]:
# Build a population of 100 random chromosomes and confirm its size.
population = initializePopulation(m=100)
print(len(population))

In [None]:
def selection(sorted_old_population, m):
    """Pick one individual by a two-way tournament over index positions.

    Two distinct indices are drawn from ``[0, m - 1]`` and the individual at
    the larger index is returned. The function is meant for a population
    sorted by ascending fitness (as ``create_new_population`` passes it), in
    which case the larger index is the fitter candidate.

    Args:
        sorted_old_population: Indexable population holding at least ``m``
            individuals.
        m: Number of leading individuals to draw from.

    Returns:
        The selected individual (the stored object, not a copy).

    Raises:
        ValueError: If ``m`` is smaller than 1.
    """
    if m < 1:
        raise ValueError("selection requires m >= 1")
    if m == 1:
        # Only one candidate exists, so a second distinct index can never be
        # drawn; looping for one would never terminate.
        return sorted_old_population[0]

    index1 = random.randint(0, m - 1)
    index2 = random.randint(0, m - 1)
    while index2 == index1:
        index2 = random.randint(0, m - 1)

    return sorted_old_population[max(index1, index2)]

In [None]:
# `selection` compares candidates by index position, so rank the population
# by ascending fitness before drawing a parent from it.
population = initializePopulation(m=100)
ranked_population = sorted(population, key=compute_fitness)
individual_s = selection(ranked_population, m=len(ranked_population))
print(individual_s)

In [None]:
def create_new_population(old_population, elitism=2, gen=1):
    """Produce the next generation from ``old_population``.

    The population is ranked by ascending fitness, the best individual is
    reported, ``m - elitism`` offspring are bred through selection, crossover
    and mutation, and finally the ``elitism`` fittest individuals are carried
    over unchanged.

    Args:
        old_population: Non-empty list of individuals.
        elitism: Number of top individuals carried over as-is. Values outside
            ``[0, len(old_population)]`` are clamped to that range.
        gen: Generation number, used only in the progress message.

    Returns:
        ``(new_population, best_loss)``. ``new_population`` holds exactly
        ``len(old_population)`` individuals; ``best_loss`` is the loss of the
        fittest individual of ``old_population``.

    Raises:
        IndexError: If ``old_population`` is empty.
    """
    m = len(old_population)
    sorted_population = sorted(old_population, key=compute_fitness)
    # Keep the elite count inside [0, m] so the slice and the offspring
    # count below stay well defined.
    elitism = max(0, min(elitism, m))

    # Ascending sort: the fittest individual is the last one.
    best_individual = sorted_population[m - 1]
    best_loss = compute_loss(best_individual)
    print(
        "Generation: ",
        gen,
        "Best fitness score:",
        compute_fitness(best_individual),
        "Best loss:",
        best_loss,
        "with chromsome: ",
        best_individual,
    )

    n_offspring = m - elitism
    new_population = []
    while len(new_population) < n_offspring:
        # selection
        individual1 = selection(sorted_population, m)
        individual2 = selection(sorted_population, m)

        # crossover
        individual1_new, individual2_new = crossover(individual1, individual2)

        # mutation
        individual1_new = mutate(individual1_new)
        individual2_new = mutate(individual2_new)

        # Add the children. When n_offspring is odd the last pair has room
        # for only one child; dropping the second keeps the population size
        # constant instead of growing by one per generation.
        new_population.append(individual1_new)
        if len(new_population) < n_offspring:
            new_population.append(individual2_new)

    # Elitism: carry the fittest individuals over to the next generation.
    new_population.extend(sorted_population[m - elitism:])

    return new_population, best_loss

In [None]:
# Run one generation step on a small population and display the resulting size.
old_population = initializePopulation(m=10)
new_population, _ = create_new_population(old_population=old_population, elitism=2, gen=1)
len(new_population)

In [None]:
def run_GA(n_generations=100, m=600):
    """Run the genetic algorithm for ``n_generations`` steps.

    Starts from ``m`` random individuals and repeatedly calls
    ``create_new_population`` with ``elitism=2``, passing the loop index as
    the generation number.

    Returns:
        ``(losses_list, population)``: the loss returned by each step, and
        the population produced by the final step.
    """
    current_population = initializePopulation(m)
    best_losses = []
    for generation in range(n_generations):
        current_population, best_loss = create_new_population(
            current_population, elitism=2, gen=generation
        )
        best_losses.append(best_loss)

    return best_losses, current_population

In [None]:
import matplotlib.pyplot as plt


def visualize_loss(losses_list):
    """Plot the loss recorded at each generation and show the figure."""
    plt.plot(losses_list)
    plt.xlabel("Generations")
    plt.ylabel("Loss")
    plt.title("Loss vs Generations")
    plt.show()


# Run the experiment, plot its loss curve, and display the last individual
# of the final population.
losses_list, population = run_GA(n_generations=100, m=600)
visualize_loss(losses_list)
population[-1]

In [None]:
def visualize_predict_gt():
    """Plot the real targets against the predictions of the fittest individual.

    Reads the notebook-level ``population``, ``features_X`` and ``sales_Y``,
    prints the fittest chromosome, and shows a line plot of both series.
    """
    # After an ascending sort by fitness the fittest individual is last.
    sorted_population = sorted(population, key=compute_fitness)
    print(sorted_population[-1])
    theta = np.array(sorted_population[-1])

    # One prediction per sample: the dot product of its feature row with theta.
    estimated_prices = features_X.dot(theta)

    fig, ax = plt.subplots(figsize=(10, 6))
    plt.xlabel('Samples')
    plt.ylabel('Price')
    plt.plot(sales_Y, c='green', label='Real Prices')
    plt.plot(estimated_prices, c='blue', label='Estimated Prices')
    plt.legend()
    plt.show()

# Draw the comparison plot for the notebook-level `population`.
visualize_predict_gt()

In [None]:
# Scatter the real targets against the predictions of the fittest individual.
sorted_population = sorted(population, key=compute_fitness)
print(sorted_population[-1])
theta = np.array(sorted_population[-1])

samples = list(range(len(features_X)))
estimated_prices = []
for feature in features_X:
    # Prediction for one sample: sum of coefficient * feature value.
    estimated_price = sum(c * x for x, c in zip(feature, theta))
    estimated_prices.append(estimated_price)

fig, ax = plt.subplots(figsize=(10, 6))
plt.scatter(samples, sales_Y, c="green", label="Real Prices")
plt.scatter(samples, estimated_prices, c="blue", label="Estimated Prices")
plt.xlabel("Samples")
plt.ylabel("Price")
plt.legend()
plt.show()