# In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt

# ---------------------------
# Step 1: Define Dataset
# ---------------------------
# Each observation is one campaign: (TV, Radio, Newspaper, Sales).
column_names = ('TV', 'Radio', 'Newspaper', 'Sales')
observations = [
    (230.1, 37.8, 69.2, 22.1),
    (44.5, 39.3, 45.1, 10.4),
    (17.2, 45.9, 69.3, 9.3),
    (151.5, 41.3, 58.5, 18.5),
    (180.8, 12.8, 58.4, 12.9),
]

# Transpose the row-wise records into the column -> list-of-values mapping
# that the DataFrame is built from.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*observations))
}
df = pd.DataFrame(data)

# ---------------------------
# Step 2: Preprocess Data
# ---------------------------
# Predictors are the three advertising channels; the target is Sales.
X = df.loc[:, ['TV', 'Radio', 'Newspaper']]
y = df.loc[:, 'Sales']

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ---------------------------
# Step 3: Train ML Model
# ---------------------------
# Fit the regressor on the scaled training features; `fit` returns the
# estimator itself, so construction and training can be chained.
model = RandomForestRegressor(random_state=42).fit(X_train_scaled, y_train)

# ---------------------------
# Step 4: Define GA Functions
# ---------------------------
def generate_population(size):
    """Create ``size`` random candidate budget allocations.

    Each candidate is a 3-element NumPy array ``[TV, Radio, Newspaper]``
    whose genes are drawn uniformly from ``[0, 300)``, ``[0, 50)`` and
    ``[0, 100)`` respectively.

    Args:
        size: Number of candidates to generate.

    Returns:
        A list of ``size`` NumPy arrays of length 3.
    """
    # Upper sampling limit per gene, in chromosome order.
    upper_limits = (300, 50, 100)  # TV, Radio, Newspaper

    candidates = []
    for _ in range(size):
        genes = [np.random.uniform(0, limit) for limit in upper_limits]
        candidates.append(np.array(genes))
    return candidates

def fitness(chromosome):
    """Return the sales the trained model predicts for one budget allocation.

    Args:
        chromosome: Sequence of three raw (unscaled) budget values in the
            order TV, Radio, Newspaper.

    Returns:
        The model's prediction for that single allocation.
    """
    # Wrap the chromosome in a list so it is treated as one sample, and apply
    # the same scaling the model was trained with.
    scaled_row = scaler.transform([chromosome])
    predictions = model.predict(scaled_row)
    return predictions[0]

def crossover(parent1, parent2):
    """Combine two parents with single-point crossover.

    A cut point is drawn uniformly from the interior positions
    ``1 .. len(parent1) - 1``; the child takes the genes before the cut from
    ``parent1`` and the genes from the cut onwards from ``parent2``. For the
    3-gene chromosomes used in this file the cut is therefore 1 or 2.

    Args:
        parent1: Sequence of genes supplying the head of the child.
        parent2: Sequence of genes supplying the tail of the child.

    Returns:
        A new NumPy array holding the child's genes; neither parent is
        modified.
    """
    n_genes = len(parent1)
    if n_genes < 2:
        # No interior cut point exists, so the child is a copy of parent1.
        return np.concatenate((parent1[:n_genes], parent2[n_genes:]))

    # The upper bound of randint is exclusive, so the cut is always strictly
    # inside the chromosome and each parent contributes at least one gene.
    point = np.random.randint(1, n_genes)
    return np.concatenate((parent1[:point], parent2[point:]))

def mutate(chromosome, rate=0.1, bounds=((0, 300), (0, 50), (0, 100))):
    """Return a possibly mutated copy of ``chromosome``.

    With probability ``rate`` one randomly chosen gene is shifted by a value
    drawn uniformly from ``[-10, 10)`` and then clamped to that gene's
    bounds, so a mutation can never produce a negative budget or leave the
    search range. The input is never modified; the result is always a new
    float array.

    Args:
        chromosome: Sequence of gene values (TV, Radio, Newspaper budgets).
        rate: Probability that a mutation is applied at all.
        bounds: One ``(low, high)`` pair per gene used to clamp the mutated
            gene. The default matches the ranges ``generate_population``
            samples from. Pass ``None`` to disable clamping.

    Returns:
        A new NumPy float array with at most one gene changed.
    """
    # Work on a float copy: the caller's array stays untouched, and integer
    # inputs can take a fractional shift.
    mutated = np.array(chromosome, dtype=float)

    if np.random.rand() < rate:
        gene = np.random.randint(mutated.size)
        value = mutated[gene] + np.random.uniform(-10, 10)
        if bounds is not None:
            low, high = bounds[gene]
            value = min(max(value, low), high)
        mutated[gene] = value
    return mutated

# ---------------------------
# Step 5: Genetic Algorithm Loop
# ---------------------------
population_size = 10
generations = 50
population = generate_population(population_size)
best_fitness_over_time = []

for gen in range(generations):
    # Score every individual exactly once per generation; each call runs a
    # model prediction, so the scores are reused for ranking and for the
    # convergence history instead of being recomputed.
    scores = [fitness(individual) for individual in population]

    # Rank indices by descending score. The sort is stable, so individuals
    # with equal scores keep their existing relative order.
    ranking = sorted(range(len(population)), key=scores.__getitem__, reverse=True)
    population = [population[i] for i in ranking]
    best_fitness_over_time.append(scores[ranking[0]])

    # Selection: the two fittest individuals survive unchanged (elitism)
    # and act as the only parents of the next generation.
    parents = population[:2]

    # Fill the remaining slots with mutated offspring of those two parents.
    children = [
        mutate(crossover(parents[0], parents[1]))
        for _ in range(population_size - 2)
    ]

    # New generation
    population = parents + children

# ---------------------------
# Step 6: Output Best Solution
# ---------------------------
# The final generation is left unsorted by the loop, so pick its fittest
# member explicitly and score it for the report.
best_solution = max(population, key=fitness)
best_prediction = fitness(best_solution)

# Assemble the report first, then emit it one line at a time.
report_lines = (
    "✅ Best Budget Allocation:",
    f"TV:        ${best_solution[0]:.2f}K",
    f"Radio:     ${best_solution[1]:.2f}K",
    f"Newspaper: ${best_solution[2]:.2f}K",
    f"\n🎯 Predicted Sales: {best_prediction:.2f}K units",
)
print(*report_lines, sep="\n")

# ---------------------------
# Step 7: Plot Convergence
# ---------------------------
# Draw the best fitness recorded at each generation on a dedicated figure,
# using Matplotlib's object-oriented interface.
fig, ax = plt.subplots()
ax.plot(best_fitness_over_time, marker='o')
ax.set_title("Genetic Algorithm Optimization Over Generations")
ax.set_xlabel("Generation")
ax.set_ylabel("Predicted Sales (Fitness)")
ax.grid(True)
fig.tight_layout()
plt.show()