In [58]:
import pandas as pd
import numpy as np

In [59]:
# Load the forecast table. Later cells read its "Pcode", "Week",
# "Quantity Sold" and "Price" columns.
forecast_csv_path = "ForecastedInformation.csv"
forecasted_info = pd.read_csv(forecast_csv_path)

### Helper Functions for Differential Evolution

In [60]:
def initialize_population(bounds, population_size):
    """Draw a random starting population inside the given box bounds.

    Parameters
    ----------
    bounds : sequence of (lower, upper) pairs
        One pair per parameter.
    population_size : int
        Number of individuals (rows) to create.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(population_size, len(bounds))``; column ``k`` is
        sampled uniformly between ``bounds[k][0]`` and ``bounds[k][1]``.
    """
    n_parameters = len(bounds)
    population = np.zeros((population_size, n_parameters))
    # Columns are filled one after another, each with its own uniform draw
    for column, limits in enumerate(bounds):
        lower, upper = limits[0], limits[1]
        population[:, column] = np.random.uniform(lower, upper, population_size)
    return population

In [61]:
def mutate(individual_index, population, mutation_factor, bounds):
    """Build a mutant vector ``a + F * (b - c)`` for one individual.

    ``a``, ``b`` and ``c`` are three distinct rows of ``population`` picked at
    random from every row except ``individual_index``. The result is clipped
    element-wise to ``bounds``.

    Parameters
    ----------
    individual_index : int
        Row of the individual being mutated; it is excluded from the draw.
    population : numpy.ndarray
        Current population, one individual per row.
    mutation_factor : float
        Scale ``F`` applied to the difference vector.
    bounds : sequence of (lower, upper) pairs
        One pair per parameter.

    Returns
    -------
    numpy.ndarray
        The clipped mutant vector.
    """
    # Every row index except the target's own
    all_rows = np.arange(len(population))
    candidates = all_rows[all_rows != individual_index]
    base_row, plus_row, minus_row = np.random.choice(candidates, 3, replace=False)

    difference = population[plus_row] - population[minus_row]
    unclipped = population[base_row] + mutation_factor * difference

    lower_limits = [limits[0] for limits in bounds]
    upper_limits = [limits[1] for limits in bounds]
    return np.clip(unclipped, lower_limits, upper_limits)

In [62]:
def crossover(target, mutant, crossover_prob):
    """Mix a target vector with its mutant, position by position.

    Each position independently takes the mutant's value when a fresh uniform
    draw in [0, 1) is below ``crossover_prob``; otherwise it keeps the target's
    value.

    Parameters
    ----------
    target : array-like
        The current individual.
    mutant : array-like
        The mutant vector, same length as ``target``.
    crossover_prob : float
        Per-position probability of taking the mutant's value.

    Returns
    -------
    numpy.ndarray
        The trial vector.
    """
    n_positions = len(target)
    draws = np.random.rand(n_positions)
    use_mutant = np.less(draws, crossover_prob)
    return np.where(use_mutant, mutant, target)

In [63]:
def differential_evolution(objective_function, parameter_bounds, population_size, n_generations, mutation_factor, p_crossover, maximize=True):
    """Optimise ``objective_function`` over box bounds with differential evolution.

    Each generation, every individual gets a trial vector (mutation followed by
    crossover). The trial replaces the individual when its fitness is strictly
    better. Trials within a generation are all built from the population as it
    stood at the start of that generation; the replacements are committed
    together at the end of the generation.

    Parameters
    ----------
    objective_function : callable
        Maps one individual (1-D array) to its fitness.
    parameter_bounds : sequence of (lower, upper) pairs
        One pair per parameter.
    population_size : int
        Number of individuals. ``mutate`` draws three distinct other
        individuals, so this must be at least 4.
    n_generations : int
        Number of generations to run.
    mutation_factor : float
        Scale applied to the difference vector in ``mutate``.
    p_crossover : float
        Per-position crossover probability passed to ``crossover``.
    maximize : bool, optional
        ``True`` (default, the historical behaviour) keeps the higher fitness;
        ``False`` keeps the lower one.

    Returns
    -------
    (numpy.ndarray, fitness)
        The best individual of the final population and its fitness.
    """
    # Initialize population and maintain current fitnesses
    population = initialize_population(parameter_bounds, population_size)
    fitness = np.array([objective_function(individual) for individual in population])

    for _ in range(n_generations):
        new_population = population.copy()
        for i in range(population_size):
            # Mutation
            mutant = mutate(i, population, mutation_factor, parameter_bounds)
            # Crossover
            trial = crossover(population[i], mutant, p_crossover)
            # Evaluation
            trial_fitness = objective_function(trial)
            if maximize:
                trial_is_better = trial_fitness > fitness[i]
            else:
                trial_is_better = trial_fitness < fitness[i]
            if trial_is_better:
                new_population[i] = trial
                fitness[i] = trial_fitness
        # Commit the survivors. Without this the next generation would keep
        # mutating the initial population, and the returned individual would
        # not be the one that produced the returned fitness.
        population = new_population

    best_index = np.argmax(fitness) if maximize else np.argmin(fitness)
    return population[best_index], fitness[best_index]

### Inputs to the function

In [64]:
# Search-space bounds: two parameters per SKU, stored back to back
# (row 2k is SKU k's first parameter, row 2k+1 its second)
sku_codes = forecasted_info["Pcode"].unique()
num_skus = len(sku_codes)

# Policy decision period
max_order_frequency = 10 # In weeks, this is up to the user!

# One (lower, upper) row per parameter, so the matrix is 2*num_skus by 2.
# Column 0 (the lower bounds) is left at 0 for every parameter.
parameter_bounds = np.zeros((2*num_skus, 2))
for sku_position, sku in enumerate(sku_codes):
    sku_rows = forecasted_info[forecasted_info["Pcode"] == sku]
    first_row, second_row = 2*sku_position, 2*sku_position + 1
    # Upper bound of the first parameter: the SKU's largest "Quantity Sold"
    parameter_bounds[first_row, 1] = int(sku_rows["Quantity Sold"].max())
    # Upper bound of the second parameter: the longest allowed order frequency
    parameter_bounds[second_row, 1] = max_order_frequency

# Some additional hyperparameters to tune of differential evolution
population_size = 100
n_generations = 100 # This is QUITE susceptible to change
mutation_factor = 1 # This has to be a constant from 0 to 2
p_crossover = 0.3 # between 0 and 1

### Define objective functions and variables that it needs

In [65]:
# Variable definitions read by the objective function f
df = forecasted_info # Contains all info about each sku
skus = forecasted_info["Pcode"].unique()
num_skus = len(skus) # Recomputed here so this cell does not depend on an earlier cell's value

# USER INPUT: the exact weeks of the forecast that the policy is optimised over
forecast_weeks = [104, 105, 106, 107, 108]
# The objective indexes one column of `quantities` per simulated week, so the
# horizon is derived from forecast_weeks instead of being typed in separately.
# NOTE(review): max_order_frequency is set independently in the inputs cell and
# is currently larger than this horizon -- confirm that is intended.
policy_optimization_period = len(forecast_weeks)

# Then convert this into the forms we want for optimization
prices = np.zeros((num_skus, 1))
quantities = np.zeros((num_skus, len(forecast_weeks)))
for j, sku in enumerate(skus):
    temp_df = forecasted_info[forecasted_info["Pcode"] == sku]
    # Drops the first character of the Price text before numpy converts it to a
    # float (presumably a leading currency symbol -- TODO confirm)
    prices[j] = (temp_df.iloc[0]["Price"][1:])
    # ELIZA TODO: figure out why accidentally created duplicates when created the forecasted information df
    # Until then: keep the first row seen for each week and line the values up
    # with forecast_weeks by week number rather than by row position.
    quantity_by_week = (
        temp_df[temp_df["Week"].isin(forecast_weeks)]
        .drop_duplicates(subset="Week")
        .set_index("Week")["Quantity Sold"]
    )
    missing_weeks = [week for week in forecast_weeks if week not in quantity_by_week.index]
    if missing_weeks:
        raise ValueError(f"SKU {sku!r} has no forecast row for week(s) {missing_weeks}")
    quantities[j] = quantity_by_week.loc[forecast_weeks].to_numpy()
# NOW: prices and quantities have the info we need
# NOW: prices and quantities have the info we need

In [66]:
# Define objective function based on values above
def f(individual):
    """Cost of an ordering policy over the optimisation horizon.

    ``individual`` holds two values per SKU, back to back: entry ``2*j`` is the
    quantity of SKU ``j`` bought at each order and entry ``2*j + 1`` is how
    often (in weeks) it is ordered. The frequency is searched as a continuous
    value, so it is rounded to a whole number of weeks, with a minimum of 1.

    Every SKU starts with no stock. Each week, SKUs whose order is due
    (week 0, then every ``frequency`` weeks) are restocked, the week's demand
    is subtracted, and demand that cannot be met is counted as missed sales at
    the SKU's price.

    Reads the module-level ``num_skus``, ``policy_optimization_period``,
    ``quantities`` (SKU x week demand) and ``prices`` (one price per SKU).

    Returns
    -------
    float
        missed sales + summed end-of-week stock + number of orders placed.
    """
    individual = np.asarray(individual, dtype=float)
    order_quantities = individual[0:2*num_skus:2]
    # Whole weeks between orders; at least 1 so the modulo below is defined
    order_every = np.maximum(1, np.rint(individual[1:2*num_skus:2])).astype(int)

    demand = np.asarray(quantities, dtype=float)[:num_skus]
    unit_prices = np.asarray(prices, dtype=float).reshape(-1)[:num_skus]

    on_hand = np.zeros(num_skus)
    missed_sales = 0.0
    inventory = 0.0
    reorder_cost = 0.0
    for week in range(policy_optimization_period):
        # An order arrives in week 0 and then every order_every weeks
        restock_due = (week % order_every) == 0
        on_hand = on_hand + np.where(restock_due, order_quantities, 0.0)

        # Cost of buying stuff (reorder_cost)
        # TODO: Sidd, can you change the costs to be nonzero amounts even though we would fabricate this data somehow
        # NOTE: the cost of one order is currently pretended to be 1; a per-SKU cost array would go here
        reorder_cost += float(np.sum(restock_due))

        # demand[j, week] is the amount of sku j sold in that week
        on_hand = on_hand - demand[:, week]
        # Negative stock is demand that could not be served
        shortfall = np.maximum(-on_hand, 0.0)
        missed_sales += float(np.sum(shortfall * unit_prices))
        on_hand = np.maximum(on_hand, 0.0)

        # This just calculates the inventory quantity
        inventory += float(np.sum(on_hand)) # TODO Sidd- you can add all your notes about how much space each takes up and whatever here

    return float(missed_sales + inventory + reorder_cost)

In [67]:
#### DIFFERENTIAL EVOLUTION INPUTS ####
# f returns a cost (missed sales + inventory + reorder cost), so lower is better.
# differential_evolution keeps whichever candidate has the HIGHER fitness, so it
# is handed the negated cost; the sign is flipped back after the run.
def objective_function(individual):
    """Fitness handed to differential_evolution: the negated policy cost from f."""
    return -f(individual)

populationSize = population_size
p_crossover = 0.3

best_parameters, best_fitness = differential_evolution(objective_function, parameter_bounds, population_size, n_generations, mutation_factor, p_crossover)
# Undo the negation so that score is the policy cost again (lower is better)
score = -best_fitness

### Next steps
(Sidd, please take the items marked with your name, and ideally the rest of them too, except for item 5.)
(Also, the notebook currently takes about 2.5 minutes to run, which we could definitely cut down. Use this number as the baseline when comparing timings.)
1. Make the objective function significantly faster (remove for loops)
2. Figure out how to use numpy to remove the inner for loop in the algorithm
3. (Sidd) Allow the inventory and reorder-cost terms to take in the volume and the reorder cost of each individual item, and fix them if they are not how you want them.
4. Implement hyperparameter search. That is: for populationSize, n_generations, mutation_factor and p_crossover, run the algorithm over a grid of candidate values and find the configuration that gives the lowest score. (We have to be careful about the best way to do this; it might itself be some sort of optimization. Sidd, you can think this through.)
5. (Eliza) figure out why the ForecastedInformation.csv has duplicates lol (I just removed them for now)
6. Benchmark our progress– not sure how we would do this– lmk what you think Sidd

NOTE: user inputs
- policy_optimization_period
- forecast_weeks (which exact weeks of df we are forecasting)