In [1]:
import pandas as pd
import numpy as np
import random
import math

## Data

In [2]:
# Folder holding the input CSVs. Kept in one place so the notebook can be
# pointed at another machine/location by editing a single line.
DATA_DIR = "/Users/macdedieu/Desktop/data_optimizer"

# Scrubber catalogue and site requirements for scenario 1 and scenario 2.
# The files are semicolon-separated.
data_scrubber = pd.read_csv(f"{DATA_DIR}/Dataset_1.csv", sep=';')
data_site = pd.read_csv(f"{DATA_DIR}/scenario_1.csv", sep=';')
data_scrubber_2 = pd.read_csv(f"{DATA_DIR}/Dataset_2.csv", sep=';')
data_site_2 = pd.read_csv(f"{DATA_DIR}/scenario_2.csv", sep=';')

In [3]:
# Derived column: productivity per unit of time for each scrubber row.
# This adds a column to `data_scrubber` in place.
data_scrubber['efficiency'] = data_scrubber['productivity'] / data_scrubber['time']

In [4]:
# Preview the first rows of the scrubber table, including the derived `efficiency` column.
data_scrubber.head()

Unnamed: 0,scrubber,inventory,time,productivity,efficiency
0,A1,1,1.0,1225,1225.0
1,A2,1,1.5,1575,1050.0
2,A3,1,2.5,2475,990.0
3,A4,1,2.5,1750,700.0
4,A5,1,3.0,17500,5833.333333


In [5]:
# Display the site table (one row per site with its `area` and `time`).
data_site

Unnamed: 0,scenario,area,time
0,Site 1,23900,7
1,Site 2,19200,4
2,Site 3,16400,6
3,Site 4,16200,6
4,Site 5,33000,3


In [6]:
# Sanity check: the summed productivity of all scrubber rows must be at least
# the summed area of all sites. Each row is counted once; the `inventory`
# column is not taken into account here.
data_scrubber['productivity'].sum() >= data_site['area'].sum()

True

## GENERATE RANDOM ASSIGNMENT


In [7]:
import random

def generate_random_assignment(data_scrubber, data_site, max_diff=1000):
    """Randomly assign scrubbers to sites.

    Sites and scrubbers are shuffled, then scrubbers are handed out to one
    site after another. A site stops receiving scrubbers once the summed
    productivity of its combination exceeds the site's area by more than
    ``max_diff``.

    Parameters
    ----------
    data_scrubber : pandas.DataFrame
        Must provide the columns ``scrubber``, ``inventory`` and
        ``productivity``. The frame is NOT modified: inventory is tracked on
        a local copy so the function can be called repeatedly.
    data_site : pandas.DataFrame
        Must provide the columns ``scenario`` (site name) and ``area``.
    max_diff : number, default 1000
        Excess of productivity over area above which a site is considered
        finished.

    Returns
    -------
    tuple(list[list[str]], list[str])
        ``assignment[i]`` is the list of scrubber names given to
        ``site_list[i]``; ``site_list`` is the shuffled site order.

    Raises
    ------
    ValueError
        If there are fewer scrubbers than the lower bound
        ``ceil(total area / best productivity)``, or if a site ends up with
        no scrubber at all.
    """
    scrubber_list = data_scrubber['scrubber'].tolist()
    site_list = data_site['scenario'].tolist()
    scrubber_num = len(scrubber_list)

    # check if there are enough scrubbers to cover all sites
    # (ceiling division: a partial scrubber still requires a whole one)
    total_area = data_site['area'].sum()
    max_productivity = data_scrubber['productivity'].max()
    min_scrubbers = -(-total_area // max_productivity)
    if scrubber_num < min_scrubbers:
        raise ValueError(f"Not enough scrubbers available. Need at least {min_scrubbers}, but only have {scrubber_num}.")

    # Local stock counter so the caller's DataFrame is left untouched.
    inventory = (
        data_scrubber.drop_duplicates('scrubber')
        .set_index('scrubber')['inventory']
        .to_dict()
    )

    random.shuffle(scrubber_list)
    random.shuffle(site_list)
    assignment = []

    for site_name in site_list:
        area = data_site.loc[data_site['scenario'] == site_name, 'area'].values[0]
        scrubber_combination = []
        total_prod = 0

        while scrubber_list:
            # Test the stop condition BEFORE taking the next scrubber, so a
            # scrubber is never removed from the pool without being used.
            if total_prod >= area and total_prod - area > max_diff:
                break

            scrubber_name = scrubber_list.pop(0)
            if inventory[scrubber_name] >= 1:
                inventory[scrubber_name] -= 1
                scrubber_combination.append(scrubber_name)
                total_prod = data_scrubber.loc[
                    data_scrubber['scrubber'].isin(scrubber_combination), 'productivity'
                ].sum()

        if not scrubber_combination:
            # Report the actual site name; the loop position refers to the
            # shuffled order and would point at the wrong site.
            raise ValueError(f"Not enough scrubbers available to cover {site_name}.")

        assignment.append(scrubber_combination)

    return assignment, site_list


In [8]:
# Build one random assignment. Note that `assignment` holds the full return
# value, i.e. a tuple of (scrubber combinations, site order).
assignment = generate_random_assignment(data_scrubber, data_site)
assignment

([['A9'],
  ['D2', 'C3', 'A4', 'A8', 'A6', 'A1', 'A7'],
  ['A10', 'B2', 'B4', 'A5'],
  ['C1', 'A3', 'C2', 'B3', 'C4'],
  ['D1']],
 ['Site 1', 'Site 4', 'Site 3', 'Site 2', 'Site 5'])

## SIMULATED ANNEALING 

In [9]:
import random
import math

def cost_function(assignment, data_scrubber, data_site, site_list=None):
    """Score an assignment by over-capacity and by its longest scrubber time.

    Parameters
    ----------
    assignment : list[list[str]]
        ``assignment[i]`` is the list of scrubber names assigned to the i-th
        site.
    data_scrubber : pandas.DataFrame
        Must provide the columns ``scrubber``, ``productivity`` and ``time``.
    data_site : pandas.DataFrame
        Must provide the columns ``scenario`` and ``area``.
    site_list : sequence of str, optional
        Site name for each position of ``assignment``. Defaults to the order
        of the distinct ``scenario`` values in ``data_site``. Pass the order
        explicitly when the assignment was built for a shuffled site order.

    Returns
    -------
    tuple(number, number)
        ``excess`` is the sum over sites of ``max(0, productivity - area)``
        (a site whose productivity is below its area contributes 0).
        ``total_time`` is the largest ``time`` of any assigned scrubber across
        all sites, or 0 when nothing is assigned.
    """
    if site_list is None:
        site_list = data_site['scenario'].unique()

    excess = 0
    total_time = 0
    for site_idx, scrubber_idxs in enumerate(assignment):
        assigned = data_scrubber.loc[data_scrubber['scrubber'].isin(scrubber_idxs)]
        total_prod = assigned['productivity'].sum()
        if not assigned.empty:
            # Keep the maximum over ALL sites rather than only the last one.
            total_time = max(total_time, assigned['time'].max())
        area = data_site.loc[data_site['scenario'] == site_list[site_idx]]['area'].values[0]
        excess += max(0, total_prod - area)
    return excess, total_time

In [10]:
def generate_new_assignment(assignment, data_scrubber, data_site):
    """Propose a neighbouring assignment by exchanging scrubbers between two sites.

    Two distinct sites are drawn at random, two scrubbers are drawn from each,
    and the two pairs are exchanged. The move is rejected (an unchanged copy
    is returned) when

    * fewer than two sites exist, or either drawn site has fewer than two
      scrubbers;
    * an incoming scrubber is already present at the receiving site;
    * after the exchange, either site's summed productivity would fall below
      its area.

    An exchange does not change how many times any scrubber is used, so no
    inventory check is needed for this move.

    NOTE(review): because the move only exchanges scrubbers, the total
    productivity assigned across all sites stays the same. Lowering the
    overall excess may additionally require moves that remove or add a
    scrubber.

    Parameters
    ----------
    assignment : list[list[str]]
        ``assignment[i]`` holds the scrubber names of the i-th site, in the
        row order of ``data_site``. It is not modified.
    data_scrubber : pandas.DataFrame
        Must provide the columns ``scrubber`` and ``productivity``.
    data_site : pandas.DataFrame
        Must provide the columns ``scenario`` and ``area``.

    Returns
    -------
    list[list[str]]
        A new assignment (fresh lists), changed or unchanged.
    """
    site_list = data_site['scenario'].tolist()
    new_assignment = [list(scrubbers) for scrubbers in assignment]

    n_sites = min(len(new_assignment), len(site_list))
    if n_sites < 2:
        return new_assignment

    # select two random sites
    site1_idx, site2_idx = random.sample(range(n_sites), 2)
    site1, site2 = site_list[site1_idx], site_list[site2_idx]
    site1_scrubbers = new_assignment[site1_idx]
    site2_scrubbers = new_assignment[site2_idx]

    if len(site1_scrubbers) < 2 or len(site2_scrubbers) < 2:
        # cannot swap if either site has less than 2 scrubbers assigned
        return new_assignment

    # select two random scrubbers from each site
    outgoing1 = random.sample(site1_scrubbers, 2)
    outgoing2 = random.sample(site2_scrubbers, 2)

    remaining1 = list(site1_scrubbers)
    for scrubber in outgoing1:
        remaining1.remove(scrubber)
    remaining2 = list(site2_scrubbers)
    for scrubber in outgoing2:
        remaining2.remove(scrubber)

    # A scrubber must not end up twice at the same site.
    if set(outgoing2) & set(remaining1) or set(outgoing1) & set(remaining2):
        return new_assignment

    new_site1_scrubbers = remaining1 + outgoing2
    new_site2_scrubbers = remaining2 + outgoing1

    total_prod1 = data_scrubber.loc[data_scrubber['scrubber'].isin(new_site1_scrubbers), 'productivity'].sum()
    total_prod2 = data_scrubber.loc[data_scrubber['scrubber'].isin(new_site2_scrubbers), 'productivity'].sum()
    area1 = data_site.loc[data_site['scenario'] == site1, 'area'].values[0]
    area2 = data_site.loc[data_site['scenario'] == site2, 'area'].values[0]

    if total_prod1 < area1 or total_prod2 < area2:
        # reject the move if either site would no longer be fully covered
        return new_assignment

    new_assignment[site1_idx] = new_site1_scrubbers
    new_assignment[site2_idx] = new_site2_scrubbers

    return new_assignment
    


In [11]:
def simulated_annealing(data_scrubber, data_site, T=1, alpha=0.99, stopping_T=0.000001, stopping_iter=100000, seed=None):
    """Search for a low-cost scrubber assignment with simulated annealing.

    Parameters
    ----------
    data_scrubber, data_site : pandas.DataFrame
        Passed through to ``cost_function`` and ``generate_new_assignment``;
        ``scrubber`` and ``scenario`` columns are read here.
    T : float, default 1
        Initial temperature.
    alpha : float, default 0.99
        Multiplicative cooling factor applied after every iteration.
    stopping_T : float, default 1e-6
        The search stops once the temperature is no longer above this value.
    stopping_iter : int, default 100000
        Upper bound on the number of iterations.
    seed : int, optional
        When given, seeds the ``random`` module so a run can be reproduced.

    Returns
    -------
    tuple
        ``(best_assignment, best_cost, site_list, best_time)``.

    NOTE(review): the initial state draws an independent random subset of
    scrubbers for every site, so the same scrubber can appear at several
    sites at once. Confirm whether that is acceptable given ``inventory``.
    """
    if seed is not None:
        random.seed(seed)

    scrubber_list = data_scrubber['scrubber'].tolist()
    site_list = data_site['scenario'].tolist()

    # initialize with a random assignment (at least two scrubbers per site
    # when that many exist)
    min_size = min(2, len(scrubber_list))
    assignment = [
        random.sample(scrubber_list, random.randint(min_size, len(scrubber_list)))
        for _ in site_list
    ]

    # cost of the state the chain is currently in
    current_cost, current_time = cost_function(assignment, data_scrubber, data_site)

    # keep track of the best assignment
    best_assignment = assignment
    best_cost, best_time = current_cost, current_time

    # simulated annealing
    iteration = 0
    while T > stopping_T and iteration < stopping_iter:
        new_assignment = generate_new_assignment(assignment, data_scrubber, data_site)
        new_cost, new_time = cost_function(new_assignment, data_scrubber, data_site)

        # Metropolis criterion: compare the candidate with the CURRENT state,
        # not with the best state seen so far.
        delta_cost = new_cost - current_cost

        if delta_cost < 0 or math.exp(-delta_cost / T) > random.uniform(0, 1):
            assignment = new_assignment
            current_cost = new_cost

            if new_cost < best_cost:
                best_assignment = new_assignment
                best_cost = new_cost
                best_time = new_time

        T *= alpha
        iteration += 1

    return best_assignment, best_cost, site_list, best_time

In [12]:
# Run the annealer with its default schedule and unpack its four return values.
best_assignement, best_cost, site, best_time = simulated_annealing(data_scrubber, data_site)

# One row per site: the site name next to the scrubber combination chosen for it.
result = pd.DataFrame(
    {'Site': site,
     'Combination': best_assignement
    })

print(result)
print('Best Cost: ', best_cost)
print('Best Time: ', best_time)

     Site                                        Combination
0  Site 1                   [C3, A9, C5, A3, B1, B2, A5, D1]
1  Site 2  [D2, A7, B4, A1, C2, A2, C5, B3, B2, C3, A9, B...
2  Site 3  [A6, B1, A10, A4, B3, C3, A2, A8, A7, A3, B4, ...
3  Site 4  [B2, A4, C4, A1, C1, A8, A10, D2, B4, A2, A3, ...
4  Site 5  [A8, B1, B4, A7, A2, C5, A6, C1, A9, A3, B2, D...
Best Cost:  439690
Best Time:  8.0
