# Testing BeeColonyAlgorithm

In [1]:
from reader import Reader

In [2]:
from datetime import datetime

### Reading in the data

In [3]:
# Date bounds handed to Reader.read_flights in the next cell.
start, end = datetime(2015, 5, 15), datetime(2015, 5, 31)

In [4]:
%%time
# Load the flight data found under '../../../data' for the start/end bounds defined above.
# The returned object is used as the flight network (`net`) by every later cell.
net = Reader.read_flights('../../../data', start, end)

  flights_df = pd.read_csv(data_dir / 'flights.csv')


CPU times: user 34.5 s, sys: 2.26 s, total: 36.7 s
Wall time: 37.5 s


### Testing Random DFS Search

In [5]:
# Trip endpoints used by every search below: entries 14 and 72 of net.airports.
source, target = net.airports[14], net.airports[72]

print(source.name, '->', target.name)

Albany International Airport -> City of Colorado Springs Municipal Airport


In [6]:
%%time
# One randomized DFS from source to target; the result is iterated below as a sequence of flights.
path = net.random_dfs_search(source, target)

CPU times: user 103 ms, sys: 173 µs, total: 104 ms
Wall time: 103 ms


In [7]:
# Number of flights in the route found above.
len(path)

5

In [22]:
# Print the random-DFS route one flight at a time: airports, departure/arrival, price,
# followed by an empty separator line.
for flight in path:
    print(flight.origin.name, '->', flight.destination.name)
    print(flight.departure, flight.arrival)
    print(flight.price, end='\n\n')

Albany International Airport -> Tampa International Airport
2015-05-15 07:30:00 2015-05-15 10:35:00
201.93

Tampa International Airport -> Dallas Love Field
2015-05-15 16:25:00 2015-05-15 19:05:00
174.62

Dallas Love Field -> San Francisco International Airport
2015-05-15 19:25:00 2015-05-15 23:15:00
305.37

San Francisco International Airport -> Salt Lake City International Airport
2015-05-16 06:00:00 2015-05-16 07:57:00
103.63

Salt Lake City International Airport -> City of Colorado Springs Municipal Airport
2015-05-17 15:30:00 2015-05-17 17:08:00
148.0



## BeeColonyConfiguration

In [23]:
from dataclasses import dataclass
from datetime import timedelta

In [24]:
@dataclass
class BeeColonyConfiguration:
    """Settings consumed by BeeColonyAlgorithm.

    Three groups of fields: the traveller's constraints, the weight used by the
    cost function, and the bee-colony hyperparameters. The field order is part
    of the interface, because the generated ``__init__`` accepts the values
    positionally in declaration order.
    """

    # --- traveller's constraints ---
    # Date window passed to Network.filter_by_date when the algorithm is constructed.
    from_datetime: datetime
    to_datetime: datetime
    # Currently unused; supporting it only requires one more element on the DFS stack.
    max_cost: float
    # Currently unused; supporting it requires changing the Airport objects.
    transfer_time: timedelta
    # Currently unused; supporting it requires a max_depth for the DFS.
    max_transfers: int

    # --- cost function ---
    # Value in [0, 1]: 0 means only price matters, 1 means only travel time matters.
    time_priority: float

    # --- algorithm hyperparameters ---
    # Number of iterations of the main loop.
    iterations: int

    # Number of sites created by global search; also the size the site list is
    # refilled to after every iteration.
    scout_bees: int

    # How many of the cheapest sites get a local search in each iteration.
    best_sites: int
    # How many of those best sites are treated as elite.
    elite_sites: int
    # Foragers recruited for each elite site.
    elite_sites_bees: int
    # Foragers recruited for each remaining (non-elite) best site.
    rest_sites_bees: int

    # A site is abandoned once it has been shrunk this many times.
    max_shrinkages: int

In [26]:
# Settings for the experiment below: a window from 18 May 2015 00:00 to 21 May 2015 03:00,
# time weighted at 0.65 against price, 100 iterations with 20 scouts.
configuration = BeeColonyConfiguration(
    # traveller's constraints
    from_datetime=datetime(2015, 5, 18),
    to_datetime=datetime(2015, 5, 21, 3),
    max_cost=10000,
    transfer_time=timedelta(minutes=30),
    max_transfers=5,
    # cost function
    time_priority=0.65,
    # algorithm hyperparameters
    iterations=100,
    scout_bees=20,
    best_sites=10,
    elite_sites=4,
    elite_sites_bees=4,
    rest_sites_bees=2,
    max_shrinkages=3,
)

## BeeColonyAlgorithm

In [70]:
from operator import attrgetter
from typing import List, Set, Iterator
from copy import copy

from tqdm import trange

from data import Network, Flight, Airport

In [71]:
class SolutionPath:
    """A candidate route: a private copy of a flight list together with its cost.

    The cost is computed once, at construction, by ``algorithm.cost_function``
    and cached; the object then behaves like a read-only sequence of flights
    (``len``, indexing, iteration).
    """

    # 'BeeColonyAlgorithm' is quoted on purpose: that class is defined further
    # down in this cell, and an unquoted annotation is evaluated as soon as this
    # ``def`` executes, which raises NameError on a fresh kernel.
    def __init__(self, algorithm: 'BeeColonyAlgorithm', path: List[Flight]) -> None:
        """Copy ``path`` and cache its cost.

        Args:
            algorithm: object whose ``cost_function(path)`` yields the cost.
            path: flights making up the route; copied shallowly, so later
                changes to the caller's list do not affect this object.
        """
        self._path = copy(path)
        self._cost = algorithm.cost_function(path)

    def __len__(self) -> int:
        """Number of flights on the route."""
        return len(self._path)

    def __getitem__(self, index: int) -> Flight:
        """Flight at position ``index`` (delegates to the underlying list)."""
        return self._path[index]

    def __iter__(self) -> Iterator[Flight]:
        """Iterate over the flights in route order."""
        return iter(self._path)

    @property
    def path(self) -> List[Flight]:
        """The internal flight list itself (not a copy)."""
        return self._path

    @property
    def cost(self) -> float:
        """Cost computed for this route at construction time."""
        return self._cost

    
class Neighborhood:
    """One search site: the best route known so far plus a frozen prefix of it.

    Local search keeps the frozen flights fixed and re-samples the remainder of
    the route. Every local search that brings no improvement shrinks the site by
    freezing one more flight of the current best route. The site is abandoned
    when nothing is left to freeze or when it has been shrunk
    ``configuration.max_shrinkages`` times.
    """

    # TODO try out with frozen nodes, not flights, it should give us much better results

    # 'BeeColonyAlgorithm' is quoted on purpose: that class is defined further
    # down in this cell, and an unquoted annotation is evaluated as soon as this
    # ``def`` executes, which raises NameError on a fresh kernel.
    def __init__(self, algorithm: 'BeeColonyAlgorithm', path: SolutionPath) -> None:
        """Create a site centred on ``path`` with its first flight frozen.

        Args:
            algorithm: owner of the network and configuration used by this site.
            path: initial best route; it must contain at least one flight,
                because ``path[0]`` becomes the first frozen flight.
        """
        self._algorithm = algorithm

        self._center_path: SolutionPath = path
        self._frozen_flights: List[Flight] = [path[0]]

        self._foragers = 0
        self._times_shrunk = 0
        self._abandoned = False

    @property
    def best_path(self) -> SolutionPath:
        """Best route found at this site so far."""
        return self._center_path

    @property
    def best_cost(self) -> float:
        """Cost of ``best_path``."""
        return self._center_path.cost

    @property
    def times_shrunk(self) -> int:
        """How many flights have been frozen by ``shrink`` so far."""
        return self._times_shrunk

    @property
    def abandoned(self) -> bool:
        """True once the site should be dropped from the search."""
        return self._abandoned

    def recruit(self, foragers_count: int) -> None:
        """Set how many samples the next ``local_search`` call will draw."""
        self._foragers = foragers_count

    def shrink(self) -> None:
        """Freeze one more flight of the best route, or abandon the site."""
        # No room left to shrink: every flight of the best route is already frozen.
        if len(self._frozen_flights) >= len(self._center_path):
            self._abandoned = True
            return

        # The frozen flights are a prefix of the best route, so the next flight
        # to freeze sits at index len(frozen).
        newly_frozen_flight = self._center_path[len(self._frozen_flights)]
        self._frozen_flights.append(newly_frozen_flight)

        self._times_shrunk += 1

        if self._times_shrunk >= self._algorithm.configuration.max_shrinkages:
            self._abandoned = True

    def local_search(self) -> None:
        """Draw ``foragers`` random completions of the frozen prefix.

        Each sample keeps the frozen flights and replaces the remainder with a
        fresh ``random_dfs_search`` result. A sample that is strictly cheaper
        than the current best becomes the new best. If no sample improves on
        the best route (including when no foragers were recruited), the site
        shrinks.
        """
        net = self._algorithm.network

        # The frozen prefix is only modified by shrink(), which runs after the
        # loop, so everything derived from its last flight is loop-invariant.
        last_frozen = self._frozen_flights[-1]
        frozen_origin = last_frozen.destination
        # Third argument of random_dfs_search; presumably the earliest allowed
        # departure from frozen_origin -- confirm against Network.random_dfs_search.
        resume_time = last_frozen.arrival + frozen_origin.transfer_time

        improved = False

        for _ in range(self._foragers):
            # NOTE(review): what random_dfs_search returns when no route exists
            # is not visible here; this concatenation assumes it is a list.
            path = self._frozen_flights + net.random_dfs_search(
                frozen_origin,
                self._center_path[-1].destination,
                resume_time
            )

            solution = SolutionPath(self._algorithm, path)

            if solution.cost < self.best_cost:
                self._center_path = solution
                improved = True

        if not improved:
            self.shrink()
            

class BeeColonyAlgorithm:
    """Bee-colony search for a low-cost flight route between two airports.

    Scouts create sites from random DFS routes; in every iteration the cheapest
    sites receive foragers for a local search, abandoned sites are dropped and
    fresh scouts fill the site list back up to ``scout_bees``.
    """

    def __init__(
        self,
        network: Network,
        configuration: BeeColonyConfiguration
    ) -> None:
        """Store the configuration and the network restricted to its date window.

        Args:
            network: full flight network; ``filter_by_date`` is called on it with
                the configuration's ``from_datetime`` and ``to_datetime``.
            configuration: constraints, cost weight and hyperparameters.
        """
        self._configuration = configuration
        self._network = network.filter_by_date(configuration.from_datetime, configuration.to_datetime)

    @property
    def network(self) -> Network:
        """Result of ``filter_by_date``; all searches run on it."""
        return self._network

    @property
    def configuration(self) -> BeeColonyConfiguration:
        """Configuration this algorithm was constructed with."""
        return self._configuration

    def cost_function(self, path: List[Flight]) -> float:
        """Weighted sum of total price and total travel time of ``path``.

        Travel time is the number of whole minutes between the first departure
        and the last arrival. With ``tp = time_priority`` the result is
        ``price * (1 - tp) + minutes * tp``.

        Args:
            path: non-empty indexable sequence of flights; an empty one fails
                when the first/last flight is indexed.
        """
        overall_time = (path[-1].arrival - path[0].departure) // timedelta(minutes=1)
        overall_price = sum(flight.price for flight in path)

        tp = self._configuration.time_priority
        # TODO how do we scale time and price
        return overall_price * (1 - tp) + (overall_time) * tp

    def global_search(self, source: Airport, target: Airport) -> Neighborhood:
        """Create a new site from one random DFS route from ``source`` to ``target``."""
        return Neighborhood(
            self,
            SolutionPath(self, self._network.random_dfs_search(source, target))
        )

    def run(self, source: Airport, target: Airport) -> SolutionPath:
        """Run the configured number of iterations and return the cheapest route seen.

        Args:
            source: departure airport.
            target: destination airport.

        Returns:
            The lowest-cost SolutionPath observed at any site during the run,
            including improvements made by the last iteration's local search.
        """
        conf = self._configuration

        sites = [self.global_search(source, target) for _ in range(conf.scout_bees)]
        best_path = sites[0].best_path

        for _ in trange(conf.iterations):
            sorted_sites = sorted(sites, key=attrgetter('best_cost'))[:conf.best_sites]

            # Record the current leader before local search: a site that fails to
            # improve may be abandoned below and would otherwise be forgotten.
            if sorted_sites[0].best_cost < best_path.cost:
                best_path = sorted_sites[0].best_path

            elite_sites = sorted_sites[:conf.elite_sites]
            rest_sites = sorted_sites[conf.elite_sites:]

            for site in elite_sites:
                site.recruit(conf.elite_sites_bees)
            for site in rest_sites:
                site.recruit(conf.rest_sites_bees)

            for site in sorted_sites:
                site.local_search()

            # constructing list of sites for the next iteration
            left_sites = [
                site for site in sorted_sites if not site.abandoned
            ]

            new_sites = [
                self.global_search(source, target)
                for _ in range(conf.scout_bees - len(left_sites))
            ]

            sites = left_sites + new_sites

        # Final sweep. The in-loop check runs before local search, so without this
        # the improvements found in the last iteration would never be returned
        # (and with zero iterations the first scout would be returned instead of
        # the cheapest one).
        for site in sites:
            if site.best_cost < best_path.cost:
                best_path = site.best_path

        return best_path

In [64]:
# The constructor calls net.filter_by_date with the configured date window and
# keeps the result as the network that all searches run on.
algorithm = BeeColonyAlgorithm(net, configuration)

In [65]:
# Sanity check: weighted cost of the route currently bound to `path`.
algorithm.cost_function(path)

97.86650000000002

In [74]:
# Recompute by hand the two raw components that cost_function combines:
# total price, and whole minutes from first departure to last arrival.
overall_price = sum(flight.price for flight in path)
overall_time = (path[-1].arrival - path[0].departure) // timedelta(minutes=1)

(overall_time, overall_price)

(325, 228.2)

In [68]:
%%time
# NOTE: this rebinds `path` from the flight list returned by random_dfs_search
# to the SolutionPath returned by run(); later cells iterate and index it the same way.
path = algorithm.run(source, target)
path.cost

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [05:46<00:00,  3.47s/it]

CPU times: user 5min 49s, sys: 248 ms, total: 5min 49s
Wall time: 5min 49s





291.12

In [73]:
# Print the route returned by the algorithm one flight at a time: airports,
# departure/arrival, price, followed by an empty separator line.
for flight in path:
    print(flight.origin.name, '->', flight.destination.name)
    print(flight.departure, flight.arrival)
    print(flight.price, end='\n\n')

Albany International Airport -> Chicago O'Hare International Airport
2015-05-18 06:49:00 2015-05-18 09:10:00
94.58

Chicago O'Hare International Airport -> City of Colorado Springs Municipal Airport
2015-05-18 09:30:00 2015-05-18 12:14:00
133.62

