In [23]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from pulp import *
from copy import deepcopy
# Step up one directory at a time for as long as the working-directory path
# contains the substring "notebooks". Presumably this lands on the project
# root so the `src.*` imports and relative data paths resolve — confirm.
while "notebooks" in os.getcwd():
    os.chdir(os.pardir)

from src.preprocessing.parser import Parser
from src.preprocessing.preprocessor import Preprocessor
from src.solvers.solution import Solution
from src.solvers.greedy import GreedySolver
from src.solvers.pulp_solver import PuLPSolver
from src.solvers.dp import DPSolver
from src.solvers.base_solver import DPMethods
from src.solvers.bb import BBSolver

from time import time
from tqdm import tqdm
from typing import Dict

In [25]:
def get_num_triples(dataset: Dict):
    """Count the entries held across all values of ``dataset``.

    Args:
        dataset: Dictionary whose values support ``len()`` (for example the
            ``Dict[int, pd.DataFrame]`` used in this notebook).

    Returns:
        The sum of ``len(value)`` over every value; ``0`` for an empty dict.
    """
    return sum(len(entries) for entries in dataset.values())

## Test 04

In [26]:
# Parser for the test-4 instance file; the path is relative to the working directory.
parser = Parser("data/testfiles/test4.txt")

In [27]:
# Read the instance file and measure how long parser.read() takes.
t0 = time()
info = parser.read()
t1 = time()

# Elapsed wall-clock seconds between the two time() stamps above.
print(f"Total execution time {t1 - t0} s")

6776it [00:00, 24305.84it/s]

76804it [00:02, 34161.19it/s]


Total execution time 6.635010242462158 s


In [28]:
# Pull the parsed pieces out of `info`: the per-key tables and the
# instance parameters p, K, M, N that every solver below is built with.
dataset: Dict[int, pd.DataFrame] = info['data']
p, K, M, N = (info[key] for key in ('p', 'K', 'M', 'N'))

In [29]:
# Run the three preprocessing steps as a chain (each step consumes the output
# of the previous one) and record, per step, its runtime in seconds and the
# number of triples that remain afterwards.
preprocessing_results = {}
preprocessor = Preprocessor(K, M, N, p, dataset)

# Step 1: remove trivial values.
t0 = time()
data_1 = preprocessor.remove_trivial_values()
t1 = time()
preprocessing_results['remove trivial values'] = {
    "runtime": t1 - t0,
    "num_triples": get_num_triples(data_1),
}

# Step 2: remove IP-dominated triples.
# Fix: this step previously received the raw `dataset`, so the result of
# step 1 was only counted and never fed forward. It now runs on `data_1`,
# mirroring how step 3 runs on `data_2`.
t1 = time()  # re-stamp so the triple counting above is not billed to this step
data_2 = preprocessor.remove_ip_dominated(data_1)
t2 = time()
preprocessing_results['remove IP dominated'] = {
    "runtime": t2 - t1,
    "num_triples": get_num_triples(data_2),
}

# Step 3: remove LP-dominated triples.
t2 = time()  # re-stamp for the same reason as above
data_3 = preprocessor.remove_lp_dominated(data_2)
t3 = time()
preprocessing_results['remove LP dominated'] = {
    "runtime": t3 - t2,
    "num_triples": get_num_triples(data_3),
}


  0%|          | 0/640 [00:00<?, ?it/s]

100%|██████████| 640/640 [00:00<00:00, 6811.68it/s]
100%|██████████| 640/640 [03:09<00:00,  3.37it/s]
100%|██████████| 640/640 [00:05<00:00, 119.39it/s]


In [30]:
# One column per preprocessing step, one row per recorded metric (runtime, num_triples).
pd.DataFrame(preprocessing_results)

Unnamed: 0,remove trivial values,remove IP dominated,remove LP dominated
runtime,20.145691,189.846306,17.184645
num_triples,614400.0,14688.0,4974.0


## Solutions to the LP (PuLP and greedy algorithm)

In [31]:
# Collects, per LP solver name, the measured runtime and the achieved data rate.
lp_results = {}

In [33]:
# PuLP solver on the output of the last preprocessing step (data_3).
pulp_solver = PuLPSolver(K, M, N, p, data_3)

# Only the solve() call itself is timed.
t1 = time()
pulp_solver.solve()
t2 = time()

# Store the elapsed seconds and the objective value under the 'pulp' key.
lp_results['pulp'] = {
    'runtime': t2 - t1,
    'data_rate': pulp_solver.solution.objective.value(),
}

In [34]:
# Greedy solver on the same preprocessed data as the PuLP run.
solver = GreedySolver(K, M, N, p, data_3)

# Only the solve() call itself is timed; note that it is handed data_3 explicitly.
t1 = time()
solver.solve(data_3)
t2 = time()

# Store the elapsed seconds and the solution's data rate under the 'greedy' key.
lp_results['greedy'] = {
    'runtime': t2 - t1,
    'data_rate': solver.solution.get_data_rate(),
}


In [35]:
# One column per LP solver, one row per recorded metric (runtime, data_rate).
pd.DataFrame(lp_results)

Unnamed: 0,pulp,greedy
runtime,0.11252,1.155703
data_rate,9870.321839,9870.321839


## Solutions to the ILP

In [36]:
# Collects, per ILP method name, the measured runtime and the achieved data rate.
ilp_results = {}

In [37]:
# One DP solver instance, built on the preprocessed data, serves both variants.
dp_solver = DPSolver(K, M, N, p, data_3)

# Variant 1: DPMethods.MAXIMIZE_R. Only the solve() call is timed.
t1 = time()
dp_solver.solve(None, method=DPMethods.MAXIMIZE_R)
t2 = time()
ilp_results['DP_maximize_r'] = {
    'runtime': t2 - t1,
    "data_rate": dp_solver.solution.get_data_rate(),
}

# Re-run the PuLP solver outside the timed region and truncate its objective
# value to an int; the result is handed to the second DP variant as U.
pulp_solver.solve()
U = int(pulp_solver.solution.objective.value())

# Variant 2: DPMethods.MINIMIZE_P with the bound U. Only the solve() call is timed.
t1 = time()
dp_solver.solve(None, method=DPMethods.MINIMIZE_P, U=U)
t2 = time()

ilp_results['DP_minimize_p'] = {
    'runtime': t2 - t1,
    "data_rate": dp_solver.solution.get_data_rate(),
}

### Branch and Bound

In [38]:
# Branch-and-bound solver on the same preprocessed data as the DP runs.
bb_solver = BBSolver(K, M, N, p, data_3)

In [40]:
# Time the branch-and-bound solve and record its best rate alongside the DP results.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt and the
# results table that follows has no 'Branch&Bound' column, so this run apparently
# never completed and nothing was recorded — confirm before relying on it.
t1 = time()
bb_solver.solve()
t2 = time()

ilp_results['Branch&Bound'] = {
    'runtime' : t2 - t1,
    "data_rate" : bb_solver.best_rate
}

KeyboardInterrupt: 

In [41]:
# One column per ILP method that finished, one row per recorded metric (runtime, data_rate).
pd.DataFrame(ilp_results)

Unnamed: 0,DP_maximize_r,DP_minimize_p
runtime,81.193411,40.678555
data_rate,9870.0,9870.0
