In [18]:
import statistics
import math

In [19]:
from benchmarkrewriter.benchmark_parser import WorkerBenchmarkParser, BenchmarkParser
import os
includeWorkers = False
if includeWorkers:
    path = r'C:\Users\huda\Downloads\benchmarks_with_workers\benchmarks_with_workers'
    parser = WorkerBenchmarkParser()
else:
    path = r'C:\Users\huda\Documents\GitHub\scheduling_model_jrc\code\upgrades\benchmarks\all'
    parser = BenchmarkParser()

In [20]:
def calculate_flexibility(machines_for_all_operations, n_machines): 
    counts = []

    for m in machines_for_all_operations:
        counts.append(len(m))
    
    return statistics.mean(counts) / n_machines

def calculate_duration_variety(durations):
    counts = []

    for d in durations:
        counts.append(len(d))

    return len(set(counts)) / len(counts)

def calculate_average_operations(job_sequence):
    n_jobs = 1
    counts = [1]
    for i in range(1, len(job_sequence)):
        if job_sequence[i] != job_sequence[i-1]:
            n_jobs+=1
            counts.append(0)
        counts[-1] += 1
    return (sum(counts))/n_jobs

def calculate_duration_features_workers(durations):
    all_durations = []
    for duration in durations:
        for d in duration:
            all_durations.extend(d)
    all_durations = [d for d in all_durations if d != 0]
    min_duration = min(all_durations)
    max_duration = max(all_durations)
    span = max_duration - min_duration
    stdev = statistics.stdev(all_durations)
    return min_duration, max_duration, statistics.mean(all_durations), span, stdev

def calculate_duration_features(durations):
    all_durations = []
    for duration in durations:
        all_durations.extend(duration)
    all_durations = [d for d in all_durations if d != 0]
    min_duration = min(all_durations)
    max_duration = max(all_durations)
    span = max_duration - min_duration
    #stdev = statistics.stdev(all_durations)
    mean = sum(all_durations)/len(all_durations)
    var = sum(pow(x-mean, 2) for x in all_durations) / len(all_durations)
    stdev = math.sqrt(var)
    return min_duration, max_duration, statistics.mean(all_durations), span, stdev

In [21]:
instances = os.listdir(path)
instance_data = []
feature_vectors = []
consider_duration_features = True
for instance in instances:
    encoding = parser.parse_benchmark(path + "\\" + instance)
    feature_vector = []
    min_d, max_d, d_mean, d_span, d_stdev = calculate_duration_features(encoding.durations()) if not includeWorkers else calculate_duration_features_workers(encoding.durations())
    #TODO: worker data, or just do it with base FJSSP since all benchmarks are transformed in the same way
    extracted_data = {
        "name": instance[:-4],
        "n_jobs": encoding.n_jobs(), 
        "n_machines": encoding.n_machines(),
        "n_operations": encoding.n_operations(),
        "durations": encoding.durations(),
        "job_sequence": encoding.job_sequence(),
        "machines_for_all_operations": encoding.get_all_machines_for_all_operations() if includeWorkers else encoding.get_machines_for_all_operations(),
        "flexibility": calculate_flexibility(encoding.get_all_machines_for_all_operations() if includeWorkers else encoding.get_machines_for_all_operations(), encoding.n_machines()),
        "duration_variety": calculate_duration_variety(encoding.durations()),
        "average_operations": calculate_average_operations(encoding.job_sequence()),
        "min_duration": min_d,
        "max_duration": max_d,
        "duration_span": d_span,
        "duration_std": d_stdev,
        "duration_mean": d_mean
    }
    feature_vector = [encoding.n_jobs(), encoding.n_machines(), encoding.n_operations(), extracted_data["flexibility"], extracted_data["duration_variety"], extracted_data["average_operations"]]
    if consider_duration_features:
        feature_vector.extend([min_d, max_d, d_span, d_stdev])
    feature_vectors.append(feature_vector)
    instance_data.append(extracted_data)

In [44]:
optimal_benchmarks = ['BrandimarteMk1', 'HurinkSdata10', 'HurinkSdata11', 'HurinkSdata12', 'HurinkSdata13', 'HurinkSdata14', 'HurinkSdata15', 'HurinkSdata16', 'HurinkSdata17', 'HurinkSdata18', 'HurinkSdata1', 'HurinkSdata34', 'HurinkSdata49', 'HurinkSdata4', 'HurinkSdata52', 'HurinkSdata8', 'HurinkSdata9', 'HurinkEdata1', 'HurinkEdata52', 'HurinkEdata53', 'HurinkEdata5', 'HurinkEdata7', 'HurinkEdata8', 'HurinkRdata19', 'HurinkRdata1', 'HurinkRdata45', 'HurinkRdata55', 'HurinkVdata19', 'HurinkVdata1', 'HurinkVdata20', 'HurinkVdata21', 'HurinkVdata22', 'HurinkVdata23', 'HurinkVdata2', 'HurinkVdata39', 'HurinkVdata40', 'HurinkVdata41', 'HurinkVdata42', 'HurinkVdata43', 'HurinkVdata45', 'HurinkVdata54', 'HurinkVdata55', 'HurinkVdata57', 'HurinkVdata59', 'HurinkVdata60', 'HurinkVdata61', 'HurinkVdata62', 'HurinkVdata64', 'HurinkVdata65', 'HurinkVdata66', 'Kacem1', 'Kacem2', 'Kacem3', 'Fattahi10', 'Fattahi11', 'Fattahi12', 'Fattahi13', 'Fattahi14', 'Fattahi15', 'Fattahi16', 'Fattahi1', 'Fattahi2', 'Fattahi3', 'Fattahi4', 'Fattahi5', 'Fattahi6', 'Fattahi7', 'Fattahi8', 'Fattahi9']

In [45]:
'abc' in 'abdc'

False

In [46]:
count = 0
for instance in instance_data:
    if instance['name'] in optimal_benchmarks:
        count += 1
print(count)

69


In [60]:
optimal_data = {
    'flexibility': [],
    'duration_variety': [],
    'operations': [],
    'machines': [],
    'duration_mean': [],
    'duration_std': [],
    'duration_span': []
}
for instance in instance_data:
    if instance['name'] in optimal_benchmarks:
        optimal_data['flexibility'].append(instance['flexibility'])
        optimal_data['duration_variety'].append(instance['duration_variety'])
        optimal_data['operations'].append(instance['n_operations'])
        optimal_data['machines'].append(instance['n_machines'])
        optimal_data['duration_mean'].append(instance['duration_mean'])
        optimal_data['duration_std'].append(instance['duration_std'])
        optimal_data['duration_span'].append(instance['duration_span'])

In [61]:
print(max(optimal_data['duration_span']))
for metric in optimal_data:
    print(f'{metric}: Average: {statistics.mean(optimal_data[metric])} | Stdv: {statistics.stdev(optimal_data[metric])}')

987
flexibility: Average: 0.34336156053885647 | Stdv: 0.1561453373748901
duration_variety: Average: 0.014287164668938068 | Stdv: 0.007919366273508763
operations: Average: 94.21153846153847 | Stdv: 58.53048025734416
machines: Average: 8 | Stdv: 3.199264621385211
duration_mean: Average: 105 | Stdv: 148.9798644112687
duration_std: Average: 59.39160371612854 | Stdv: 90.15849176626433
duration_span: Average: 206 | Stdv: 309.4899029047636


In [49]:
optimal_data = {
    'flexibility': [],
    'duration_variety': [],
    'operations': [],
    'machines': [],
    'duration_mean': [],
    'duration_std': [],
    'duration_span': []
}
for instance in instance_data:
    if instance['name'] in optimal_benchmarks and instance['duration_span'] > 100:
        optimal_data['flexibility'].append(instance['flexibility'])
        optimal_data['duration_variety'].append(instance['duration_variety'])
        optimal_data['operations'].append(instance['n_operations'])
        optimal_data['machines'].append(instance['n_machines'])
        optimal_data['duration_mean'].append(instance['duration_mean'])
        optimal_data['duration_std'].append(instance['duration_std'])
        optimal_data['duration_span'].append(instance['duration_span'])

In [50]:
for metric in optimal_data:
    print(f'{metric}: Average: {statistics.mean(optimal_data[metric])} | Stdv: {statistics.stdev(optimal_data[metric])}')

flexibility: Average: 0.3658487601389762 | Stdv: 0.10001178418481162
duration_variety: Average: 0.05104284137897583 | Stdv: 0.03553935262204089
operations: Average: 32.35294117647059 | Stdv: 21.948636564917273
machines: Average: 5.882352941176471 | Stdv: 1.5764815627361644
duration_mean: Average: 268 | Stdv: 180.2553743997665
duration_std: Average: 145.4242010799654 | Stdv: 120.19598159948342
duration_span: Average: 525 | Stdv: 393.3103100606441


In [58]:
optimal_data = {
    'flexibility': [],
    'duration_variety': [],
    'operations': [],
    'machines': [],
    'duration_mean': [],
    'duration_std': [],
    'duration_span': []
}
for instance in instance_data:
    if instance['duration_span'] > 100:
        optimal_data['flexibility'].append(instance['flexibility'])
        optimal_data['duration_variety'].append(instance['duration_variety'])
        optimal_data['operations'].append(instance['n_operations'])
        optimal_data['machines'].append(instance['n_machines'])
        optimal_data['duration_mean'].append(instance['duration_mean'])
        optimal_data['duration_std'].append(instance['duration_std'])
        optimal_data['duration_span'].append(instance['duration_span'])

In [59]:
for metric in optimal_data:
    print(f'{metric}: Average: {statistics.mean(optimal_data[metric])} | Stdv: {statistics.stdev(optimal_data[metric])}')

flexibility: Average: 0.3293479621105489 | Stdv: 0.1312458618360487
duration_variety: Average: 0.029322552034605605 | Stdv: 0.026659393211770525
operations: Average: 55.520833333333336 | Stdv: 43.122778491450376
machines: Average: 6.375 | Stdv: 2.159836479231999
duration_mean: Average: 356 | Stdv: 174.12064782787823
duration_std: Average: 200.60853617192106 | Stdv: 109.40443710017388
duration_span: Average: 745 | Stdv: 370.6804553790232


In [51]:
optimal_benchmarks = ['BrandimarteMk1', 'HurinkSdata10', 'HurinkSdata11', 'HurinkSdata12', 'HurinkSdata13', 'HurinkSdata14', 'HurinkSdata15', 'HurinkSdata16', 'HurinkSdata17', 'HurinkSdata18', 'HurinkSdata1', 'HurinkSdata34', 'HurinkSdata49', 'HurinkSdata4', 'HurinkSdata52', 'HurinkSdata8', 'HurinkSdata9', 'HurinkEdata1', 'HurinkEdata52', 'HurinkEdata53', 'HurinkEdata5', 'HurinkEdata7', 'HurinkEdata8', 'HurinkRdata19', 'HurinkRdata1', 'HurinkRdata45', 'HurinkRdata55', 'HurinkVdata19', 'HurinkVdata1', 'HurinkVdata20', 'HurinkVdata21', 'HurinkVdata22', 'HurinkVdata23', 'HurinkVdata2', 'HurinkVdata39', 'HurinkVdata40', 'HurinkVdata41', 'HurinkVdata42', 'HurinkVdata43', 'HurinkVdata45', 'HurinkVdata54', 'HurinkVdata55', 'HurinkVdata57', 'HurinkVdata59', 'HurinkVdata60', 'HurinkVdata61', 'HurinkVdata62', 'HurinkVdata64', 'HurinkVdata65', 'HurinkVdata66', 'Kacem2', 'Fattahi16']

In [52]:
optimal_data = {
    'flexibility': [],
    'duration_variety': [],
    'operations': [],
    'machines': [],
    'duration_mean': [],
    'duration_std': [],
    'duration_span': []
}
for instance in instance_data:
    if instance['name'] in optimal_benchmarks:
        optimal_data['flexibility'].append(instance['flexibility'])
        optimal_data['duration_variety'].append(instance['duration_variety'])
        optimal_data['operations'].append(instance['n_operations'])
        optimal_data['machines'].append(instance['n_machines'])
        optimal_data['duration_mean'].append(instance['duration_mean'])
        optimal_data['duration_std'].append(instance['duration_std'])
        optimal_data['duration_span'].append(instance['duration_span'])

In [53]:
for metric in optimal_data:
    print(f'{metric}: Average: {statistics.mean(optimal_data[metric])} | Stdv: {statistics.stdev(optimal_data[metric])}')

flexibility: Average: 0.34336156053885647 | Stdv: 0.1561453373748901
duration_variety: Average: 0.014287164668938068 | Stdv: 0.007919366273508763
operations: Average: 94.21153846153847 | Stdv: 58.53048025734416
machines: Average: 8 | Stdv: 3.199264621385211
duration_mean: Average: 105 | Stdv: 148.9798644112687
duration_std: Average: 59.39160371612854 | Stdv: 90.15849176626433
duration_span: Average: 206 | Stdv: 309.4899029047636


In [None]:
optimal_data = {
    'flexibility': [],
    'duration_variety': [],
    'operations': [],
    'machines': [],
    'duration_mean': [],
    'duration_std': [],
    'duration_span': []
}
i = 25
c = 0
d = dict()
while c < (1000/i):
    for instance in instance_data:
        if instance['duration_span'] >= i*c and instance['duration_span'] <= (c+1)*i:
            optimal_data['flexibility'].append(instance['flexibility'])
            optimal_data['duration_variety'].append(instance['duration_variety'])
            optimal_data['operations'].append(instance['n_operations'])
            optimal_data['machines'].append(instance['n_machines'])
            optimal_data['duration_mean'].append(instance['duration_mean'])
            optimal_data['duration_std'].append(instance['duration_std'])
            optimal_data['duration_span'].append(instance['duration_span'])
    c+=1
    plot_data = {
        'flexibility': [[],[]],
        'duration_variety': [[],[]],
        'operations': [[],[]],
        'machines': [[],[]],
        'duration_mean': [[],[]],
        'duration_std': [[],[]],
        'duration_span': [[],[]],
        'count': len(optimal_data['flexibility'])
    }
    for metric in optimal_data:
        plot_data[metric][0] = statistics.mean(optimal_data[metric])
        plot_data[metric][1] = statistics.stdev(optimal_data[metric])
    d[c].append(plot_data)


