In [1]:
import pandas as pd
import numpy as np

In [2]:
tasks_dataframe = pd.read_csv(r"C:\Users\Marina\OneDrive\Desktop\Cell_a.csv")

In [3]:
# Removing rows with CPU usage equal to 0 from the dataset
cpu_usage = tasks_dataframe['CPU'] > 0
tasks_dataframe = tasks_dataframe.drop(tasks_dataframe[~cpu_usage].index)
tasks_dataframe

Unnamed: 0,Job_ID,Task_ID,Arrival_Time,CPU,Memory
0,375000667413,0,603026300,0.041851,0.001169
1,375000669289,0,606413041,0.024968,0.001179
2,375000670586,0,608994453,0.024176,0.001173
3,375000670588,0,608994466,0.019552,0.001163
4,375000670590,0,609042903,0.028044,0.001179
...,...,...,...,...,...
2329127,400465207745,0,2678935469565,0.004677,0.000067
2329128,400465219425,0,2678943690687,0.000343,0.000004
2329129,400465219425,1,2678943690687,0.000557,0.000004
2329130,400465256347,0,2678955330224,0.002459,0.000050


In [4]:
# Converting the Arrival Time column from microseconds to seconds
tasks_dataframe['Arrival_Time'] = tasks_dataframe['Arrival_Time'] / 1000000

In [50]:
import csv
import itertools

class Task:
    def __init__(self, job_id, task_id, arrival_time, cpu_usage, memory_usage):
        self.job_id = job_id
        self.task_id = task_id
        self.arrival_time = arrival_time
        self.cpu_usage = cpu_usage
        self.memory_usage = memory_usage
        self.completion_time = None
        self.service_time = cpu_usage / 0.1

    def __lt__(self, other):
        # Compare tasks based on remaining service time
        return self.cpu_usage < other.cpu_usage

# Function to simulate baseline (LWL dispatching and FCFS scheduling)
def calculate_completion_time_baseline(tasks, num_servers):
    servers = [[] for _ in range(N)]  # List of servers, each server can have multiple tasks
    unfinished_work = [0] *  N # Unfinished work for each server
    inter_arrival_times = [0] * len(tasks)
    task_completion_times = {}  # Dictionary to store completion time for each task
    service_times = []
    
    for n in range(len(tasks)):
        
        available_servers = [i for i, server_tasks in enumerate(servers) if not server_tasks]  # Find servers with no tasks
        if available_servers:
            server_id = available_servers[0]  # Choose the first available server
        else:
            server_id = unfinished_work.index(min(unfinished_work))  # Find the server with the least unfinished work
        
        # Assign the task to the server
        servers[server_id].append(tasks[n])

        # Calculate completion time for the task
        inter_arrival_time = tasks[n].arrival_time - inter_arrival_times[n - 1]
        inter_arrival_times[n] = tasks[n].arrival_time
        unfinished_work[server_id] = max(0, unfinished_work[server_id] - inter_arrival_time) + tasks[n].service_time
        service_times.append(tasks[n].service_time)
        task_completion_times[(tasks[n].job_id, tasks[n].task_id, tasks[n].arrival_time, tasks[n].cpu_usage)] = tasks[n].arrival_time + unfinished_work[server_id]
    
    
        
    return task_completion_times, service_times


import heapq

# Function to simulate LWS dispatching and JSN scheduling
def calculate_completion_time_SJN(tasks, num_servers):
    servers = [[] for _ in range(num_servers)]
    unfinished_work = [0] * num_servers
    inter_arrival_times = [0] * len(tasks)
    task_completion_times = {}
    service_times = []

    # Use a priority queue to order tasks by shortest job next (SJN)
    task_queue = []
    for n in range(len(tasks)):
        heapq.heappush(task_queue, (tasks[n].arrival_time, tasks[n].service_time, tasks[n]))

    while task_queue:
        # Get the next task with the shortest remaining service time
        _, _, task = heapq.heappop(task_queue)

        # Find the server with the least unfinished work
        server_id = unfinished_work.index(min(unfinished_work))

        # Assign the task to the server
        servers[server_id].append(tasks[n])

        # Calculate completion time for the task
        inter_arrival_time = tasks[n].arrival_time - inter_arrival_times[n - 1]
        inter_arrival_times[n] = tasks[n].arrival_time
        unfinished_work[server_id] = max(0, unfinished_work[server_id] - inter_arrival_time) + tasks[n].service_time
        service_times.append(tasks[n].service_time)
        task_completion_times[(tasks[n].job_id, tasks[n].task_id, tasks[n].arrival_time, tasks[n].cpu_usage)] = unfinished_work[server_id]

    return task_completion_times, service_times


In [51]:
# Read the dataset and create Task objects
tasks = []
for index, row in tasks_dataframe.iterrows():
    job_id = int(row[0])
    task_id = int(row[1])
    arrival_time = float(row[2])
    cpu_usage = float(row[3])
    memory_usage = float(row[4])
    
    task = Task(job_id, task_id, arrival_time, cpu_usage, memory_usage)
    tasks.append(task)

        
# Calculate completion time for each task
N = 64

# Function to simulate baseline (LWL dispatching and FCFS scheduling)
completion_times_baseline, service_times_baseline = calculate_completion_time_baseline(tasks, N)

# Function to simulate LWL dispatching and SJN scheduling
completion_times_SJN, service_times_SJN = calculate_completion_time_SJN(tasks, N)


In [52]:
job_response_times_baseline = {}
for (job_id, task_id, arrival_time, cpu_usage), completion_time in completion_times_baseline.items():
    if job_id not in job_response_times_baseline:
        job_response_times_baseline[job_id] = completion_time
    else:
        job_response_times_baseline[job_id] += completion_time
        
job_response_time_baseline = sum(job_response_times_baseline.values())
job_mean_response_time_baseline = job_response_time_baseline/len(job_response_times_baseline.values())
print(job_mean_response_time_baseline)


job_response_times_SJN = {}
for (job_id, task_id, arrival_time, cpu_usage), completion_time in completion_times_SJN.items():
    if job_id not in job_response_times:
        job_response_times_SJN[job_id] = completion_time
    else:
        job_response_times_SJN[job_id] += completion_time
        
job_response_time_SJN = sum(job_response_times_SJN.values())
job_mean_response_time_SJN = job_response_time_SJN/len(job_response_times_SJN.values())
print(job_mean_response_time_SJN)

3772276.438344625
0.043487548828125


In [53]:
job_slowdown_S_baseline = job_response_time_baseline/sum(service_times_baseline)
print(job_slowdown_S_baseline)

job_slowdown_S_SJN = job_response_time_SJN/sum(service_times_SJN)
print(job_slowdown_S_SJN)

47143.89302399442
4.3804470859513806e-07


In [55]:
# DA RIVEDERE, probabilmente non è corretta
total_simulation_time = 31
utilizations_baseline = [service_time / total_simulation_time for service_time in service_times_baseline]
mean_utilization_baseline = sum(utilizations_baseline) / 64
#display(pd.DataFrame(utilizations_baseline))
print(mean_utilization_baseline)

print(" ")

total_simulation_time = 31
utilizations_SJN = [service_time / total_simulation_time for service_time in service_times_SJN]
mean_utilization_SJN = sum(utilizations_SJN) / 64
#print(utilizations_SJN)
print(mean_utilization_SJN)

46519.07090246972
 
50.038562283242335


In [57]:
######ESEMPIO######
tasks_2 = [Task(0, 0, 0, 1.5, 0.3),
         Task(0, 1, 2, 2.2, 0.5),
         Task(1, 1, 3, 0.8, 0.7),
         Task(2, 0, 4, 3.0, 0.9)]
            
    
µ = 0.1
N = 2
completion_times, service_times = calculate_completion_time_baseline(tasks_2, N)

# Print completion time for each task
for (job_id, task_id, arrival_time, cpu_usage), completion_time in completion_times.items():
    print(f"Job ID: {job_id}, Task ID: {task_id}, Arrival Time : {arrival_time}, CPU : {cpu_usage},  Completion Time: {completion_time}")

Job ID: 0, Task ID: 0, Arrival Time : 0, CPU : 1.5,  Completion Time: 15.0
Job ID: 0, Task ID: 1, Arrival Time : 2, CPU : 2.2,  Completion Time: 24.0
Job ID: 1, Task ID: 1, Arrival Time : 3, CPU : 0.8,  Completion Time: 25.0
Job ID: 2, Task ID: 0, Arrival Time : 4, CPU : 3.0,  Completion Time: 55.0


In [58]:
job_response_times = {}
for (job_id, task_id, arrival_time, cpu_usage), completion_time in completion_times.items():
    if job_id not in job_response_times:
        job_response_times[job_id] = completion_time
    else:
        job_response_times[job_id] += completion_time
        
job_response_time = sum(job_response_times.values())
job_mean_response_time = job_response_time/len(job_response_times.values())
job_mean_response_time

39.666666666666664

In [59]:
job_slowdown_S = job_response_time/sum(service_times)
job_slowdown_S

1.5866666666666667