In [1]:
import pandas as pd
import numpy as np
import math
from functools import cmp_to_key

In [3]:
#mount your google drive.
#it will be visible in the file navigator on the left of this notebook
#there should be a folder in your drive with your data
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/My Drive/Colab Notebooks

Mounted at /content/drive
/content/drive/My Drive/Colab Notebooks


In [4]:
df = pd.read_csv('google-cluster-data-1.csv',sep=' ')
df

Unnamed: 0,Time,ParentID,TaskID,JobType,NrmlTaskCores,NrmlTaskMem,Unnamed: 6
0,90000,757745334,1488529826,0,0.000000,0.031130,
1,90000,975992247,1488529821,0,0.000000,0.000000,
2,90000,1468458091,1488529832,1,0.021875,0.002353,
3,90000,1460281235,1488529840,0,0.000000,0.000000,
4,90000,1164728954,1488529835,0,0.003125,0.001638,
...,...,...,...,...,...,...,...
3535024,112500,1487094655,1487103476,0,0.000000,0.000879,
3535025,112500,1461321601,1465612301,0,0.000000,0.000879,
3535026,112500,1487094655,1487097223,0,0.000000,0.000879,
3535027,112500,618817162,1485932004,1,0.000000,0.000879,


## Append Execution Time and Turnaround Time information to dataframe
* Add total execution time list to dataframe under key "ExecutionTime". A task queue is created for each time quantum for which tasks are allocated. Tasks are allocated to a given task queue based on their arrival time and remaining execution time. Therefore, total execution time is used to determine which task queues each task should be placed in.
* Add execution time remaining list to datafram under key "ExecutionTimeRemaining". Execution time remaining is used by the SJF algorithm to determine which tasks should be allocated to the VMs first. 
* Add columns to store the turnaround times for each version of the greedy algorithm (power, cost, and just the turnaround time).

In [5]:
def powerVMAlloc(vms, task_idx, vm_idx, all_past_threshold):
  can_fit = df['NrmlTaskCores'][task_idx] <= vms[0][vm_idx] and df['NrmlTaskMem'][task_idx] <= vms[1][vm_idx]
  if all_past_threshold:
    return can_fit
  else:
    dy_pwr_threshold = 0.5
    
    cpu = df['NrmlTaskCores'][task_idx]

    # current vm calculation
    cpu_cap = 5.0
    ccr = float(vms[0][vm_idx]) - float(cpu) # current core remaining 

    usage_rate_curr = 1.0 - float(ccr/cpu_cap)   

    return can_fit and usage_rate_curr < dy_pwr_threshold      


In [6]:
def powerConsumption(myVmSpace):
    c = 5
    a = 100
    b = 200
    threshold = 0.5
    
    Pwr_dy = 0
    Pwr_st = 0
    Pwr = 0
    
    for core in myVmSpace[0]:
        coreUsage = 1-core/5
        if(coreUsage > 0):
            Pwr_st = c
        if(coreUsage < threshold):
            Pwr_dy = a*coreUsage
        else:
            Pwr_dy = a*threshold + b*(coreUsage-threshold)*(coreUsage-threshold)
        Pwr = Pwr + Pwr_dy + Pwr_st
    return Pwr

In [37]:
def RR(taskQueue, myVmSpace, currTime):
               
    VM_iter = 0
    #will iterate through all possible starting VMs, increments by one each time
    VM_jter = 0
    #incrementer used when task does not fit in the first potentially available VM
    
    rejectQueue = []

    time_quantum = 300
               
    for task_idx in taskQueue:
        if(df['NrmlTaskCores'][task_idx] <= myVmSpace[0][VM_iter] and df['NrmlTaskMem'][task_idx] <= myVmSpace[1][VM_iter]):
            #if the number of cores and memory space the task requires is less than the available for the VMS
            #then we can allocate the task to the current VM
            myVmSpace[0][VM_iter] -= df['NrmlTaskCores'][task_idx]
            myVmSpace[1][VM_iter] -= df['NrmlTaskMem'][task_idx]
            #now that the task is allocated, we have to update the amount of free CPU and memory space
            finTime = currTime + time_quantum
            startTime = df["Time"][task_idx]
            df["TurnaroundTime_RR"][task_idx] = finTime - startTime
            # Save turnaround time
        else:
            allocated = False
            VM_jter = (VM_iter + 1) % 100
            #move to the next VM
            while(not allocated and VM_jter!=VM_iter):
                #if it is allocated then don't keep looking
                #if you've looked at all possible VMs and its not allocated, then it never will be so end loop
                if(df['NrmlTaskCores'][task_idx] <= myVmSpace[0][VM_jter] and df['NrmlTaskMem'][task_idx] <= myVmSpace[1][VM_jter]):
                    #if the number of cores and memory space the task requires is less than the available for the VMS
                    #then we can allocate the task to the current VM
                    myVmSpace[0][VM_jter] -= df['NrmlTaskCores'][task_idx]
                    myVmSpace[1][VM_jter] -= df['NrmlTaskMem'][task_idx]
                    #now that the task is allocated, we have to update the amount of free CPU and memory space
                    allocated = True
                    #allocate the task
                    # Save turnaround time
                    finTime = currTime + time_quantum
                    startTime = df["Time"][task_idx]
                    df["TurnaroundTime_RR"][task_idx] = finTime - startTime
                VM_jter = (VM_jter + 1) % 100
                #move to the next VM
            if(not allocated):
                rejectQueue.append(df['TaskID'][task_idx])
                #if the while loop finishes and the task is still not allocated
                #then it cycled through all VMs and did not fit any, so it cannot be allocated and should be added to reject queue
        VM_iter = (VM_iter + 1) % 100

    return rejectQueue, myVmSpace

# Greedy Algorithm 2 ways
* Optimize energy consumption
* Optimize cost

In [8]:
def greedy(task_queue, vms, optIdentifier, currTime):
    reject_queue = []
    time_quantum = 300

    all_past_threshold = False
    vm_iter = 0
    task_rejected = False
    check = False

    for task_idx in task_queue: 
        if task_rejected:
          reject_queue.append(task['TaskID'])
          continue

        if not all_past_threshold and optIdentifier == "power":
          should_alloc = powerVMAlloc(vms,task_idx,vm_iter,all_past_threshold)
          if not should_alloc and vm_iter == 99:
            vm_iter = 0
            all_past_threshold = True
          elif not should_alloc and vm_iter < 99:
            vm_iter += 1
            vms[0][vm_iter] -= df['NrmlTaskCores'][task_idx]
            vms[1][vm_iter] -= df['NrmlTaskMem'][task_idx]
            startTime = df['Time'][task_idx]
            exTime = df['ExecutionTime'][task_idx]
            if currTime == startTime + exTime - time_quantum:
              # Turnaround time is the curr time that the task finishes minus the arrival time
              finishTime = currTime + time_quantum
              df['TurnaroundTime_Power'][task_idx] = finishTime - startTime
          else:
            vms[0][vm_iter] -= df['NrmlTaskCores'][task_idx]
            vms[1][vm_iter] -= df['NrmlTaskMem'][task_idx]
            startTime = df['Time'][task_idx]
            exTime = df['ExecutionTime'][task_idx]
            if currTime == startTime + exTime - time_quantum:
              # Turnaround time is the curr time that the task finishes minus the arrival time
              finishTime = currTime + time_quantum
              df['TurnaroundTime_Power'][task_idx] = finishTime - startTime

        if all_past_threshold and optIdentifier == "power":
          should_alloc = powerVMAlloc(vms,task_idx,vm_iter,all_past_threshold)
          if not should_alloc:
            vm_jter = (vm_iter + 1) % 100
            should_alloc = False
            while vm_jter != vm_iter:
              should_alloc = powerVMAlloc(vms,task_idx,vm_jter,all_past_threshold)
              if should_alloc:
                vms[0][vm_jter] -= df['NrmlTaskCores'][task_idx]
                vms[1][vm_jter] -= df['NrmlTaskMem'][task_idx]
                vm_iter = (vm_iter + 1) % 100
                startTime = df['Time'][task_idx]
                exTime = df['ExecutionTime'][task_idx]
                if currTime == startTime + exTime - time_quantum:
                  # Turnaround time is the curr time that the task finishes minus the arrival time
                  finishTime = currTime + time_quantum
                  df['TurnaroundTime_Power'][task_idx] = finishTime - startTime
                break
              vm_jter = (vm_jter + 1) % 100
            if not should_alloc:
              rejectQueue.append(task['TaskID'])
              task_rejected = True
              continue
          else:
            vms[0][vm_iter] -= df['NrmlTaskCores'][task_idx]
            vms[1][vm_iter] -= df['NrmlTaskMem'][task_idx]
            vm_iter = (vm_iter + 1) % 100
            startTime = df['Time'][task_idx]
            exTime = df['ExecutionTime'][task_idx]
            if currTime == startTime + exTime - time_quantum:
              # Turnaround time is the curr time that the task finishes minus the arrival time
              finishTime = currTime + time_quantum
              df['TurnaroundTime_Power'][task_idx] = finishTime - startTime
        
        if not all_past_threshold and optIdentifier == "cost":
          # TODO: CHANGE FOR NEW COST CODE SETUP
          powerDelta = powerVMAlloc(vms,task_idx,vm_idx)
          prcIdx = int((currTime - 90000)/3600)
          optDelta = Price_t[prcIdx]*powerDelta
          if(optDelta < minOptDelta):
              minOptDelta = optDelta
              min_vm_idx = vm_idx
          # Make sure you first run the code that initializes the execution time / turnaround
          # dataframe before running the greedy algorithm! Do this every time
          startTime = df['Time'][task_idx]
          exTime = df['ExecutionTime'][task_idx]
          if(currTime == startTime + exTime - time_quantum):
              # Turnaround time is the curr time that the task finishes minus the arrival time
              finishTime = currTime + time_quantum
              df['TurnaroundTime_Cost'][task_idx] = finishTime - startTime
        
        if all_past_threshold and optIdentifier == "cost":
          # TODO: CHANGE FOR NEW COST CODE SETUP
          powerDelta = powerVMAlloc(vms,task_idx,vm_idx)
          prcIdx = int((currTime - 90000)/3600)
          optDelta = Price_t[prcIdx]*powerDelta
          if(optDelta < minOptDelta):
              minOptDelta = optDelta
              min_vm_idx = vm_idx
          # Make sure you first run the code that initializes the execution time / turnaround
          # dataframe before running the greedy algorithm! Do this every time
          startTime = df['Time'][task_idx]
          exTime = df['ExecutionTime'][task_idx]
          if(currTime == startTime + exTime - time_quantum):
              # Turnaround time is the curr time that the task finishes minus the arrival time
              finishTime = currTime + time_quantum
              df['TurnaroundTime_Cost'][task_idx] = finishTime - startTime

    return reject_queue, vms

# Shortest Job First Algorithm
The Shortest Job First algorithm is optimal for waiting time and thus turnaround time (assuming task execution time is time-invariant). The challenge in most cases is not knowing the task execution time before execution. However, in this example, we know each task's execution time in advance. 
# There are several steps involved in implimenting SJF. 
1. Create a dictionary containing all vms for all times for which the algorithm will be run. Creating all vms at once allows us to allocate a process to VMs for all of its execution time, not just the current time quantum. 
2. Sort tasks arriving at the current time in a list in ascending order. This way, if we iterate through the sorted tasks, priority will be given to shorter tasks. 
3. Iterate through the sorted tasks implimenting a round robin-like algorithm. If a task can fit in the first VM, then allocate it. Otherwise, go through the next VMs until you find one it can fit in. If there are no VMs that the task can fit in, then append it to the reject queue. Always start the next task at the VM after the last one that was filled. 
4. return the reject queue and vm dictionary. 


In [52]:
def sjf(time_params,time_quantum,vm_size):
    vms = {} #vms for all possible times
    rejectQueue = []
    # Reinitialize Turnaround times
    df["TurnaroundTime"] = [0]*len(df)
    # Worst case:
    # If setup == 1, vms will be used at the max arrival time + time quantum * 2 (b/c burst time max = 600 and time quantum = 300)
    # If setup == 2, max time for vms will be max arrival time + time quantum * 10 (b/c burst time max = 3000 and time quantum = 300)
    for time in range(time_params[0],time_params[1] + time_quantum * 2,time_quantum):
        vms[time] = [100*[vm_size[0]],100*[vm_size[1]]]
    for time in range(time_params[0],time_params[1],time_quantum):
        # print(time)
        df_t = df[df['Time'].isin([time])] #get the elements from a certain time
        # print(df_t)
        # print("Length of df for time " + str(time) + " is " + str(len(df_t)))
        vm_iter = 0
        task_rejected = 0

        for index,task in df_t.iterrows():
            if(task_rejected == 0):
                if(task["ExecutionTime"] == 300):
                    # print("Entered inner for loop, index = " + str(index) + " counter = " + str(ctr) + " time = " + str(time))
                    if(task['NrmlTaskCores'] <= vms[time][0][vm_iter]) and (task['NrmlTaskMem'] <= vms[time][1][vm_iter]):
                        vms[time][0][vm_iter] = vms[time][0][vm_iter] - task['NrmlTaskCores']
                        vms[time][1][vm_iter] = vms[time][1][vm_iter] - task['NrmlTaskMem']
                        finTime = time + 300
                        df["TurnaroundTime"][index] = finTime - task["Time"]
                    else: 
                        vm_iter = vm_iter+1
                        if(vm_iter < len(vms[time][0])):
                            vms[time][0][vm_iter] = vms[time][0][vm_iter] - task['NrmlTaskCores']
                            vms[time][1][vm_iter] = vms[time][1][vm_iter] - task['NrmlTaskMem']
                            finTime = time + 300
                            df["TurnaroundTime"][index] = finTime - task["Time"]
                        else:
                            task_rejected = 1
                            rejectQueue.append(task['TaskID'])
            else:
                rejectQueue.append(task['TaskID'])
        for index,task in df_t.iterrows():
            if(task_rejected == 0):
                if(task["ExecutionTime"] == 600):
                    # print("Entered inner for loop, index = " + str(index) + " counter = " + str(ctr) + " time = " + str(time))
                    if(task['NrmlTaskCores'] <= vms[time][0][vm_iter]) and (task['NrmlTaskMem'] <= vms[time][1][vm_iter]):
                        vms[time][0][vm_iter] = vms[time][0][vm_iter] - task['NrmlTaskCores']
                        vms[time][1][vm_iter] = vms[time][1][vm_iter] - task['NrmlTaskMem']
                        vms[time+300][0][vm_iter] = vms[time+300][0][vm_iter] - task['NrmlTaskCores']
                        vms[time+300][1][vm_iter] = vms[time+300][1][vm_iter] - task['NrmlTaskMem']
                        finTime = time + 600
                        df["TurnaroundTime"][index] = finTime - task["Time"]
                    else: 
                        vm_iter = vm_iter+1
                        if(vm_iter < len(vms[time][0])):
                            vms[time][0][vm_iter] = vms[time][0][vm_iter] - task['NrmlTaskCores']
                            vms[time][1][vm_iter] = vms[time][1][vm_iter] - task['NrmlTaskMem']
                            vms[time+300][0][vm_iter] = vms[time+300][0][vm_iter] - task['NrmlTaskCores']
                            vms[time+300][1][vm_iter] = vms[time+300][1][vm_iter] - task['NrmlTaskMem']
                            finTime = time + 600
                            df["TurnaroundTime"][index] = finTime - task["Time"]
                        else:
                            task_rejected = 1
            else:
                rejectQueue.append(task['TaskID'])
        print("Finished running time {}".format(time))
    return vms,rejectQueue

In [21]:
def createTaskQueuesRR():
    
    taskQueues = {}
    for time in range(90000,113400,300): # Finish by 113,100 by the latest (113400 not included)
        taskQueues[time] = []
    time = 90000
    for taskIdx in range(len(df['Time'])):
        if(df['Time'][taskIdx] == time):
            taskQueues[time].append(taskIdx)
            if(df['ExecutionTime'][taskIdx] > 300):
                taskQueues[time+300].append(taskIdx)
        else:
            time = time + 300
    return taskQueues

In [22]:
#created a new python dictionary with more intuitive formatting

taskQueuesRR = createTaskQueuesRR()
    

In [38]:
# Round Robin

timeQ = 300

rejectQueue = [] 
#queue of tasks that could not fit into any VM for all times
rejectQueue_t = []

Pwr_dy_t = {}
Pwr_st_t = {}
Pwr_t = {}
Cost_t = {}

for time in range(90000,113400,timeQ): # Go from 90000 to 113100 (113400 not included)
    Pwr_dy_t[time] = []
    Pwr_st_t[time] = []
    Pwr_t[time] = []
    Cost_t[time] = []

Price_t = [0.5, 0.5, 0.6, 0.6, 0.6, 0.7, 0.7, 0.6, 0.6, 0.8, 0.8, 0.8, 0.8]
totalCost = 0
totalPwr = 0

for time in range(90000,113400,timeQ): # Go from 90000 to 113100 (113400 not included)
    taskQueue_t = taskQueuesRR[time]
    vms = [100*[5],100*[10]]
    #reinitialize VMs for new time
    rejectQueue_t, vms = RR(taskQueue_t, vms, time)
    print("Finished running for time {}".format(time))
    if(rejectQueue_t != []):
        rejectQueue.append(rejectQueue_t)
    Pwr_t[time] = powerConsumption(vms)
    totalPwr += Pwr_t[time]
    prcIdx = int((time - 90000)/3600)
    Cost_t[time] = Price_t[prcIdx]*Pwr_t[time]
    totalCost += Cost_t[time]

print("Round Robin:")
print("\tTotal power:", totalPwr)
print("\tTotal cost: ", totalCost)

turnaroundtime_sum = 0
for index,task in df.iterrows():
    turnaroundtime_sum += task.TurnaroundTime_RR
avg_turnaroundtime = float(turnaroundtime_sum) / float(len(df))
print("\tAverage turnaround time:", avg_turnaroundtime)

print("\t" + str(len(rejectQueue)) + " rejections")
if(rejectQueue != []):
    np.save('taskReject_2_RR.npy',rejectQueue)

vmFileName = "VMs_2_RR.npy"
np.save(vmFileName, vms)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Finished running for time 90000
Finished running for time 90300


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Finished running for time 90600
Finished running for time 90900
Finished running for time 91200
Finished running for time 91500
Finished running for time 91800
Finished running for time 92100
Finished running for time 92400
Finished running for time 92700
Finished running for time 93000
Finished running for time 93300
Finished running for time 93600
Finished running for time 93900
Finished running for time 94200
Finished running for time 94500
Finished running for time 94800
Finished running for time 95100
Finished running for time 95400
Finished running for time 95700
Finished running for time 96000
Finished running for time 96300
Finished running for time 96600
Finished running for time 96900
Finished running for time 97200
Finished running for time 97500
Finished running for time 97800
Finished running for time 98100
Finished running for time 98400
Finished running for time 98700
Finished running for time 99000
Finished running for time 99300
Finished running for time 99600
Finished

  return array(a, dtype, copy=False, order=order, subok=True)


In [31]:
def compare(leftIdx, rightIdx):
  if df['NrmlTaskCores'][leftIdx] < df['NrmlTaskCores'][rightIdx]:
    return -1
  elif df['NrmlTaskCores'][leftIdx] == df['NrmlTaskCores'][rightIdx]:
    return 0
  else:
    return 1


def generateTaskQueues():
    taskQueues = {}
    for time in range(90000,113400,300):
        taskQueues[time] = []
    time = 90000
    for taskIdx in range(len(df['Time'])):
        if(df['Time'][taskIdx] == time):
            exTime = df['ExecutionTime'][taskIdx]
            timeCnt = 0
            while(exTime > 0):
                taskQueues[time+timeCnt].append(taskIdx)
                timeCnt = timeCnt + 300
                exTime = exTime - 300
        else:
            # Sort task queue for current time before going to next time
            taskQueues[time] = sorted(taskQueues[time], key=cmp_to_key(compare), reverse=False)
            time = time + 300
        
    return taskQueues

In [47]:
exec_time_arr = []
subtasks = 0
quantum = 300
rejectQueue = []
burstTime = None
for index,task in df.iterrows():
    taskID = task["TaskID"]
    burstTime = ((taskID%2)+1)*quantum      # only two options for burst time
    exec_time_arr.append(burstTime)
    subtasks += burstTime / quantum

time_dict = {
    'ExecutionTime': exec_time_arr,
    'TurnaroundTime_Power': [0]*len(df["Time"]),
    'TurnaroundTime_Cost': [0]*len(df["Time"]),
    'TurnaroundTime': [0]*len(df["Time"]),
    'TurnaroundTime_RR': [0]*len(df["Time"])
}
for key in time_dict.keys():
    df[key] = time_dict[key]
print("Subtasks:", subtasks)

# Create power and cost dictionaries
Pwr_t = {}
Cost_t = {}

Price_t = [0.5, 0.5, 0.6, 0.6, 0.6, 0.7, 0.7, 0.6, 0.6, 0.8, 0.8, 0.8, 0.8]

for time in range(90000,113400,300): 
    Pwr_t[time] = []
    Cost_t[time] = []

taskQueues = generateTaskQueues()

rejectQueue = []

KeyboardInterrupt: ignored

In [33]:

#power optimization
rejectQueue = []
totalPwr = 0
totalCost = 0
for time in range(90000,113400,300): #90000 - 113100 (113400 not included)
    taskQueue_t = taskQueues[time]
    vms = [100*[5.0],100*[10.0]]

    #reinitialize VMs for new time
    rejectQueue_t, vms_t = greedy(taskQueue_t,vms,"power", time)
    print("Finished running for time {}".format(time))
    if(rejectQueue_t != []):
        rejectQueue.extend(rejectQueue_t)
    Pwr_t[time] = powerConsumption(vms_t)
    totalPwr = totalPwr + Pwr_t[time]
    prcIdx = int((time - 90000)/3600)
    Cost_t[time] = Price_t[prcIdx]*Pwr_t[time]
    totalCost = totalCost + Cost_t[time]

print("Power optimization:")
print("\tTotal power:", totalPwr)
print("\tTotal cost: ", totalCost)

turnaroundtime_sum = 0
for index,task in df.iterrows():
    turnaroundtime_sum += task.TurnaroundTime_Power
avg_turnaroundtime = float(turnaroundtime_sum) / float(len(df))
print("\tAverage turnaround time:", avg_turnaroundtime)

print("\t" + str(len(rejectQueue)) + " rejections")
if (len(rejectQueue) > 0):
    rejectFileName = "taskReject_2_power.npy"
    np.save(rejectFileName, rejectQueue)
vmFileName = "VMs_2_power.npy"
np.save(vmFileName, vms)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Finished running for time 90000


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Finished running for time 90300
Finished running for time 90600
Finished running for time 90900
Finished running for time 91200
Finished running for time 91500
Finished running for time 91800
Finished running for time 92100
Finished running for time 92400
Finished running for time 92700
Finished running for time 93000
Finished running for time 93300
Finished running for time 93600
Finished running for time 93900
Finished running for time 94200
Finished running for time 94500
Finished running for time 94800
Finished running for time 95100
Finished running for time 95400
Finished running for time 95700
Finished running for time 96000
Finished running for time 96300
Finished running for time 96600
Finished running for time 96900
Finished running for time 97200
Finished running for time 97500
Finished running for time 97800
Finished running for time 98100
Finished running for time 98400
Finished running for time 98700
Finished running for time 99000
Finished running for time 99300
Finished

In [None]:
#cost optimization
totalPwr = 0
totalCost = 0
rejectQueue = []
for time in range(90000,113400,300): #90000 - 113100 (113400 not included)
    taskQueue_t = taskQueues[time]
    vms = [100*[5],100*[10]]
    #reinitialize VMs for new time
    rejectQueue_t, vms_t = greedy(taskQueue_t,vms,"cost",time)
    if(rejectQueue_t != []):
        rejectQueue.extend(rejectQueue_t)
    Pwr_t[time] = powerConsumption(vms_t)
    totalPwr = totalPwr + Pwr_t[time]
    prcIdx = int((time - 90000)/3600)
    Cost_t[time] = Price_t[prcIdx]*Pwr_t[time]
    totalCost = totalCost + Cost_t[time]

print("Cost optimization:")
print("\tTotal power:", totalPwr)
print("\tTotal cost: ", totalCost)

turnaroundtime_sum = 0
for index,task in df.iterrows():
    turnaroundtime_sum += task.TurnaroundTime_Cost
avg_turnaroundtime = float(turnaroundtime_sum) / float(len(df))
print("\tAverage turnaround time:", avg_turnaroundtime)

print("\t" + str(len(rejectQueue)) + " rejections")
if (len(rejectQueue) > 0):
    rejectFileName = "taskReject_2_cost.npy"
    np.save(rejectFileName, rejectQueue)
vmFileName = "VMs_2_cost.npy"
np.save(vmFileName, vms)

In [53]:
# Turnaround time optimization
totalPwr = 0
totalCost = 0
rejectQueue = []
vms,rejectQueue = sjf([90000,113400],300,[5,10]) #90000 - 113100 (113400 not included)
for time in range(90000,113400,300): #90000 - 113100 (113400 not included)
    vms_t = vms[time]
    Pwr_t[time] = powerConsumption(vms_t)
    totalPwr = totalPwr + Pwr_t[time]
    prcIdx = int((time - 90000)/3600)
    Cost_t[time] = Price_t[prcIdx]*Pwr_t[time]
    totalCost = totalCost + Cost_t[time]

print("Turnaround Time optimization:")
print("\tTotal power:", totalPwr)
print("\tTotal cost: ", totalCost)

turnaroundtime_sum = 0
for index,task in df.iterrows():
    turnaroundtime_sum += task.TurnaroundTime
avg_turnaroundtime = float(turnaroundtime_sum) / float(len(df))
print("\tAverage turnaround time:", avg_turnaroundtime)

print("\t" + str(len(rejectQueue)) + " rejections")
if (len(rejectQueue) > 0):
    rejectFileName = "taskReject_2_turnaround.npy"
    np.save(rejectFileName, rejectQueue)
vmFileName = "VMs_2_turnaround.npy"
np.save(vmFileName, vms)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Finished running time 90000


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Finished running time 90300
Finished running time 90600
Finished running time 90900
Finished running time 91200
Finished running time 91500
Finished running time 91800
Finished running time 92100
Finished running time 92400
Finished running time 92700
Finished running time 93000
Finished running time 93300
Finished running time 93600
Finished running time 93900
Finished running time 94200
Finished running time 94500
Finished running time 94800
Finished running time 95100
Finished running time 95400
Finished running time 95700
Finished running time 96000
Finished running time 96300
Finished running time 96600
Finished running time 96900
Finished running time 97200
Finished running time 97500
Finished running time 97800
Finished running time 98100
Finished running time 98400
Finished running time 98700
Finished running time 99000
Finished running time 99300
Finished running time 99600
Finished running time 99900
Finished running time 100200
Finished running time 100500
Finished running t