In [None]:
!pip install pulp

Collecting pulp
[?25l  Downloading https://files.pythonhosted.org/packages/14/c4/0eec14a0123209c261de6ff154ef3be5cad3fd557c084f468356662e0585/PuLP-2.4-py3-none-any.whl (40.6MB)
[K     |████████████████████████████████| 40.6MB 99kB/s 
[?25hCollecting amply>=0.1.2
  Downloading https://files.pythonhosted.org/packages/f3/c5/dfa09dd2595a2ab2ab4e6fa7bebef9565812722e1980d04b0edce5032066/amply-0.1.4-py3-none-any.whl
Installing collected packages: amply, pulp
Successfully installed amply-0.1.4 pulp-2.4


In [None]:
import pandas as pd
import pulp as pl
import numpy as np
from functools import cmp_to_key

In [None]:
# names of the seven input data sets: 1.csv ... 7.csv
data_st = [str(file_number) + ".csv" for file_number in range(1 , 8)]

In [None]:
fraction_of_data = 0.2  # notebook-level default; solve_data_set takes the fraction as an explicit argument
def solve_data_set(FILENAME , fraction_of_data , lp_mini = 0.1 , lp_time_limit = 300 , common_deadline=10,MEAN=2,STDDEV=1):
  """Solve one sampled instance with the integer program and with every heuristic.

  Rows of ``FILENAME`` are sampled without replacement (``random_state=1``) and
  split into distinct users and distinct servers.  A user is in the coverage of
  a server when the sample contains a row pairing the two.  Every user gets an
  execution time drawn from a normal distribution (``MEAN``, ``STDDEV``),
  rounded and clipped at zero.  The draw uses NumPy's global random state, which
  this function does not seed.

  Parameters:
    FILENAME         : CSV with the columns uId, UCPU, URAM, UST, UBAN, siteId,
                       coverage, SCPU, SSTO, SBAN and SRAM.
    fraction_of_data : fraction of the rows to sample.
    lp_mini          : weight of the "active servers" term in the LP objective;
                       the "served users" term is weighted ``1 - lp_mini``.
    lp_time_limit    : forwarded to ``pl.PULP_CBC_CMD(timeLimit=...)``.
    common_deadline  : number of time slots in the planning horizon.
    MEAN, STDDEV     : parameters of the execution-time distribution.

  Returns:
    A one-row DataFrame with the instance size, the parameters and, for each of
    lpp / random / greedy / mcf / mflow, a ``*_user`` and a ``*_server`` figure,
    plus the best ``a`` / ``b`` found by the parameter sweep.  The LP figures
    are ``-1`` when CBC does not report an optimal solution.
  """
  print ("processing file :  " + str(FILENAME) + " fraction : " + str(fraction_of_data))

  ####################### READ DATA FROM THE FILES ################################
  df = pd.read_csv(FILENAME).sample(frac=fraction_of_data, replace=False, random_state=1)

  print("the number of rows in the data set is " + str(df.shape[0]))

  ## one row per distinct user and one row per distinct server
  userdf = df[['uId' , 'UCPU' , 'URAM' , 'UST' , 'UBAN']].drop_duplicates(keep = 'first')
  serverdf = df[['siteId' , 'coverage' , 'SCPU' , 'SSTO' , 'SBAN' , 'SRAM']].drop_duplicates(keep = 'first')

  ## coverage[site]      : set of user ids paired with the site (constant-time membership test)
  ## coverage_size[site] : number of sampled rows of the site (used by the server cost below)
  coverage = {}
  coverage_size = {}
  for st in serverdf['siteId'].tolist():
    covered_users = df.loc[df['siteId'] == st , 'uId'].tolist()
    coverage[st] = set(covered_users)
    coverage_size[st] = len(covered_users)

  num_servers = serverdf.shape[0]
  num_users = userdf.shape[0]

  ## server resources as np arrays, indexed by server position
  servercpu = np.array(serverdf['SCPU'].tolist())
  serverstorage = np.array(serverdf['SSTO'].tolist())
  serverram = np.array(serverdf['SRAM'].tolist())
  serverbandwidth = np.array(serverdf['SBAN'].tolist())
  serverid = np.array(serverdf['siteId'].tolist())

  ## user demands as np arrays, indexed by user position
  usercpu = np.array(userdf['UCPU'].tolist())
  userstorage = np.array(userdf['UST'].tolist())
  userram = np.array(userdf['URAM'].tolist())
  userbandwidth = np.array(userdf['UBAN'].tolist())
  userid = np.array(userdf['uId'].tolist())

  ## randomly generated execution time per user, rounded and never negative
  userexecutiontime = np.round(np.array(np.random.normal(MEAN , STDDEV , num_users)))
  userexecutiontime = [max(i , 0) for i in userexecutiontime]

  def is_in_coverage_of(j , i):
    """Return 1 when the jth user is in the coverage of the ith server, else 0."""
    return 1 if userid[j] in coverage[serverid[i]] else 0


  ####################### LINEAR PROGRAMMING SOLUTION ##################################
  def linear_programming_solution(minimization_weight = 0.0):
    """Build and solve the integer program; return (avg users served, avg servers active) per slot.

    Returns (-1 , -1) when CBC does not report an optimal solution, and
    (0.0 , 0.0) for an instance without users or without servers.
    """
    ## nothing to allocate: skip the solver instead of indexing into empty lists
    if num_users == 0 or num_servers == 0:
      return (0.0 , 0.0)

    servers = range(num_servers)
    users = range(num_users)
    slots = range(common_deadline)

    ## X[i][j][t] is whether the jth user is allocated to the ith server at time t or not
    ## Y[i][t] is whether the ith server is active at t
    ## Z[i][j] is whether the jth user is ever allocated to the ith server (keeps the plan migration free)
    X = [[[pl.LpVariable("x_{0}_{1}_{2}".format(i , j , t), 0, 1, cat='Integer') for t in slots] for j in users] for i in servers]
    Y = [[pl.LpVariable("y_{0}_{1}".format(i , t) , 0 , 1 , cat='Integer') for t in slots] for i in servers]
    Z = [[pl.LpVariable("z_{0}_{1}".format(i , j) , 0 , 1 , cat='Integer') for j in users] for i in servers]

    ## create the optimization problem as the maximization problem
    linearprogram = pl.LpProblem("euasolver", pl.LpMaximize)

    ## resource constraints: in every slot the demand placed on a server must fit
    ## within its capacity, and must be zero unless the server is active (Y = 1).
    ## Order matches the original model: cpu, storage, ram, bandwidth.
    demand_capacity_pairs = [
      (usercpu , servercpu) ,
      (userstorage , serverstorage) ,
      (userram , serverram) ,
      (userbandwidth , serverbandwidth) ,
    ]
    for t in slots:
      for i in servers:
        for demand , capacity in demand_capacity_pairs:
          ## the variable stays on the left of "*" so PuLP's operator handles the numpy scalar
          load = pl.lpSum([X[i][j][t] * demand[j] for j in users])
          linearprogram += load - Y[i][t] * capacity[i] <= 0

    ## at any time t one user must go to one server only
    for t in slots:
      for j in users:
        linearprogram += pl.lpSum([X[i][j][t] for i in servers]) <= 1

    ## proximity constraint: a user outside the coverage of a server is never placed on it.
    ## X already has a lower bound of 0, so "<= 0" alone pins the variable to zero.
    for i in servers:
      for j in users:
        if is_in_coverage_of(j , i) == 0:
          for t in slots:
            linearprogram += X[i][j][t] <= 0

    ## migration free: for a particular user we must have only one server to which it belongs
    for j in users:
      linearprogram += pl.lpSum([Z[i][j] for i in servers]) <= 1

    ## a user runs on its chosen server for exactly its execution time, and on no other server
    for i in servers:
      for j in users:
        linearprogram += pl.lpSum(X[i][j]) == Z[i][j] * userexecutiontime[j]

    ## objective: reward served (user, slot) pairs, penalise active (server, slot) pairs
    maximization_weight = 1 - minimization_weight
    served = pl.lpSum([X[i][j][t] for i in servers for j in users for t in slots])
    active = pl.lpSum([Y[i][t] for i in servers for t in slots])
    linearprogram += maximization_weight * served - minimization_weight * active

    status = linearprogram.solve(pl.PULP_CBC_CMD(timeLimit=lp_time_limit))

    if status != pl.LpStatusOptimal:
      print("unable to find the optimal solution")
      return (-1 , -1)

    num_users_allocated = 0
    for i in servers:
      for j in users:
        for t in slots:
          num_users_allocated += pl.value(X[i][j][t])

    total_servers_used = 0
    for i in servers:
      for t in slots:
        total_servers_used += pl.value(Y[i][t])

    avg_users_allocated = (1.00 * num_users_allocated) / (common_deadline)
    avg_servers_used = (1.00 * total_servers_used) / (common_deadline)
    return (avg_users_allocated , avg_servers_used)

  lpp_users_allocated = linear_programming_solution(minimization_weight=lp_mini)


  ## performs the allocation of the users to the servers based on the sort keys passed in
  def perform_user_allocation(usr_key , server_key , verbose = False):
    """Greedy first-fit allocation over the horizon.

    Users are visited in ascending ``usr_key`` order and servers are tried in
    ascending ``server_key`` order.  A user that has been placed stays on the
    same server in the following slots until its execution time is used up.
    Returns (served (user, slot) pairs / common_deadline , number of distinct
    servers used).
    """
    userslist = sorted(range(num_users) , key = usr_key)
    serverlist = sorted(range(num_servers) , key = server_key)

    allocated = {}                        ## user -> server it is pinned to
    servers_used = set()                  ## servers that hosted at least one user
    exectime = list(userexecutiontime)    ## remaining execution time per user
    slots_served = 0                      ## number of (user, slot) pairs served

    for t in range(common_deadline):
      ## every slot starts from the full server capacities
      scpu = servercpu.copy()
      sstorage = serverstorage.copy()
      sram = serverram.copy()
      sbandwidth = serverbandwidth.copy()

      def allocate(j , i):
        ## charge the demand of the jth user to the ith server for this slot
        scpu[i] = scpu[i] - usercpu[j]
        sstorage[i] = sstorage[i] - userstorage[j]
        sram[i] = sram[i] - userram[j]
        sbandwidth[i] = sbandwidth[i] - userbandwidth[j]

      def canbeallocated(j , i):
        ## whether the jth user can start on the ith server in this slot
        if is_in_coverage_of(j , i) == 0:
          return False
        if usercpu[j] > scpu[i] or userstorage[j] > sstorage[i]:
          return False
        if userram[j] > sram[i] or userbandwidth[j] > sbandwidth[i]:
          return False
        ## the user must still be able to finish before the deadline
        remaining_time = common_deadline - t
        return not (remaining_time < exectime[j])

      ### maintain edges first: users already running keep their server
      for usr in userslist:
        if exectime[usr] <= 0 or usr not in allocated:
          continue
        server_used = allocated[usr]
        allocate(usr , server_used)
        exectime[usr] -= 1
        slots_served += 1
        servers_used.add(server_used)

      ## look for new ones: first server in the list that can take the user
      for usr in userslist:
        if exectime[usr] <= 0 or usr in allocated:
          continue
        for srvr in serverlist:
          if canbeallocated(usr , srvr):
            allocate(usr , srvr)
            allocated[usr] = srvr
            exectime[usr] -= 1
            slots_served += 1
            servers_used.add(srvr)
            break

    avgusers = (1.00 * slots_served) / (common_deadline)

    ## NOTE(review): this is the number of distinct servers used over the whole
    ## horizon, whereas the LP reports active (server, slot) pairs divided by the
    ## deadline - confirm that the *_server columns are meant to be compared.
    avgserversused = (1.00 * len(servers_used))
    if verbose == True:
      print ("the average number of users allocated " + str(avgusers))
      print ("the average number of servers used " + str(avgserversused))
    return (avgusers , avgserversused)

  def by_index(k):
    ## identity key: keeps users / servers in their data order
    return k

  def by_execution_time(j):
    ## shortest execution time first
    return userexecutiontime[j]


  ############################ RANDOM ALLOCATION ######################################
  def random_allocation():
    ## baseline: users and servers are both taken in their data order (no shuffling is done)
    return perform_user_allocation(by_index , by_index)
  random_user_allocated = random_allocation()


  ############################ GREEDY ALLOCATION ##################################
  def greedy_allocation():
    ## users by ascending execution time, servers in their data order
    return perform_user_allocation(by_execution_time , by_index)

  greedy_user_allocated = greedy_allocation()


  ############################ MCF ALLOCATION #########################
  def most_capacity_first():
    ## NOTE(review): servers are sorted by ASCENDING total capacity, so the
    ## smallest server is tried first despite the name - confirm the intent.
    def total_capacity(i):
      return servercpu[i] + serverram[i] + serverbandwidth[i] + serverstorage[i]

    return perform_user_allocation(by_execution_time , total_capacity)
  mcf_user_allocated = most_capacity_first()

  ## for users the cost is a1 * mean resource demand + (1 - a1) * execution time
  ## for servers the cost is b1 * mean resource capacity + (1 - b1) * number of covered rows
  def parameter_based_sorting():
    """Sweep (a1 , b1) and return (users , servers , a1 , b1) of the setting serving most users."""
    hyperparameter_a = [0.1 , 0.3 , 0.5 , 0.7 , 0.9 , 1.00]
    hyperparameter_b = [0.1 , 0.3 , 0.5 , 0.7 , 0.9 , 1.00]

    ## (users_allocated , used_servers , a_optimal , b_optimal); only a strictly
    ## better user figure replaces the incumbent, so ties keep the earlier setting
    best = (0 , 0 , 0 , 0)
    for a1 in hyperparameter_a:
      def get_user_cost(j , a1 = a1):
        return (a1 / 4.00) * (usercpu[j] + userstorage[j] + userram[j] + userbandwidth[j]) + (1 - a1) * userexecutiontime[j]

      for b1 in hyperparameter_b:
        def get_server_cost(i , b1 = b1):
          server_coverage = coverage_size[serverid[i]]
          return (b1 / 4.00) * (servercpu[i] + serverstorage[i] + serverram[i] + serverbandwidth[i]) + (1 - b1) * server_coverage

        allocation = perform_user_allocation(get_user_cost , get_server_cost)
        if allocation[0] > best[0]:
          best = (allocation[0] , allocation[1] , a1 , b1)
    return best
  mflow_user_allocated = parameter_based_sorting()


  (lpp_user_ , lpp_server_) = lpp_users_allocated
  (random_user_ , random_server_) = random_user_allocated
  (greedy_user_ , greedy_server_) = greedy_user_allocated
  (mcf_user_ , mcf_server_) = mcf_user_allocated
  (mflow_user_ , mflow_server_, a_optimal_ , b_optimal_) = mflow_user_allocated
  res = pd.DataFrame([[num_users , num_servers , common_deadline , MEAN , STDDEV,  lpp_user_, lpp_server_ , random_user_ , random_server_ , greedy_user_ , greedy_server_ , mcf_user_ , mcf_server_ , mflow_user_ , mflow_server_ , a_optimal_ , b_optimal_]], \
                     columns=['num_users' , 'num_servers' , 'common_deadline' , 'mean' , 'stddev' , 'lpp_user','lpp_server','random_user' , 'random_server' , 'greedy_user' , 'greedy_server' , 'mcf_user' , 'mcf_server' , 'mflow_user' , 'mflow_server' , 'a_optimal_' , 'b_optimal_'])

  print("deadline " + str(common_deadline) + " , num-servers " + str(num_servers) + " num-users " + str(num_users))
  return res

### Generating data from the same distribution (time limit = TL seconds for the LP solver)

In [None]:
### deadline 10, LP time limit TL = 400 seconds (a little under 7 minutes)
### each entry of p is the fraction of 3.csv sampled for one run
TL = 400
p = [0.05, 0.1 , 0.15 , 0.2 , 0.25 , 0.30, 0.35 , 0.40 , 0.45 , 0.50]
for FILENAME in ['3.csv']:
  print ("processing file " + str(FILENAME))
  runs = []
  for data_points , fraction in enumerate(p):
    print("sampling " + str(fraction * 100) + " % of the data")
    print ("preparing iteration " + str(data_points))
    res = solve_data_set(FILENAME , fraction , lp_mini=0.1 , lp_time_limit = TL, common_deadline=10)
    runs.append(res)
  # concatenate once after the loop instead of re-copying the growing frame every run
  experimental_results = pd.concat(runs)
  fname = 'u_s_10_' + FILENAME
  print("writing results to = " + str(fname))
  experimental_results.to_csv(fname)

processing file 3.csv
sampling 5.0 % of the data
preparing iteration 0
processing file :  3.csv fraction : 0.05
the number of rows in the data set is 258
deadline 10 , num-servers 36 num-users 203
sampling 10.0 % of the data
preparing iteration 1
processing file :  3.csv fraction : 0.1
the number of rows in the data set is 516
deadline 10 , num-servers 38 num-users 310
sampling 15.0 % of the data
preparing iteration 2
processing file :  3.csv fraction : 0.15
the number of rows in the data set is 773
deadline 10 , num-servers 39 num-users 364
sampling 20.0 % of the data
preparing iteration 3
processing file :  3.csv fraction : 0.2
the number of rows in the data set is 1031
deadline 10 , num-servers 42 num-users 407
sampling 25.0 % of the data
preparing iteration 4
processing file :  3.csv fraction : 0.25
the number of rows in the data set is 1289
deadline 10 , num-servers 43 num-users 430
sampling 30.0 % of the data
preparing iteration 5
processing file :  3.csv fraction : 0.3
the numbe


common deadline = 10 and user maximization and server minimization for data set 3.csv and file output res_10_3.csv


In [None]:
### deadline 10, LP time limit TL = 400 seconds (a little under 7 minutes)
### six runs on the same 30 % sample of 3.csv, LP weight 0.1 on server minimisation
TL = 400
for FILENAME in ['3.csv']:
  print ("processing file " + str(FILENAME))
  runs = []
  for data_points in range(1 , 7):
    print("sampling 30 % of the data")
    print ("preparing iteration " + str(data_points))
    res = solve_data_set(FILENAME , 0.3 , lp_mini=0.1 , lp_time_limit = TL, common_deadline=10)
    runs.append(res)
  # concatenate once after the loop instead of re-copying the growing frame every run
  experimental_results = pd.concat(runs)
  fname = 'res_10_' + FILENAME
  print("writing results to = " + str(fname))
  experimental_results.to_csv(fname)

processing file 3.csv
sampling 30 % of the data
preparing iteration 1
processing file :  3.csv fraction : 0.3
the number of rows in the data set is 1546
deadline 10 , num-servers 43 num-users 453
sampling 30 % of the data
preparing iteration 2
processing file :  3.csv fraction : 0.3
the number of rows in the data set is 1546
deadline 10 , num-servers 43 num-users 453
sampling 30 % of the data
preparing iteration 3
processing file :  3.csv fraction : 0.3
the number of rows in the data set is 1546
deadline 10 , num-servers 43 num-users 453
sampling 30 % of the data
preparing iteration 4
processing file :  3.csv fraction : 0.3
the number of rows in the data set is 1546
deadline 10 , num-servers 43 num-users 453
sampling 30 % of the data
preparing iteration 5
processing file :  3.csv fraction : 0.3
the number of rows in the data set is 1546
deadline 10 , num-servers 43 num-users 453
sampling 30 % of the data
preparing iteration 6
processing file :  3.csv fraction : 0.3
the number of rows i


common deadline = 20 and user maximization and server minimization for data set 3.csv and file output res_20_3.csv


In [None]:
# deadline 20: six runs on the same 30 % sample of 3.csv, LP weight 0.1 on server minimisation
TL = 400
for FILENAME in ['3.csv']:
  print ("processing file " + str(FILENAME))
  runs = []
  for data_points in range(1 , 7):
    print("sampling 30 % of the data")
    print ("preparing iteration " + str(data_points))
    res = solve_data_set(FILENAME , 0.3 , lp_mini=0.1 , lp_time_limit = TL, common_deadline=20)
    runs.append(res)
  # concatenate once after the loop instead of re-copying the growing frame every run
  experimental_results = pd.concat(runs)
  fname = 'res_20_' + FILENAME
  print("writing results to = " + str(fname))
  experimental_results.to_csv(fname)

processing file 3.csv
sampling 30 % of the data
preparing iteration 1
processing file :  3.csv fraction : 0.3
the number of rows in the data set is 1546


KeyboardInterrupt: ignored


common deadline = 15 and user maximization and server minimization for data set 4.csv and file output res_15_4.csv


In [None]:
# deadline 15: six runs on the same 30 % sample of 4.csv, LP weight 0.1 on server minimisation
TL = 400
for FILENAME in ['4.csv']:
  print ("processing file " + str(FILENAME))
  runs = []
  for data_points in range(1 , 7):
    print("sampling 30 % of the data")
    print ("preparing iteration " + str(data_points))
    res = solve_data_set(FILENAME , 0.3 , lp_mini=0.1 , lp_time_limit = TL, common_deadline=15)
    runs.append(res)
  # concatenate once after the loop instead of re-copying the growing frame every run
  experimental_results = pd.concat(runs)
  fname = 'res_15_' + FILENAME
  print("writing results to = " + str(fname))
  experimental_results.to_csv(fname)

processing file 4.csv
sampling 30 % of the data
preparing iteration 1
processing file :  4.csv fraction : 0.3
the number of rows in the data set is 2629
deadline 15 , num-servers 43 num-users 692
sampling 30 % of the data
preparing iteration 2
processing file :  4.csv fraction : 0.3
the number of rows in the data set is 2629
deadline 15 , num-servers 43 num-users 692
sampling 30 % of the data
preparing iteration 3
processing file :  4.csv fraction : 0.3
the number of rows in the data set is 2629
deadline 15 , num-servers 43 num-users 692
sampling 30 % of the data
preparing iteration 4
processing file :  4.csv fraction : 0.3
the number of rows in the data set is 2629
deadline 15 , num-servers 43 num-users 692
sampling 30 % of the data
preparing iteration 5
processing file :  4.csv fraction : 0.3
the number of rows in the data set is 2629
unable to find the optimal solution
deadline 15 , num-servers 43 num-users 692
sampling 30 % of the data
preparing iteration 6
processing file :  4.csv

### ONLY MAXIMIZING USERS

In [None]:
# users only (lp_mini = 0.0): six runs on the same 30 % sample of 4.csv, default deadline
TL = 400
for FILENAME in ['4.csv']:
  print ("processing file " + str(FILENAME))
  runs = []
  for data_points in range(1 , 7):
    print("sampling 30 % of the data")
    print ("preparing iteration " + str(data_points))
    res = solve_data_set(FILENAME , 0.3 , lp_mini=0.0 , lp_time_limit = TL)
    runs.append(res)
  # concatenate once after the loop instead of re-copying the growing frame every run
  experimental_results = pd.concat(runs)
  fname = 'u_max_' + FILENAME
  print("writing results to = " + str(fname))
  experimental_results.to_csv(fname)

processing file 4.csv
sampling 30 % of the data
preparing iteration 1
processing file :  4.csv fraction : 0.3
the number of rows in the data set is 2629
deadline 10 , num-servers 43 num-users 692
sampling 30 % of the data
preparing iteration 2
processing file :  4.csv fraction : 0.3
the number of rows in the data set is 2629
deadline 10 , num-servers 43 num-users 692
sampling 30 % of the data
preparing iteration 3
processing file :  4.csv fraction : 0.3
the number of rows in the data set is 2629
deadline 10 , num-servers 43 num-users 692
sampling 30 % of the data
preparing iteration 4
processing file :  4.csv fraction : 0.3
the number of rows in the data set is 2629


## small data sets


In [None]:
# users only (lp_mini = 0.0): nine runs on the same 30 % sample of 1.csv, default deadline
TL = 180
for FILENAME in ['1.csv']:
  print ("processing file " + str(FILENAME))
  runs = []
  for data_points in range(1 , 10):
    print("sampling 30 % of the data")
    print ("preparing iteration " + str(data_points))
    res = solve_data_set(FILENAME , 0.3 , lp_mini=0.0 , lp_time_limit = TL)
    runs.append(res)
  # concatenate once after the loop instead of re-copying the growing frame every run
  experimental_results = pd.concat(runs)
  fname = 'u_max_' + FILENAME
  print("writing results to = " + str(fname))
  experimental_results.to_csv(fname)

In [None]:
# users maximised and servers minimised (lp_mini = 0.1): five runs on the same 30 % sample of 1.csv
TL = 180
for FILENAME in ['1.csv']:
  print ("processing file " + str(FILENAME))
  runs = []
  for data_points in range(5):
    print("sampling 30 % of the data")
    print ("preparing iteration " + str(data_points))
    res = solve_data_set(FILENAME , 0.3 , lp_mini=0.1 , lp_time_limit = TL)
    runs.append(res)
  # concatenate once after the loop instead of re-copying the growing frame every run
  experimental_results = pd.concat(runs)
  # NOTE(review): unlike the other cells there is no "_" before the file name,
  # so this writes "user_max_server_min_small1.csv" - confirm that is intended.
  fname = 'user_max_server_min_small' + FILENAME
  print("writing results to = " + str(fname))
  experimental_results.to_csv(fname)

0
1
2
3
4
5
6
