In [None]:
'''
    Set up the project: configure it with CMake (Release build type) and build it
    with make inside the `build` directory
'''
# Create the build directory; -p makes this a no-op when it already exists
!mkdir -p build
# Configure from the parent directory's CMake project, then compile.
# The && chain stops at the first step that fails.
!cd build && cmake .. -DCMAKE_BUILD_TYPE=Release && make

In [None]:
# Import the Python packages used in this notebook
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Running PSRS

In [30]:
# Helper functions for running the experiments

def runProgram(numElements, seed, useUniprocessor, numThreads=1):
    '''
        Runs the program once and returns the time it reports.

        The executable is launched from the `build` directory as
        `./main {numThreads} {numElements} {seed} {useUniprocessor}` and is expected to
        print exactly three lines:
        ['p: numThreads n: numElements seed: seed useUniprocessor: useUniprocessor', 'time', 'Success']
        where time is in nanoseconds.

        Parameters:
            numElements: passed to the program as its second argument
            seed: passed to the program as its third argument
            useUniprocessor: passed to the program as its fourth argument
            numThreads: passed to the program as its first argument (default 1)

        Returns:
            The second output line parsed as an int.

        Raises:
            RuntimeError: if the program cannot be started, does not print the expected
                three lines ending in 'Success', or prints a time that is not an integer.
    '''
    # Imported locally so this helper does not rely on the notebook's import cell.
    import subprocess

    # Push out pending progress messages before the child process starts.
    sys.stdout.flush()
    command = ['./main', str(numThreads), str(numElements), str(seed), str(useUniprocessor)]
    try:
        # An argument list (no shell) keeps the values from being re-parsed by a shell.
        # stderr is folded into stdout so stray diagnostics are caught by the line check below.
        completed = subprocess.run(
            command,
            cwd='build',
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
    except OSError as error:
        raise RuntimeError(f'Experiment failed: could not run {command} in build: {error}') from error

    result = completed.stdout.splitlines()
    if len(result) != 3 or result[2] != 'Success':
        raise RuntimeError(f'Experiment failed: unexpected output {result!r} from {command}')
    try:
        return int(result[1])
    except ValueError as error:
        raise RuntimeError(f'Experiment failed: time {result[1]!r} is not an integer') from error

def collectData(numElements, seed, useUniprocessor, times=7, numThreads=1):
    '''
        Repeats the program `times` times and gathers the time reported by each run.

        Run number k (counting from 1) is given the seed `seed + 20*k`, so every
        repetition uses its own seed. The times are returned as a list in run order.
    '''
    timings = []
    for runNumber in range(1, times + 1):
        runSeed = seed + runNumber * 20
        print(f'Running program {runNumber}/{times} for numElements={numElements} seed={runSeed} useUniprocessor={useUniprocessor} numThreads={numThreads}')
        timings.append(runProgram(numElements, runSeed, useUniprocessor, numThreads))
    return timings
    
def collectMultiprocessorData(numElements, seed, numThreads, lastRuns=5, times=7):
    '''
        Collects averaged timings for thread counts 1 through numThreads.

        The first row comes from runs with useUniprocessor=1 and is recorded with a thread
        count of 1. Every thread count from 2 to numThreads is then run with
        useUniprocessor=0. Each configuration is run `times` times and only the last
        `lastRuns` of those runs are averaged.

        Parameters:
            numElements: number of elements forwarded to every run
            seed: base seed forwarded to collectData
            numThreads: largest thread count to measure
            lastRuns: how many of the final runs are averaged (1 <= lastRuns <= times)
            times: how many runs are made per configuration

        Returns:
            A list of (numThreads, averageTime) tuples ordered by thread count.

        Raises:
            ValueError: if lastRuns is not between 1 and times.
    '''
    # lastRuns=0 would divide by zero and a negative value would silently average the
    # wrong slice, so reject both along with values larger than `times`.
    if not 1 <= lastRuns <= times:
        raise ValueError('lastRuns should be between 1 and times (inclusive)')

    def averageOfLastRuns(timings):
        # Only the trailing runs count; presumably the earlier ones act as warm-up.
        return sum(timings[-lastRuns:]) / lastRuns

    data = [(1, averageOfLastRuns(collectData(numElements, seed, 1, times)))]
    for threads in range(2, numThreads + 1):
        timings = collectData(numElements, seed, 0, times, threads)
        data.append((threads, averageOfLastRuns(timings)))
    return data

def writeToCSV(filename, data, header):
    '''
        Saves the rows in `data` as a CSV file at `filename`.

        `header` supplies the column names; the DataFrame index is not written.
    '''
    pd.DataFrame(data, columns=header).to_csv(filename, index=False)
    print(f'Wrote data to {filename}')

def saveExperimentData(experiment_data, id):
    '''
        Writes one experiment's timings and the speedups derived from them.

        `experiment_data` holds (numThreads, time) rows and is written to
        `experiment{id}.csv`. Each row's speedup is the first row's time divided by that
        row's time; the speedups are written to `experiment{id}_speedup.csv`.
    '''
    writeToCSV(f'experiment{id}.csv', experiment_data, ['numThreads', 'time'])
    # The first row is the baseline every speedup is measured against.
    experiment_speedup = [(row[0], experiment_data[0][1] / row[1]) for row in experiment_data]
    writeToCSV(f'experiment{id}_speedup.csv', experiment_speedup, ['numThreads', 'speedup'])
    
def runAndSaveExperiment(numElements, seed, maxThreads, id):
    '''
        Runs one experiment for thread counts up to `maxThreads` and saves its results
        under the identifier `id`.
    '''
    print(f'Running experiment {id} for numElements={numElements} seed={seed} maxThreads={maxThreads}')
    saveExperimentData(collectMultiprocessorData(numElements, seed, maxThreads), id)
    
def getExperimentSettings():
    '''
        Builds the list of experiment settings.

        Returns seven (numElements, seed, maxThreads, id) tuples with ids 1 to 7.
        numElements starts at 10000000 and doubles with every experiment, the seed
        starts at 100 and grows by 13, and maxThreads is always 16.
    '''
    MAX_THREADS = 16
    FIRST_NUM_ELEMENTS = 10000000
    NUM_ELEMENTS_MULTIPLIER = 2
    FIRST_SEED = 100
    SEED_STEP = 13
    NUM_EXPERIMENTS = 7
    return [
        (
            FIRST_NUM_ELEMENTS * NUM_ELEMENTS_MULTIPLIER ** step,
            FIRST_SEED + SEED_STEP * step,
            MAX_THREADS,
            step + 1,
        )
        for step in range(NUM_EXPERIMENTS)
    ]

def runAllExperiments():
    '''
        Runs and saves every experiment returned by getExperimentSettings, in order.
    '''
    for numElements, seed, maxThreads, experimentId in getExperimentSettings():
        runAndSaveExperiment(numElements, seed, maxThreads, experimentId)

In [31]:
runAllExperiments()

Running experiment 1 for numElements=10000000 seed=100 maxThreads=16
Running experiment 2 for numElements=20000000 seed=113 maxThreads=16
Running experiment 3 for numElements=40000000 seed=126 maxThreads=16
Running experiment 4 for numElements=80000000 seed=139 maxThreads=16
Running experiment 5 for numElements=160000000 seed=152 maxThreads=16
Running experiment 6 for numElements=320000000 seed=165 maxThreads=16
Running experiment 7 for numElements=640000000 seed=178 maxThreads=16


In [None]:
# Helper functions for plotting the results
def plotExperiments(experiments, title, xlabel, ylabel, filename, ycolumn='time'):
    '''
        Plots one line per experiment CSV on a single figure and saves the figure.

        Parameters:
            experiments: iterable of CSV paths. Each file needs a 'numThreads' column and
                a column named by `ycolumn`; the path itself is used as the legend label.
            title: figure title
            xlabel: label of the x axis
            ylabel: label of the y axis
            filename: path the figure is saved to
            ycolumn: name of the CSV column plotted on the y axis. Defaults to 'time';
                pass 'speedup' for the `experiment{id}_speedup.csv` files, whose value
                column is named 'speedup' rather than 'time'.
    '''
    plt.figure(figsize=(10, 10))
    for experiment in experiments:
        df = pd.read_csv(experiment)
        plt.plot(df['numThreads'], df[ycolumn], label=experiment)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()
    plt.savefig(filename)