In [1]:
# Imports, nothing to see here
import random
import math
import sys,os

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
sys.path.insert(0, os.path.abspath('..'))
from spreadsheet.cell import Cell
from spreadsheet.arraySpreadsheet import ArraySpreadsheet
from spreadsheet.linkedlistSpreadsheet import LinkedListSpreadsheet
from spreadsheet.csrSpreadsheet import CSRSpreadsheet
from spreadsheet.baseSpreadsheet import BaseSpreadsheet
from time import perf_counter

In [2]:
def create_random_cells(num_cells: int, max_col:int, max_row:int) -> list[Cell]:
    """
    num_cells: number of cells you want
    max_col: maximum number of columns you want in your spreadsheet
    max_row: maximum number of rows you want in your spreadsheet
    """
    lCells = []
    lCells.append(Cell((max_row-1),(max_col-1),round(random.uniform(0.1,9.9), ndigits=2)))
    for _ in range(num_cells):
        row = random.randint(0, max_col-1)
        col = random.randint(0, max_row-1)
        val = round(random.uniform(0.1,9.9), ndigits=1)
        lCells.append(Cell(row, col, val))
    return lCells

def create_density(density:float, max_cols:int, max_rows:int):
    """
    percentage is a float of the percent of non-None cells you want, e.g. 0.1 represents 10% cells
    with non-None values.
    """
    ret = []
    ret.append(Cell((max_rows-1),(max_cols-1),(round(random.uniform(0.1,9.9), ndigits=2))))
    cells_to_make = max_rows*max_cols*density-1
    while cells_to_make > 0:
        row = random.randint(0, max_cols-1)
        col = random.randint(0, max_rows-1)
        val = round(random.uniform(0.1,9.9), ndigits=2)
        ret.append(Cell(row, col,val))
        cells_to_make -=1
    return ret

def different_densities(cells_to_generate:int):
    low = random.uniform(0.01,0.3)
    med = random.uniform(0.31,0.60)
    high = random.uniform(0.61,0.99)
    densities = [low, med, high ]
    lCells_list = []
    for x in densities:
        lCells_list.append(create_density(cells_to_generate,x))
    return lCells_list

def write_to_file(cells_to_generate:int, max_col:int, max_row:int):
    file = open("sample_data.txt","w")
    file.write(create_random_cells(cells_to_generate, max_col, max_row))
    file.close()

def display_array(array: ArraySpreadsheet):
    arr =array.array 
    for x in arr:
        for y in x:
            if(y.val == None):
                print("("+str(y.row),str(y.col)+" N)", end = " ")
            else:
                print(y, end = " ")
        print() #newline
    print("finished")

def get_lCells()->list[Cell]:
    """
    basically read from the file and create a list of cells from the data from the file
    """
    try:
        lcells = []
        dataFile = open("sample_data.txt", 'r')
        for line in dataFile:
            values = line.split()
            currRow = int(values[0])
            currCol = int(values[1])
            currVal = float(values[2])
            currCell = Cell(currRow, currCol, currVal)
            # each line contains a cell
            lcells.append(currCell)
        dataFile.close()
        return lcells
    except FileNotFoundError as e:
        print("Data file doesn't exist.")

def set_up_cells(cells_to_generate, max_col, max_row):
    write_to_file(cells_to_generate, max_col, max_row)
    return get_lCells()

def test_buildSpreadSheet(spreadSheet:BaseSpreadsheet,lCells:list[Cell]):
    start_time = perf_counter()
    spreadSheet.buildSpreadsheet(lCells)
    end_time = perf_counter()
    return end_time-start_time

def test_entries(spreadSheet:BaseSpreadsheet):
    start_time = perf_counter()
    spreadSheet.entries()
    end_time = perf_counter()
    return end_time-start_time

def test_update(spreadSheet:BaseSpreadsheet, rowIndex: int, colIndex:int, val:float):
    start_time = perf_counter()
    spreadSheet.update(rowIndex, colIndex,val)
    end_time = perf_counter()
    return end_time-start_time

def test_append_row(spreadSheet:BaseSpreadsheet):
    start_time = perf_counter()
    spreadSheet.appendRow()
    end_time = perf_counter()
    return end_time-start_time

def test_append_col(spreadSheet:BaseSpreadsheet):
    start_time = perf_counter()
    spreadSheet.appendCol()
    end_time = perf_counter()
    return end_time-start_time

def test_insert_row(spreadSheet:BaseSpreadsheet, rowIndex:int):
    start_time = perf_counter()
    spreadSheet.insertRow(rowIndex)
    end_time = perf_counter()
    return end_time-start_time

def test_insert_col(spreadsheet:BaseSpreadsheet, colIndex:int):
    start_time = perf_counter()
    spreadsheet.insertCol(colIndex)
    end_time = perf_counter()
    return end_time-start_time

def test_find(spreadSheet:BaseSpreadsheet, value:float):
    start_time = perf_counter()
    spreadSheet.find(value)
    end_time = perf_counter()
    return end_time-start_time



def test_row_num(spreadsheet:BaseSpreadsheet):
    start_time = perf_counter()
    spreadsheet.rowNum()
    end_time = perf_counter()
    return end_time-start_time

def test_col_num(spreadsheet:BaseSpreadsheet):
    start_time = perf_counter()
    spreadsheet.colNum()
    end_time = perf_counter()
    return end_time-start_time

In [3]:
#to test:
#3 different sizes -> 3 different  density
# 0.1, 0.5, 0.9
# (50,50) (500,500) (1000,1000)
#for all 3 of the abstract datat types


In [4]:
#generate datasets
#number of cells in each dataset:
#size/density:| 0.1     | 0.5     | 0.9
#50x50        | 250     | 1,250   |2,250
#100x100      | 1,000   | 5,000   |9,000
#1000x1000    | 10,000  | 500,000 |900,000
datasets = []

for x in [0.1,0.5,0.9]:
    for y in [50,100,1000]:
        datasets.append(create_density(x,y,y))

In [5]:
#create small arrays
small_spreadsheets = [ArraySpreadsheet(), ArraySpreadsheet(), ArraySpreadsheet()]
small_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(0,datasets.__len__(), 3):
    small_spreadsheets_tests.append(test_buildSpreadSheet(small_spreadsheets[counter],datasets[x])*1000)
    small_spreadsheets_tests.append(test_append_col(small_spreadsheets[counter])*1000)
    small_spreadsheets_tests.append(test_append_row(small_spreadsheets[counter])*1000)
    small_spreadsheets_tests.append(test_insert_col(small_spreadsheets[counter],x)*1000)
    small_spreadsheets_tests.append(test_insert_row(small_spreadsheets[counter],x)*1000)
    small_spreadsheets_tests.append(test_col_num(small_spreadsheets[counter])*1000)
    small_spreadsheets_tests.append(test_row_num(small_spreadsheets[counter])*1000)
    small_spreadsheets_tests.append(test_entries(small_spreadsheets[counter])*1000)
    small_spreadsheets_tests.append(test_update(small_spreadsheets[counter],x,x,x)*1000)
    small_spreadsheets_tests.append(test_find(small_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
small_df = pd.DataFrame()
small_df["low_density"] = small_spreadsheets_tests[0:10]
small_df["med_density"]= small_spreadsheets_tests[10:20]
small_df["high_density"] = small_spreadsheets_tests[20:30]

small_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(small_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,1.1168,1.6858,2.4375
append_col,0.02,0.0136,0.0155
append_row,0.0189,0.0202,0.0216
insert_col,0.198,0.1914,0.184
insert_row,0.1277,0.1214,0.1164
col_num,0.0004,0.0009,0.0005
row_num,0.0002,0.0002,0.0003
entries,0.75,2.8702,4.4135
update,0.0011,0.0018,0.002
find,0.2072,0.2519,0.2923


In [6]:
#create medium arrays
med_spreadsheets = [ArraySpreadsheet(), ArraySpreadsheet(), ArraySpreadsheet()]
med_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(1,datasets.__len__(), 3):
    med_spreadsheets_tests.append(test_buildSpreadSheet(med_spreadsheets[counter],datasets[x])*1000)
    med_spreadsheets_tests.append(test_append_col(med_spreadsheets[counter])*1000)
    med_spreadsheets_tests.append(test_append_row(med_spreadsheets[counter])*1000)
    med_spreadsheets_tests.append(test_insert_col(med_spreadsheets[counter],x)*1000)
    med_spreadsheets_tests.append(test_insert_row(med_spreadsheets[counter],x)*1000)
    med_spreadsheets_tests.append(test_col_num(med_spreadsheets[counter])*1000)
    med_spreadsheets_tests.append(test_row_num(med_spreadsheets[counter])*1000)
    med_spreadsheets_tests.append(test_entries(med_spreadsheets[counter])*1000)
    med_spreadsheets_tests.append(test_update(med_spreadsheets[counter],x,x,x)*1000)
    med_spreadsheets_tests.append(test_find(med_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
med_df = pd.DataFrame()
med_df["low_density"] = med_spreadsheets_tests[0:10]
med_df["med_density"]= med_spreadsheets_tests[10:20]
med_df["high_density"] = med_spreadsheets_tests[20:30]

med_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(med_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,4.8751,7.5306,10.3934
append_col,0.0314,0.0378,0.0376
append_row,0.04,0.0481,0.0412
insert_col,0.9212,0.9033,0.7569
insert_row,0.6769,0.4738,0.479
col_num,0.0011,0.0005,0.0009
row_num,0.0004,0.0004,0.0004
entries,3.2859,13.6408,20.1966
update,0.0034,0.0028,0.0024
find,0.8998,1.121,1.1872


In [16]:
#create large arrays
large_spreadsheets = [ArraySpreadsheet(), ArraySpreadsheet(), ArraySpreadsheet()]
large_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(2,datasets.__len__(), 3):
    large_spreadsheets_tests.append(test_buildSpreadSheet(large_spreadsheets[counter],datasets[x])*1000)
    large_spreadsheets_tests.append(test_append_col(large_spreadsheets[counter])*1000)
    large_spreadsheets_tests.append(test_append_row(large_spreadsheets[counter])*1000)
    large_spreadsheets_tests.append(test_insert_col(large_spreadsheets[counter],x)*1000)
    large_spreadsheets_tests.append(test_insert_row(large_spreadsheets[counter],x)*1000)
    large_spreadsheets_tests.append(test_col_num(large_spreadsheets[counter])*1000)
    large_spreadsheets_tests.append(test_row_num(large_spreadsheets[counter])*1000)
    large_spreadsheets_tests.append(test_entries(large_spreadsheets[counter])*1000)
    large_spreadsheets_tests.append(test_update(large_spreadsheets[counter],x,x,x)*1000)
    large_spreadsheets_tests.append(test_find(large_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
large_df = pd.DataFrame()
large_df["low_density"] = large_spreadsheets_tests[0:10]
large_df["med_density"]= large_spreadsheets_tests[10:20]
large_df["high_density"] = large_spreadsheets_tests[20:30]
large_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(large_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,912.1031,1700.4706,4089.162
append_col,47.2986,191.5458,292.5267
append_row,0.5849,0.4703,0.4507
insert_col,231.643,262.6054,327.4506
insert_row,135.5436,234.6768,287.9728
col_num,0.0023,0.002,0.0028
row_num,0.0006,0.0012,0.0007
entries,671.5251,2712.4325,4195.767
update,0.0034,0.0041,0.0031
find,152.6001,259.7776,748.4875


In [15]:
#create small linkedlist
small_ll_spreadsheets = [LinkedListSpreadsheet(), LinkedListSpreadsheet(), LinkedListSpreadsheet()]
small_ll_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(0,datasets.__len__(), 3):
    small_ll_spreadsheets_tests.append(test_buildSpreadSheet(small_ll_spreadsheets[counter],datasets[x])*1000)
    small_ll_spreadsheets_tests.append(test_append_col(small_ll_spreadsheets[counter])*1000)
    small_ll_spreadsheets_tests.append(test_append_row(small_ll_spreadsheets[counter])*1000)
    small_ll_spreadsheets_tests.append(test_insert_col(small_ll_spreadsheets[counter],x)*1000)
    small_ll_spreadsheets_tests.append(test_insert_row(small_ll_spreadsheets[counter],x)*1000)
    small_ll_spreadsheets_tests.append(test_col_num(small_ll_spreadsheets[counter])*1000)
    small_ll_spreadsheets_tests.append(test_row_num(small_ll_spreadsheets[counter])*1000)
    small_ll_spreadsheets_tests.append(test_entries(small_ll_spreadsheets[counter])*1000)
    small_ll_spreadsheets_tests.append(test_update(small_ll_spreadsheets[counter],x,x,x)*1000)
    small_ll_spreadsheets_tests.append(test_find(small_ll_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
small_ll_df = pd.DataFrame()
small_ll_df["low_density"] = small_ll_spreadsheets_tests[0:10]
small_ll_df["med_density"]= small_ll_spreadsheets_tests[10:20]
small_ll_df["high_density"] = small_ll_spreadsheets_tests[20:30]

small_ll_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(small_ll_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,4.3394,8.7403,13.0628
append_col,0.043,0.0435,0.0544
append_row,0.0364,0.0361,0.0424
insert_col,0.6991,0.6293,0.7004
insert_row,0.3359,0.2902,0.2876
col_num,0.0042,0.0047,0.0044
row_num,0.0043,0.004,0.0042
entries,0.3412,0.454,0.5568
update,0.0017,0.002,0.0027
find,0.3469,0.345,0.3508


In [9]:
#create medium linkedlist
med_ll_spreadsheets = [LinkedListSpreadsheet(), LinkedListSpreadsheet(), LinkedListSpreadsheet()]
med_ll_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(1,datasets.__len__(), 3):
    med_ll_spreadsheets_tests.append(test_buildSpreadSheet(med_ll_spreadsheets[counter],datasets[x])*1000)
    med_ll_spreadsheets_tests.append(test_append_col(med_ll_spreadsheets[counter])*1000)
    med_ll_spreadsheets_tests.append(test_append_row(med_ll_spreadsheets[counter])*1000)
    med_ll_spreadsheets_tests.append(test_insert_col(med_ll_spreadsheets[counter],x)*1000)
    med_ll_spreadsheets_tests.append(test_insert_row(med_ll_spreadsheets[counter],x)*1000)
    med_ll_spreadsheets_tests.append(test_col_num(med_ll_spreadsheets[counter])*1000)
    med_ll_spreadsheets_tests.append(test_row_num(med_ll_spreadsheets[counter])*1000)
    med_ll_spreadsheets_tests.append(test_entries(med_ll_spreadsheets[counter])*1000)
    med_ll_spreadsheets_tests.append(test_update(med_ll_spreadsheets[counter],x,x,x)*1000)
    med_ll_spreadsheets_tests.append(test_find(med_ll_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
med_ll_df = pd.DataFrame()
med_ll_df["low_density"] = med_ll_spreadsheets_tests[0:10]
med_ll_df["med_density"]= med_ll_spreadsheets_tests[10:20]
med_ll_df["high_density"] = med_ll_spreadsheets_tests[20:30]

med_ll_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(med_ll_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,23.1152,68.5027,131.2999
append_col,0.1004,0.1146,0.1305
append_row,0.0771,0.0777,0.0959
insert_col,7.0482,7.3969,7.6152
insert_row,5.7446,3.8841,4.6115
col_num,0.0209,0.023,0.0218
row_num,0.0305,0.0101,0.0171
entries,3.4814,4.4804,5.1343
update,0.0046,0.0063,0.0065
find,3.738,3.902,3.7019


In [10]:
#create large linkedlist
large_ll_spreadsheets = [LinkedListSpreadsheet(), LinkedListSpreadsheet(), LinkedListSpreadsheet()]
large_ll_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(1,datasets.__len__(), 3):
    large_ll_spreadsheets_tests.append(test_buildSpreadSheet(large_ll_spreadsheets[counter],datasets[x])*1000)
    large_ll_spreadsheets_tests.append(test_append_col(large_ll_spreadsheets[counter])*1000)
    large_ll_spreadsheets_tests.append(test_append_row(large_ll_spreadsheets[counter])*1000)
    large_ll_spreadsheets_tests.append(test_insert_col(large_ll_spreadsheets[counter],x)*1000)
    large_ll_spreadsheets_tests.append(test_insert_row(large_ll_spreadsheets[counter],x)*1000)
    large_ll_spreadsheets_tests.append(test_col_num(large_ll_spreadsheets[counter])*1000)
    large_ll_spreadsheets_tests.append(test_row_num(large_ll_spreadsheets[counter])*1000)
    large_ll_spreadsheets_tests.append(test_entries(large_ll_spreadsheets[counter])*1000)
    large_ll_spreadsheets_tests.append(test_update(large_ll_spreadsheets[counter],x,x,x)*1000)
    large_ll_spreadsheets_tests.append(test_find(large_ll_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
large_ll_df = pd.DataFrame()
large_ll_df["low_density"] = large_ll_spreadsheets_tests[0:10]
large_ll_df["med_density"]= large_ll_spreadsheets_tests[10:20]
large_ll_df["high_density"] = large_ll_spreadsheets_tests[20:30]

large_ll_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(large_ll_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,27.1218,70.9785,112.0616
append_col,0.1188,0.1066,0.1873
append_row,0.0967,0.0691,0.1164
insert_col,6.7904,3.0721,4.5779
insert_row,4.9949,2.8353,3.7238
col_num,0.0236,0.0233,0.0208
row_num,0.0229,0.0116,0.0234
entries,3.7235,2.284,5.1182
update,0.0033,0.004,0.0081
find,2.9475,1.3826,4.5069


In [11]:
#create small CSR

small_CSR_spreadsheets = [CSRSpreadsheet(), CSRSpreadsheet(), CSRSpreadsheet()]
small_CSR_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(0,datasets.__len__(), 3):
    small_CSR_spreadsheets_tests.append(test_buildSpreadSheet(small_CSR_spreadsheets[counter],datasets[x])*1000)
    small_CSR_spreadsheets_tests.append(test_append_col(small_CSR_spreadsheets[counter])*1000)
    small_CSR_spreadsheets_tests.append(test_append_row(small_CSR_spreadsheets[counter])*1000)
    small_CSR_spreadsheets_tests.append(test_insert_col(small_CSR_spreadsheets[counter],x)*1000)
    small_CSR_spreadsheets_tests.append(test_insert_row(small_CSR_spreadsheets[counter],x)*1000)
    small_CSR_spreadsheets_tests.append(test_col_num(small_CSR_spreadsheets[counter])*1000)
    small_CSR_spreadsheets_tests.append(test_row_num(small_CSR_spreadsheets[counter])*1000)
    small_CSR_spreadsheets_tests.append(test_entries(small_CSR_spreadsheets[counter])*1000)
    small_CSR_spreadsheets_tests.append(test_update(small_CSR_spreadsheets[counter],x,x,float(x))*1000)
    small_CSR_spreadsheets_tests.append(test_find(small_CSR_spreadsheets[counter],float(x))*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
small_CSR_df = pd.DataFrame()
small_CSR_df["low_density"] = small_CSR_spreadsheets_tests[0:10]
small_CSR_df["med_density"]= small_CSR_spreadsheets_tests[10:20]
small_CSR_df["high_density"] = small_CSR_spreadsheets_tests[20:30]

small_CSR_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(small_CSR_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,0.7709,4.4977,8.948
append_col,0.001,0.0024,0.0022
append_row,0.0008,0.0018,0.0014
insert_col,0.0163,0.1251,0.205
insert_row,0.0029,0.0029,0.0032
col_num,0.0006,0.0006,0.0005
row_num,0.0005,0.0004,0.0004
entries,0.397,1.0888,1.9566
update,0.0529,0.3235,0.5407
find,0.0984,0.4701,0.8386


In [12]:
#create medium CSR

med_CSR_spreadsheets = [CSRSpreadsheet(), CSRSpreadsheet(), CSRSpreadsheet()]
med_CSR_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(1,datasets.__len__(), 3):
    med_CSR_spreadsheets_tests.append(test_buildSpreadSheet(med_CSR_spreadsheets[counter],datasets[x])*1000)
    med_CSR_spreadsheets_tests.append(test_append_col(med_CSR_spreadsheets[counter])*1000)
    med_CSR_spreadsheets_tests.append(test_append_row(med_CSR_spreadsheets[counter])*1000)
    med_CSR_spreadsheets_tests.append(test_insert_col(med_CSR_spreadsheets[counter],x)*1000)
    med_CSR_spreadsheets_tests.append(test_insert_row(med_CSR_spreadsheets[counter],x)*1000)
    med_CSR_spreadsheets_tests.append(test_col_num(med_CSR_spreadsheets[counter])*1000)
    med_CSR_spreadsheets_tests.append(test_row_num(med_CSR_spreadsheets[counter])*1000)
    med_CSR_spreadsheets_tests.append(test_entries(med_CSR_spreadsheets[counter])*1000)
    med_CSR_spreadsheets_tests.append(test_update(med_CSR_spreadsheets[counter],x,x,float(x))*1000)
    med_CSR_spreadsheets_tests.append(test_find(med_CSR_spreadsheets[counter],float(x))*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
med_CSR_df = pd.DataFrame()
med_CSR_df["low_density"] = med_CSR_spreadsheets_tests[0:10]
med_CSR_df["med_density"]= med_CSR_spreadsheets_tests[10:20]
med_CSR_df["high_density"] = med_CSR_spreadsheets_tests[20:30]
# print
med_CSR_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(med_CSR_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,4.2012,21.4662,40.427
append_col,0.0018,0.0023,0.0022
append_row,0.0006,0.0013,0.0009
insert_col,0.0943,0.4357,0.8151
insert_row,0.0039,0.0034,0.0033
col_num,0.0006,0.0007,0.0007
row_num,0.0003,0.0004,0.0003
entries,1.6281,4.3405,8.238
update,0.0806,1.1838,2.079
find,0.3725,1.8989,3.4077


In [13]:
#create large CSR

large_CSR_spreadsheets = [CSRSpreadsheet(), CSRSpreadsheet(), CSRSpreadsheet()]
large_CSR_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(2,datasets.__len__(), 3):
    large_CSR_spreadsheets_tests.append(test_buildSpreadSheet(large_CSR_spreadsheets[counter],datasets[x])*1000)
    large_CSR_spreadsheets_tests.append(test_append_col(large_CSR_spreadsheets[counter])*1000)
    large_CSR_spreadsheets_tests.append(test_append_row(large_CSR_spreadsheets[counter])*1000)
    large_CSR_spreadsheets_tests.append(test_insert_col(large_CSR_spreadsheets[counter],x)*1000)
    large_CSR_spreadsheets_tests.append(test_insert_row(large_CSR_spreadsheets[counter],x)*1000)
    large_CSR_spreadsheets_tests.append(test_col_num(large_CSR_spreadsheets[counter])*1000)
    large_CSR_spreadsheets_tests.append(test_row_num(large_CSR_spreadsheets[counter])*1000)
    large_CSR_spreadsheets_tests.append(test_entries(large_CSR_spreadsheets[counter])*1000)
    large_CSR_spreadsheets_tests.append(test_update(large_CSR_spreadsheets[counter],x,x,x)*1000)
    large_CSR_spreadsheets_tests.append(test_find(large_CSR_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
large_CSR_df = pd.DataFrame()
large_CSR_df["low_density"] = large_CSR_spreadsheets_tests[0:10]
large_CSR_df["med_density"]= large_CSR_spreadsheets_tests[10:20]
large_CSR_df["high_density"] = large_CSR_spreadsheets_tests[20:30]

large_CSR_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(large_CSR_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,687.1569,4148.2064,8061.9538
append_col,0.0052,0.0053,0.0028
append_row,0.0023,0.0016,0.0017
insert_col,15.6666,86.0794,161.0624
insert_row,0.0112,0.0103,0.0062
col_num,0.0007,0.0006,0.0008
row_num,0.0005,0.0005,0.0005
entries,110.8569,584.4107,2886.4307
update,0.8986,157.5582,288.1495
find,49.2297,267.065,490.7069


In [14]:
arrays = [small_df,med_df,large_df,small_ll_df,med_ll_df,large_ll_df,small_CSR_df,med_CSR_df,large_CSR_df]
array_names = ["small_df","med_df","large_df","small_ll_df","med_ll_df","large_ll_df","small_CSR_df","med_CSR_df","large_CSR_df"]
tot_df = pd.DataFrame() 
for index in range(len(arrays)):
    print(array_names[index])
    print(arrays[index].sum())
# display(small_df)
# display(med_df)
# display(large_df)W

# display(small_ll_df)
# display(med_ll_df)
# display(large_ll_df)

# display(small_CSR_df)
# display(med_CSR_df)
# display(large_CSR_df)

small_df
low_density     2.4403
med_density     5.1574
high_density    7.4836
dtype: float64
med_df
low_density     10.7352
med_density     23.7591
high_density    33.0956
dtype: float64
large_df
low_density     2344.0859
med_density     5995.0139
high_density    8271.6590
dtype: float64
small_ll_df
low_density     2248.0340
med_density       12.5915
high_density      16.6547
dtype: float64
med_ll_df
low_density      43.3609
med_density      88.3978
high_density    152.6346
dtype: float64
large_ll_df
low_density      45.8434
med_density      80.7671
high_density    130.3444
dtype: float64
small_CSR_df
low_density      1.3413
med_density      6.5133
high_density    12.4966
dtype: float64
med_CSR_df
low_density      6.3839
med_density     29.3332
high_density    54.9742
dtype: float64
large_CSR_df
low_density       863.8286
med_density      5243.3380
high_density    11888.3153
dtype: float64


In [94]:
a = [small_df,small_ll_df,small_CSR_df],[med_df,med_ll_df,med_CSR_df],[large_df,large_ll_df,large_CSR_df]
data = []
array_names = ["small_df","small_ll_df","small_CSR_df"],["med_df","med_ll_df","med_CSR_df"],["large_df","large_ll_df","large_CSR_df"]
title = ["Small sized dataset","Medium sized dataset", "Large sized dataset"]
names = []
for array in range(len(a)):
    temp_data = [[],[],[]]
    temp_names = [[],[],[]]
    for df in a[array]:
        for i,col in enumerate(df):
            temp_names[i].append(col)
            temp_data[i].append(df[col].sum())
    data.append(temp_data)
    names.append(temp_names)

display(names,data)
for i in range(len(data)):
    X = np.arange(3)
    fig = plt.figure()
    plt.title(title)
    plt.xlabel("Density")
    plt.ylabel("Time (ms)")
    # plt.xticks(array_names[i],)
    plt.bar(X + 0.00, data[i][0], color = 'b', width = 0.25)
    plt.bar(X + 0.25, data[i][1], color = 'g', width = 0.25)
    plt.bar(X + 0.50, data[i][2], color = 'r', width = 0.25)

        

[[['low_density', 'low_density', 'low_density'],
  ['med_density', 'med_density', 'med_density'],
  ['high_density', 'high_density', 'high_density']],
 [['low_density', 'low_density', 'low_density'],
  ['med_density', 'med_density', 'med_density'],
  ['high_density', 'high_density', 'high_density']],
 [['low_density', 'low_density', 'low_density'],
  ['med_density', 'med_density', 'med_density'],
  ['high_density', 'high_density', 'high_density']]]

[[[2.440299984300509, 6.152099973405711, 1.3413000124273822],
  [5.157399995368905, 10.549100014031865, 6.513299988000654],
  [7.48359999852255, 15.06650002556853, 12.496599956648424]],
 [[10.735200034105219, 43.36090000288095, 6.383900021319278],
  [23.75909997499548, 88.39779999107122, 29.333200000110082],
  [33.09560001071077, 152.63460000278428, 54.974200000287965]],
 [[2151.3046999752987, 45.84339998837095, 863.8286000204971],
  [5361.98630000581, 80.76709997840226, 5243.3380000293255],
  [9941.823899993324, 130.3444000222953, 11888.31530000607]]]

TypeError: 'str' object is not callable

<Figure size 640x480 with 0 Axes>