In [110]:
# Imports, nothing to see here
import random
import math
import sys,os

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
sys.path.insert(0, os.path.abspath('..'))
from spreadsheet.cell import Cell
from spreadsheet.arraySpreadsheet import ArraySpreadsheet
from spreadsheet.linkedlistSpreadsheet import LinkedListSpreadsheet
from spreadsheet.csrSpreadsheet import CSRSpreadsheet
from spreadsheet.baseSpreadsheet import BaseSpreadsheet
from time import perf_counter

In [111]:
def create_random_cells(num_cells: int, max_col:int, max_row:int) -> list[Cell]:
    """
    num_cells: number of cells you want
    max_col: maximum number of columns you want in your spreadsheet
    max_row: maximum number of rows you want in your spreadsheet
    """
    lCells = []
    lCells.append(Cell((max_row-1),(max_col-1),round(random.uniform(0.1,9.9), ndigits=2)))
    for _ in range(num_cells):
        row = random.randint(0, max_col-1)
        col = random.randint(0, max_row-1)
        val = round(random.uniform(0.1,9.9), ndigits=1)
        lCells.append(Cell(row, col, val))
    return lCells

def create_density(density:float, max_cols:int, max_rows:int):
    """
    percentage is a float of the percent of non-None cells you want, e.g. 0.1 represents 10% cells
    with non-None values.
    """
    ret = []
    ret.append(Cell((max_rows-1),(max_cols-1),(round(random.uniform(0.1,9.9), ndigits=2))))
    cells_to_make = max_rows*max_cols*density-1
    while cells_to_make > 0:
        row = random.randint(0, max_cols-1)
        col = random.randint(0, max_rows-1)
        val = round(random.uniform(0.1,9.9), ndigits=2)
        ret.append(Cell(row, col,val))
        cells_to_make -=1
    return ret

def different_densities(cells_to_generate:int):
    low = random.uniform(0.01,0.3)
    med = random.uniform(0.31,0.60)
    high = random.uniform(0.61,0.99)
    densities = [low, med, high ]
    lCells_list = []
    for x in densities:
        lCells_list.append(create_density(cells_to_generate,x))
    return lCells_list

def write_to_file(cells_to_generate:int, max_col:int, max_row:int):
    file = open("sample_data.txt","w")
    file.write(create_random_cells(cells_to_generate, max_col, max_row))
    file.close()

def display_array(array: ArraySpreadsheet):
    arr =array.array 
    for x in arr:
        for y in x:
            if(y.val == None):
                print("("+str(y.row),str(y.col)+" N)", end = " ")
            else:
                print(y, end = " ")
        print() #newline
    print("finished")

def get_lCells()->list[Cell]:
    """
    basically read from the file and create a list of cells from the data from the file
    """
    try:
        lcells = []
        dataFile = open("sample_data.txt", 'r')
        for line in dataFile:
            values = line.split()
            currRow = int(values[0])
            currCol = int(values[1])
            currVal = float(values[2])
            currCell = Cell(currRow, currCol, currVal)
            # each line contains a cell
            lcells.append(currCell)
        dataFile.close()
        return lcells
    except FileNotFoundError as e:
        print("Data file doesn't exist.")

def set_up_cells(cells_to_generate, max_col, max_row):
    write_to_file(cells_to_generate, max_col, max_row)
    return get_lCells()

def test_buildSpreadSheet(spreadSheet:BaseSpreadsheet,lCells:list[Cell]):
    start_time = perf_counter()
    spreadSheet.buildSpreadsheet(lCells)
    end_time = perf_counter()
    return end_time-start_time

def test_entries(spreadSheet:BaseSpreadsheet):
    start_time = perf_counter()
    spreadSheet.entries()
    end_time = perf_counter()
    return end_time-start_time

def test_update(spreadSheet:BaseSpreadsheet, rowIndex: int, colIndex:int, val:float):
    start_time = perf_counter()
    spreadSheet.update(rowIndex, colIndex,val)
    end_time = perf_counter()
    return end_time-start_time

def test_append_row(spreadSheet:BaseSpreadsheet):
    start_time = perf_counter()
    spreadSheet.appendRow()
    end_time = perf_counter()
    return end_time-start_time

def test_append_col(spreadSheet:BaseSpreadsheet):
    start_time = perf_counter()
    spreadSheet.appendCol()
    end_time = perf_counter()
    return end_time-start_time

def test_insert_row(spreadSheet:BaseSpreadsheet, rowIndex:int):
    start_time = perf_counter()
    spreadSheet.insertRow(rowIndex)
    end_time = perf_counter()
    return end_time-start_time

def test_insert_col(spreadsheet:BaseSpreadsheet, colIndex:int):
    start_time = perf_counter()
    spreadsheet.insertCol(colIndex)
    end_time = perf_counter()
    return end_time-start_time

def test_find(spreadSheet:BaseSpreadsheet, value:float):
    start_time = perf_counter()
    spreadSheet.find(value)
    end_time = perf_counter()
    return end_time-start_time



def test_row_num(spreadsheet:BaseSpreadsheet):
    start_time = perf_counter()
    spreadsheet.rowNum()
    end_time = perf_counter()
    return end_time-start_time

def test_col_num(spreadsheet:BaseSpreadsheet):
    start_time = perf_counter()
    spreadsheet.colNum()
    end_time = perf_counter()
    return end_time-start_time

In [112]:
#to test:
#3 different sizes -> 3 different  density
# 0.1, 0.5, 0.9
# (50,50) (500,500) (1000,1000)
#for all 3 of the abstract datat types


In [113]:
#generate datasets
#number of cells in each dataset:
#size/density:| 0.1     | 0.5     | 0.9
#50x50        | 250     | 1,250   |2,250
#100x100      | 1,000   | 5,000   |9,000
#1000x1000    | 10,000  | 500,000 |900,000
datasets = []

for x in [0.1,0.5,0.9]:
    for y in [50,100,1000]:
        datasets.append(create_density(x,y,y))

In [114]:
#create small arrays
small_spreadsheets = [ArraySpreadsheet(), ArraySpreadsheet(), ArraySpreadsheet()]
small_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(0,datasets.__len__(), 3):
    small_spreadsheets_tests.append(test_buildSpreadSheet(small_spreadsheets[counter],datasets[x])*1000)
    small_spreadsheets_tests.append(test_append_col(small_spreadsheets[counter])*1000)
    small_spreadsheets_tests.append(test_append_row(small_spreadsheets[counter])*1000)
    small_spreadsheets_tests.append(test_insert_col(small_spreadsheets[counter],x)*1000)
    small_spreadsheets_tests.append(test_insert_row(small_spreadsheets[counter],x)*1000)
    small_spreadsheets_tests.append(test_col_num(small_spreadsheets[counter])*1000)
    small_spreadsheets_tests.append(test_row_num(small_spreadsheets[counter])*1000)
    small_spreadsheets_tests.append(test_entries(small_spreadsheets[counter])*1000)
    small_spreadsheets_tests.append(test_update(small_spreadsheets[counter],x,x,x)*1000)
    small_spreadsheets_tests.append(test_find(small_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
small_df = pd.DataFrame()
small_df["low_density"] = small_spreadsheets_tests[0:10]
small_df["med_density"]= small_spreadsheets_tests[10:20]
small_df["high_density"] = small_spreadsheets_tests[20:30]

small_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(small_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,1.1863,2.4046,2.5304
append_col,0.0148,0.0158,0.0138
append_row,0.019,0.0186,0.0854
insert_col,0.2287,0.1852,0.2016
insert_row,0.1346,0.1417,0.1149
col_num,0.0008,0.0004,0.0004
row_num,0.0004,0.0003,0.0003
entries,0.7527,2.9449,4.2124
update,0.0007,0.0011,0.0013
find,0.2129,0.3095,0.3357


In [115]:
#create medium arrays
med_spreadsheets = [ArraySpreadsheet(), ArraySpreadsheet(), ArraySpreadsheet()]
med_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(1,datasets.__len__(), 3):
    med_spreadsheets_tests.append(test_buildSpreadSheet(med_spreadsheets[counter],datasets[x])*1000)
    med_spreadsheets_tests.append(test_append_col(med_spreadsheets[counter])*1000)
    med_spreadsheets_tests.append(test_append_row(med_spreadsheets[counter])*1000)
    med_spreadsheets_tests.append(test_insert_col(med_spreadsheets[counter],x)*1000)
    med_spreadsheets_tests.append(test_insert_row(med_spreadsheets[counter],x)*1000)
    med_spreadsheets_tests.append(test_col_num(med_spreadsheets[counter])*1000)
    med_spreadsheets_tests.append(test_row_num(med_spreadsheets[counter])*1000)
    med_spreadsheets_tests.append(test_entries(med_spreadsheets[counter])*1000)
    med_spreadsheets_tests.append(test_update(med_spreadsheets[counter],x,x,x)*1000)
    med_spreadsheets_tests.append(test_find(med_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
med_df = pd.DataFrame()
med_df["low_density"] = med_spreadsheets_tests[0:10]
med_df["med_density"]= med_spreadsheets_tests[10:20]
med_df["high_density"] = med_spreadsheets_tests[20:30]

med_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(med_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,6.1499,8.8246,9.7698
append_col,0.0568,0.8132,0.0279
append_row,0.0506,0.0473,0.3933
insert_col,1.5543,0.8671,0.7365
insert_row,0.5954,0.4946,0.4825
col_num,0.0005,0.0004,0.0004
row_num,0.0006,0.0004,0.0004
entries,3.4122,13.4714,21.6585
update,0.0017,0.002,0.0033
find,1.3826,1.5426,1.4099


In [116]:
#create large arrays
large_spreadsheets = [ArraySpreadsheet(), ArraySpreadsheet(), ArraySpreadsheet()]
large_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(2,datasets.__len__(), 3):
    large_spreadsheets_tests.append(test_buildSpreadSheet(large_spreadsheets[counter],datasets[x])*1000)
    large_spreadsheets_tests.append(test_append_col(large_spreadsheets[counter])*1000)
    large_spreadsheets_tests.append(test_append_row(large_spreadsheets[counter])*1000)
    large_spreadsheets_tests.append(test_insert_col(large_spreadsheets[counter],x)*1000)
    large_spreadsheets_tests.append(test_insert_row(large_spreadsheets[counter],x)*1000)
    large_spreadsheets_tests.append(test_col_num(large_spreadsheets[counter])*1000)
    large_spreadsheets_tests.append(test_row_num(large_spreadsheets[counter])*1000)
    large_spreadsheets_tests.append(test_entries(large_spreadsheets[counter])*1000)
    large_spreadsheets_tests.append(test_update(large_spreadsheets[counter],x,x,x)*1000)
    large_spreadsheets_tests.append(test_find(large_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
large_df = pd.DataFrame()
large_df["low_density"] = large_spreadsheets_tests[0:10]
large_df["med_density"]= large_spreadsheets_tests[10:20]
large_df["high_density"] = large_spreadsheets_tests[20:30]
large_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(large_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,1826.8063,2287.2617,3135.5301
append_col,57.8521,224.9073,382.5043
append_row,0.6521,0.5501,2072.127
insert_col,219.6534,314.4271,358.6206
insert_row,202.5655,581.3139,344.8492
col_num,0.0044,0.0038,0.0025
row_num,0.0007,0.0011,0.0007
entries,733.4621,2945.7399,4342.6451
update,0.0043,0.0035,0.0031
find,182.5377,287.7043,370.0065


In [117]:
#create small linkedlist
small_ll_spreadsheets = [LinkedListSpreadsheet(), LinkedListSpreadsheet(), LinkedListSpreadsheet()]
small_ll_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(0,datasets.__len__(), 3):
    small_ll_spreadsheets_tests.append(test_buildSpreadSheet(small_ll_spreadsheets[counter],datasets[x])*1000)
    small_ll_spreadsheets_tests.append(test_append_col(small_ll_spreadsheets[counter])*1000)
    small_ll_spreadsheets_tests.append(test_append_row(small_ll_spreadsheets[counter])*1000)
    small_ll_spreadsheets_tests.append(test_insert_col(small_ll_spreadsheets[counter],x)*1000)
    small_ll_spreadsheets_tests.append(test_insert_row(small_ll_spreadsheets[counter],x)*1000)
    small_ll_spreadsheets_tests.append(test_col_num(small_ll_spreadsheets[counter])*1000)
    small_ll_spreadsheets_tests.append(test_row_num(small_ll_spreadsheets[counter])*1000)
    small_ll_spreadsheets_tests.append(test_entries(small_ll_spreadsheets[counter])*1000)
    small_ll_spreadsheets_tests.append(test_update(small_ll_spreadsheets[counter],x,x,x)*1000)
    small_ll_spreadsheets_tests.append(test_find(small_ll_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
small_ll_df = pd.DataFrame()
small_ll_df["low_density"] = small_ll_spreadsheets_tests[0:10]
small_ll_df["med_density"]= small_ll_spreadsheets_tests[10:20]
small_ll_df["high_density"] = small_ll_spreadsheets_tests[20:30]

small_ll_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(small_ll_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,3.9853,8.2506,13.3974
append_col,0.0436,0.0435,0.0554
append_row,0.04,0.0363,0.0388
insert_col,0.6518,0.6769,0.7221
insert_row,0.5163,0.289,0.2896
col_num,0.0054,0.0044,0.0043
row_num,0.004,0.0039,0.0042
entries,0.3553,0.5436,0.5391
update,0.0015,0.0024,0.0024
find,0.3116,0.3615,0.3486


In [118]:
#create medium linkedlist
med_ll_spreadsheets = [LinkedListSpreadsheet(), LinkedListSpreadsheet(), LinkedListSpreadsheet()]
med_ll_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(1,datasets.__len__(), 3):
    med_ll_spreadsheets_tests.append(test_buildSpreadSheet(med_ll_spreadsheets[counter],datasets[x])*1000)
    med_ll_spreadsheets_tests.append(test_append_col(med_ll_spreadsheets[counter])*1000)
    med_ll_spreadsheets_tests.append(test_append_row(med_ll_spreadsheets[counter])*1000)
    med_ll_spreadsheets_tests.append(test_insert_col(med_ll_spreadsheets[counter],x)*1000)
    med_ll_spreadsheets_tests.append(test_insert_row(med_ll_spreadsheets[counter],x)*1000)
    med_ll_spreadsheets_tests.append(test_col_num(med_ll_spreadsheets[counter])*1000)
    med_ll_spreadsheets_tests.append(test_row_num(med_ll_spreadsheets[counter])*1000)
    med_ll_spreadsheets_tests.append(test_entries(med_ll_spreadsheets[counter])*1000)
    med_ll_spreadsheets_tests.append(test_update(med_ll_spreadsheets[counter],x,x,x)*1000)
    med_ll_spreadsheets_tests.append(test_find(med_ll_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
med_ll_df = pd.DataFrame()
med_ll_df["low_density"] = med_ll_spreadsheets_tests[0:10]
med_ll_df["med_density"]= med_ll_spreadsheets_tests[10:20]
med_ll_df["high_density"] = med_ll_spreadsheets_tests[20:30]

med_ll_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(med_ll_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,21.1247,65.9856,111.4675
append_col,0.1023,0.0989,0.1223
append_row,0.0768,0.0774,0.0953
insert_col,3.6556,2.9177,4.3081
insert_row,1.2962,2.3298,3.005
col_num,0.0078,0.0127,0.0193
row_num,0.0083,0.0078,0.0187
entries,1.2897,2.1858,3.7428
update,0.0017,0.0027,0.0038
find,1.2966,1.5264,1.4852


In [119]:
#create medium linkedlist
large_ll_spreadsheets = [LinkedListSpreadsheet(), LinkedListSpreadsheet(), LinkedListSpreadsheet()]
large_ll_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(1,datasets.__len__(), 3):
    large_ll_spreadsheets_tests.append(test_buildSpreadSheet(large_ll_spreadsheets[counter],datasets[x])*1000)
    large_ll_spreadsheets_tests.append(test_append_col(large_ll_spreadsheets[counter])*1000)
    large_ll_spreadsheets_tests.append(test_append_row(large_ll_spreadsheets[counter])*1000)
    large_ll_spreadsheets_tests.append(test_insert_col(large_ll_spreadsheets[counter],x)*1000)
    large_ll_spreadsheets_tests.append(test_insert_row(large_ll_spreadsheets[counter],x)*1000)
    large_ll_spreadsheets_tests.append(test_col_num(large_ll_spreadsheets[counter])*1000)
    large_ll_spreadsheets_tests.append(test_row_num(large_ll_spreadsheets[counter])*1000)
    large_ll_spreadsheets_tests.append(test_entries(large_ll_spreadsheets[counter])*1000)
    large_ll_spreadsheets_tests.append(test_update(large_ll_spreadsheets[counter],x,x,x)*1000)
    large_ll_spreadsheets_tests.append(test_find(large_ll_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
large_ll_df = pd.DataFrame()
large_ll_df["low_density"] = large_ll_spreadsheets_tests[0:10]
large_ll_df["med_density"]= large_ll_spreadsheets_tests[10:20]
large_ll_df["high_density"] = large_ll_spreadsheets_tests[20:30]

large_ll_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(large_ll_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,25.8852,67.181,108.8512
append_col,0.134,0.1207,0.1304
append_row,0.0983,0.0788,0.0877
insert_col,3.8816,3.308,3.472
insert_row,3.0511,1.939,2.1493
col_num,0.0232,0.016,0.0114
row_num,0.0201,0.0102,0.0086
entries,2.6077,3.4226,2.4115
update,0.0021,0.0055,0.003
find,1.9751,3.8254,1.4365


In [120]:
#create small CSR

small_CSR_spreadsheets = [CSRSpreadsheet(), CSRSpreadsheet(), CSRSpreadsheet()]
small_CSR_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(0,datasets.__len__(), 3):
    small_CSR_spreadsheets_tests.append(test_buildSpreadSheet(small_CSR_spreadsheets[counter],datasets[x])*1000)
    small_CSR_spreadsheets_tests.append(test_append_col(small_CSR_spreadsheets[counter])*1000)
    small_CSR_spreadsheets_tests.append(test_append_row(small_CSR_spreadsheets[counter])*1000)
    small_CSR_spreadsheets_tests.append(test_insert_col(small_CSR_spreadsheets[counter],x)*1000)
    small_CSR_spreadsheets_tests.append(test_insert_row(small_CSR_spreadsheets[counter],x)*1000)
    small_CSR_spreadsheets_tests.append(test_col_num(small_CSR_spreadsheets[counter])*1000)
    small_CSR_spreadsheets_tests.append(test_row_num(small_CSR_spreadsheets[counter])*1000)
    small_CSR_spreadsheets_tests.append(test_entries(small_CSR_spreadsheets[counter])*1000)
    small_CSR_spreadsheets_tests.append(test_update(small_CSR_spreadsheets[counter],x,x,float(x))*1000)
    small_CSR_spreadsheets_tests.append(test_find(small_CSR_spreadsheets[counter],float(x))*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
small_CSR_df = pd.DataFrame()
small_CSR_df["low_density"] = small_CSR_spreadsheets_tests[0:10]
small_CSR_df["med_density"]= small_CSR_spreadsheets_tests[10:20]
small_CSR_df["high_density"] = small_CSR_spreadsheets_tests[20:30]

small_CSR_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(small_CSR_df)

got there
got there
got there


Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,0.8676,5.0923,9.5051
append_col,0.0019,0.0012,0.0016
append_row,0.0009,0.001,0.0014
insert_col,0.0132,0.1251,0.2999
insert_row,0.0017,0.0021,0.0021
col_num,0.0006,0.0006,0.0008
row_num,0.0005,0.0005,0.0005
entries,0.2588,1.6408,2.7813
update,0.6143,0.3897,0.7649
find,0.1123,0.5459,1.1538


In [109]:
#create medium CSR

med_CSR_spreadsheets = [CSRSpreadsheet(), CSRSpreadsheet(), CSRSpreadsheet()]
med_CSR_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(1,datasets.__len__(), 3):
    med_CSR_spreadsheets_tests.append(test_buildSpreadSheet(med_CSR_spreadsheets[counter],datasets[x])*1000)
    med_CSR_spreadsheets_tests.append(test_append_col(med_CSR_spreadsheets[counter])*1000)
    med_CSR_spreadsheets_tests.append(test_append_row(med_CSR_spreadsheets[counter])*1000)
    med_CSR_spreadsheets_tests.append(test_insert_col(med_CSR_spreadsheets[counter],x)*1000)
    med_CSR_spreadsheets_tests.append(test_insert_row(med_CSR_spreadsheets[counter],x)*1000)
    med_CSR_spreadsheets_tests.append(test_col_num(med_CSR_spreadsheets[counter])*1000)
    med_CSR_spreadsheets_tests.append(test_row_num(med_CSR_spreadsheets[counter])*1000)
    med_CSR_spreadsheets_tests.append(test_entries(med_CSR_spreadsheets[counter])*1000)
    med_CSR_spreadsheets_tests.append(test_update(med_CSR_spreadsheets[counter],x,x,float(x))*1000)
    med_CSR_spreadsheets_tests.append(test_find(med_CSR_spreadsheets[counter],float(x))*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
med_CSR_df = pd.DataFrame()
med_CSR_df["low_density"] = med_CSR_spreadsheets_tests[0:10]
med_CSR_df["med_density"]= med_CSR_spreadsheets_tests[10:20]
med_CSR_df["high_density"] = med_CSR_spreadsheets_tests[20:30]
# print
med_CSR_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(med_CSR_df)

got there
got there
got there


Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,3.7766,21.9813,43.418
append_col,0.002,0.001,0.002
append_row,0.0008,0.0006,0.0011
insert_col,0.1067,0.5377,0.8567
insert_row,0.0028,0.0027,0.0056
col_num,0.0007,0.0006,0.0008
row_num,0.0004,0.0005,0.0005
entries,1.1088,4.9236,9.6304
update,0.3122,1.2287,2.1847
find,0.4774,2.0246,4.1687


In [108]:
#create large CSR

large_CSR_spreadsheets = [CSRSpreadsheet(), CSRSpreadsheet(), CSRSpreadsheet()]
large_CSR_spreadsheets_tests =[]
#run the tests
counter = 0
for x in range(2,datasets.__len__(), 3):
    large_CSR_spreadsheets_tests.append(test_buildSpreadSheet(large_CSR_spreadsheets[counter],datasets[x])*1000)
    large_CSR_spreadsheets_tests.append(test_append_col(large_CSR_spreadsheets[counter])*1000)
    large_CSR_spreadsheets_tests.append(test_append_row(large_CSR_spreadsheets[counter])*1000)
    large_CSR_spreadsheets_tests.append(test_insert_col(large_CSR_spreadsheets[counter],x)*1000)
    large_CSR_spreadsheets_tests.append(test_insert_row(large_CSR_spreadsheets[counter],x)*1000)
    large_CSR_spreadsheets_tests.append(test_col_num(large_CSR_spreadsheets[counter])*1000)
    large_CSR_spreadsheets_tests.append(test_row_num(large_CSR_spreadsheets[counter])*1000)
    large_CSR_spreadsheets_tests.append(test_entries(large_CSR_spreadsheets[counter])*1000)
    large_CSR_spreadsheets_tests.append(test_update(large_CSR_spreadsheets[counter],x,x,x)*1000)
    large_CSR_spreadsheets_tests.append(test_find(large_CSR_spreadsheets[counter],x)*1000)
    counter +=1
#create a pandas data frame to make it all look nice and maybe for graph generation laters
large_CSR_df = pd.DataFrame()
large_CSR_df["low_density"] = large_CSR_spreadsheets_tests[0:10]
large_CSR_df["med_density"]= large_CSR_spreadsheets_tests[10:20]
large_CSR_df["high_density"] = large_CSR_spreadsheets_tests[20:30]

large_CSR_df.index = ["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"]
display(large_CSR_df)

got there
got there
got there


Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,780.0408,4976.5812,8867.5411
append_col,0.0037,0.0058,0.0055
append_row,0.0015,0.0017,0.0018
insert_col,19.7021,117.6264,196.522
insert_row,0.0075,0.0068,0.0078
col_num,0.0007,0.0007,0.0008
row_num,0.0011,0.0006,0.0006
entries,147.3225,696.3324,3547.7352
update,1.6392,177.0745,310.3379
find,61.0585,291.3114,524.9412


In [121]:
arrays = [small_df,med_df,large_df,small_ll_df,med_ll_df,large_ll_df,small_CSR_df,med_CSR_df,large_CSR_df]
array_names = ["small_df","med_df","large_df","small_ll_df","med_ll_df","large_ll_df","small_CSR_df","med_CSR_df","large_CSR_df"]
tot_df = pd.DataFrame() 
for index in range(len(arrays)):
    print(array_names[index])
    print( arrays[index].sum())
# display(small_df)
# display(med_df)
# display(large_df)W

# display(small_ll_df)
# display(med_ll_df)
# display(large_ll_df)

# display(small_CSR_df)
# display(med_CSR_df)
# display(large_CSR_df)

small_df
low_density     2.5509
med_density     6.0221
high_density    7.4962
dtype: float64
med_df
low_density     13.2046
med_density     26.0636
high_density    34.4825
dtype: float64
large_df
low_density      3223.5386
med_density      6641.9127
high_density    11006.2891
dtype: float64
small_ll_df
low_density      5.9148
med_density     10.2121
high_density    15.4019
dtype: float64
med_ll_df
low_density      28.8597
med_density      75.1448
high_density    124.2680
dtype: float64
large_ll_df
low_density      37.6784
med_density      79.9072
high_density    118.5616
dtype: float64
small_CSR_df
low_density      1.8718
med_density      7.7992
high_density    14.5114
dtype: float64
med_CSR_df
low_density      5.7884
med_density     30.7013
high_density    60.2685
dtype: float64
large_CSR_df
low_density      1009.7776
med_density      6258.9415
high_density    13447.0939
dtype: float64
