In [1]:
# Imports, nothing to see here
import random
import math
import sys,os

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
sys.path.insert(0, os.path.abspath('..'))
from spreadsheet.cell import Cell
from spreadsheet.arraySpreadsheet import ArraySpreadsheet
from spreadsheet.linkedlistSpreadsheet import LinkedListSpreadsheet
from spreadsheet.csrSpreadsheet import CSRSpreadsheet
from spreadsheet.baseSpreadsheet import BaseSpreadsheet
from time import perf_counter

In [2]:
def create_random_cells(num_cells: int, max_col:int, max_row:int) -> list[Cell]:
    """
    Build a list of randomly placed, randomly valued cells.

    The first cell in the list always sits at (max_row-1, max_col-1), i.e. the
    bottom-right corner of the requested grid (presumably so that a spreadsheet
    built from the list spans the full requested size -- confirm against
    buildSpreadsheet). It is followed by ``num_cells`` random cells, so the
    returned list holds ``num_cells + 1`` entries. Positions are drawn
    independently, so the same position can appear more than once.

    num_cells: number of random cells you want (in addition to the corner cell)
    max_col: maximum number of columns you want in your spreadsheet
    max_row: maximum number of rows you want in your spreadsheet
    """
    lCells = [Cell(max_row - 1, max_col - 1, round(random.uniform(0.1, 9.9), ndigits=2))]
    for _ in range(num_cells):
        # Rows are bounded by max_row and columns by max_col; mixing the two
        # bounds would place cells outside a non-square grid.
        row = random.randint(0, max_row - 1)
        col = random.randint(0, max_col - 1)
        val = round(random.uniform(0.1, 9.9), ndigits=1)
        lCells.append(Cell(row, col, val))
    return lCells

def create_density(density:float, max_cols:int, max_rows:int):
    """
    Build a list of random cells sized to a fraction of a grid.

    density: fraction of the max_rows x max_cols grid to generate cells for,
        e.g. 0.1 asks for 10% of the grid positions.
    max_cols: number of columns in the grid
    max_rows: number of rows in the grid

    Returns a list of Cell objects. The first one always sits at
    (max_rows-1, max_cols-1); the rest are placed at random. Positions are
    drawn independently, so duplicates are possible and the number of
    distinct positions may be lower than max_rows*max_cols*density.
    """
    ret = [Cell(max_rows - 1, max_cols - 1, round(random.uniform(0.1, 9.9), ndigits=2))]
    # The corner cell above already counts towards the target, hence the -1.
    # A fractional remainder is rounded up so the last partial cell is still
    # generated; a non-positive remainder generates nothing more.
    remaining = max(0, math.ceil(max_rows * max_cols * density - 1))
    for _ in range(remaining):
        # Rows are bounded by max_rows and columns by max_cols.
        row = random.randint(0, max_rows - 1)
        col = random.randint(0, max_cols - 1)
        val = round(random.uniform(0.1, 9.9), ndigits=2)
        ret.append(Cell(row, col, val))
    return ret


def write_to_file(cells_to_generate:int, max_col:int, max_row:int):
    """
    Generate random cells and save them to "sample_data.txt".

    Any existing file is overwritten. Each cell is written on its own line as
    "<row> <col> <val>" separated by single spaces, which is the layout that
    get_lCells() reads back.

    cells_to_generate: number of random cells passed to create_random_cells()
    max_col: maximum number of columns passed to create_random_cells()
    max_row: maximum number of rows passed to create_random_cells()
    """
    cells = create_random_cells(cells_to_generate, max_col, max_row)
    # The context manager closes the file even if a write fails part-way.
    with open("sample_data.txt", "w") as file:
        for cell in cells:
            file.write(f"{cell.row} {cell.col} {cell.val}\n")

def display_array(array: ArraySpreadsheet):
    """
    Print the contents of ``array.array`` as a grid, then "finished".

    ``array.array`` is iterated as a sequence of sequences of cells; every
    inner sequence is printed on its own line. A cell whose ``val`` is None is
    shown as "(row col N)"; any other cell is printed using its own string
    form.
    """
    grid = array.array
    for line in grid:
        for cell in line:
            # Identity comparison is the reliable way to detect a missing value.
            if cell.val is None:
                print(f"({cell.row} {cell.col} N)", end=" ")
            else:
                print(cell, end=" ")
        print()  # newline
    print("finished")

def get_lCells()->list[Cell]:
    """
    Read "sample_data.txt" and build a list of cells from its contents.

    Every non-blank line is expected to hold three whitespace-separated
    fields: an integer row, an integer column and a float value. Blank lines
    are skipped.

    Returns the cells in file order. If the file does not exist, a message is
    printed and None is returned.
    """
    try:
        # The context manager closes the file even if a line fails to parse.
        with open("sample_data.txt", 'r') as dataFile:
            lcells = []
            for line in dataFile:
                values = line.split()
                if not values:
                    # tolerate empty lines (e.g. a trailing newline at EOF)
                    continue
                # each line contains a cell
                lcells.append(Cell(int(values[0]), int(values[1]), float(values[2])))
            return lcells
    except FileNotFoundError:
        print("Data file doesn't exist.")
        return None

def set_up_cells(cells_to_generate, max_col, max_row):
    """
    Regenerate the sample data file and load it back.

    Passes all three arguments straight to write_to_file(), then returns
    whatever get_lCells() reads from the file.
    """
    write_to_file(cells_to_generate, max_col, max_row)
    loaded_cells = get_lCells()
    return loaded_cells

def test_buildSpreadSheet(spreadSheet:BaseSpreadsheet,lCells:list[Cell]):
    """
    Time one buildSpreadsheet() call.

    spreadSheet: spreadsheet to populate
    lCells: cells handed to buildSpreadsheet()

    Returns the elapsed perf_counter() time in seconds.
    """
    began = perf_counter()
    spreadSheet.buildSpreadsheet(lCells)
    elapsed = perf_counter() - began
    return elapsed

def test_entries(spreadSheet:BaseSpreadsheet):
    """
    Time one entries() call on the given spreadsheet.

    Returns the elapsed perf_counter() time in seconds; the value returned by
    entries() itself is discarded.
    """
    began = perf_counter()
    spreadSheet.entries()
    elapsed = perf_counter() - began
    return elapsed

def test_update(spreadSheet:BaseSpreadsheet, rowIndex: int, colIndex:int, val:float):
    """
    Time one update(rowIndex, colIndex, val) call.

    spreadSheet: spreadsheet to modify
    rowIndex, colIndex: position forwarded to update()
    val: value forwarded to update()

    Returns the elapsed perf_counter() time in seconds.
    """
    began = perf_counter()
    spreadSheet.update(rowIndex, colIndex, val)
    elapsed = perf_counter() - began
    return elapsed

def test_append_row(spreadSheet:BaseSpreadsheet):
    """
    Time one appendRow() call on the given spreadsheet.

    Returns the elapsed perf_counter() time in seconds.
    """
    began = perf_counter()
    spreadSheet.appendRow()
    elapsed = perf_counter() - began
    return elapsed

def test_append_col(spreadSheet:BaseSpreadsheet):
    """
    Time one appendCol() call on the given spreadsheet.

    Returns the elapsed perf_counter() time in seconds.
    """
    began = perf_counter()
    spreadSheet.appendCol()
    elapsed = perf_counter() - began
    return elapsed

def test_insert_row(spreadSheet:BaseSpreadsheet, rowIndex:int):
    """
    Time one insertRow(rowIndex) call.

    spreadSheet: spreadsheet to modify
    rowIndex: index forwarded to insertRow()

    Returns the elapsed perf_counter() time in seconds.
    """
    began = perf_counter()
    spreadSheet.insertRow(rowIndex)
    elapsed = perf_counter() - began
    return elapsed

def test_insert_col(spreadsheet:BaseSpreadsheet, colIndex:int):
    """
    Time one insertCol(colIndex) call.

    spreadsheet: spreadsheet to modify
    colIndex: index forwarded to insertCol()

    Returns the elapsed perf_counter() time in seconds.
    """
    began = perf_counter()
    spreadsheet.insertCol(colIndex)
    elapsed = perf_counter() - began
    return elapsed

def test_find(spreadSheet:BaseSpreadsheet, value:float):
    """
    Time one find(value) call.

    spreadSheet: spreadsheet to search
    value: value forwarded to find()

    Returns the elapsed perf_counter() time in seconds; the search result
    itself is discarded.
    """
    began = perf_counter()
    spreadSheet.find(value)
    elapsed = perf_counter() - began
    return elapsed

def different_densities(cells_to_generate:int):
    """
    Build three random datasets of roughly equal size at a low, a medium and
    a high density.

    One density is drawn at random from each band (0.01-0.3, 0.31-0.60,
    0.61-0.99). For each density a square grid is sized so that
    create_density() produces about ``cells_to_generate`` cells.

    NOTE(review): create_density() needs grid dimensions that this function
    does not receive, so the grid is derived from the requested cell count.
    Confirm this matches the intended experiment.

    cells_to_generate: approximate number of cells wanted in each dataset

    Returns a list of three cell lists ordered low, medium, high density.
    """
    low = random.uniform(0.01,0.3)
    med = random.uniform(0.31,0.60)
    high = random.uniform(0.61,0.99)
    lCells_list = []
    for density in (low, med, high):
        # create_density() emits about side*side*density cells, so choose the
        # smallest square whose area covers cells_to_generate / density.
        side = max(1, math.ceil(math.sqrt(max(cells_to_generate, 0) / density)))
        lCells_list.append(create_density(density, side, side))
    return lCells_list

def test_row_num(spreadsheet:BaseSpreadsheet):
    """
    Time one rowNum() call on the given spreadsheet.

    Returns the elapsed perf_counter() time in seconds.
    """
    began = perf_counter()
    spreadsheet.rowNum()
    elapsed = perf_counter() - began
    return elapsed
def test_col_num(spreadsheet:BaseSpreadsheet):
    """
    Time one colNum() call on the given spreadsheet.

    Returns the elapsed perf_counter() time in seconds.
    """
    began = perf_counter()
    spreadsheet.colNum()
    elapsed = perf_counter() - began
    return elapsed

In [None]:
# To test:
# 3 different sizes x 3 different densities
# densities: 0.1, 0.5, 0.9
# sizes: (50,50) (100,100) (1000,1000)
# for all 3 of the abstract data types


In [3]:
# Generate the datasets.
# Requested number of cells in each dataset (rows * cols * density):
# size/density: | 0.1     | 0.5     | 0.9
# 50            | 250     | 1,250   | 2,250
# 100           | 1,000   | 5,000   | 9,000
# 1000          | 100,000 | 500,000 | 900,000
#
# The list is ordered density-major: the three sizes at density 0.1 come
# first, then the three at 0.5, then the three at 0.9. The dataset for the
# i-th density and j-th size is therefore datasets[3*i + j].

# Fix the seed so that a fresh "Restart & Run All" benchmarks the same data.
random.seed(42)

datasets = []

for x in [0.1,0.5,0.9]:
    for y in [50,100,1000]:
        datasets.append(create_density(x,y,y))

In [None]:
# Benchmark ArraySpreadsheet on every third dataset starting at index 0
# (the 50x50 grid at each of the three densities).
small_spreadsheets = [ArraySpreadsheet() for _ in range(3)]
small_spreadsheets_tests = []

counter = 0
for x in range(0, len(datasets), 3):
    sheet = small_spreadsheets[counter]
    # Operations run in the same order as the rows of the result table.
    # x (the dataset index) is also passed as the insert position, the
    # update coordinates/value and the search value.
    timings_s = [
        test_buildSpreadSheet(sheet, datasets[x]),
        test_append_col(sheet),
        test_append_row(sheet),
        test_insert_col(sheet, x),
        test_insert_row(sheet, x),
        test_col_num(sheet),
        test_row_num(sheet),
        test_entries(sheet),
        test_update(sheet, x, x, x),
        test_find(sheet, x),
    ]
    # seconds -> milliseconds
    small_spreadsheets_tests.extend(t * 1000 for t in timings_s)
    counter += 1

# One column per density, one row per operation (values in milliseconds).
small_df = pd.DataFrame(
    {
        "low_density": small_spreadsheets_tests[0:10],
        "med_density": small_spreadsheets_tests[10:20],
        "high_density": small_spreadsheets_tests[20:30],
    },
    index=["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"],
)
display(small_df)

In [None]:
# Benchmark ArraySpreadsheet on every third dataset starting at index 1
# (the 100x100 grid at each of the three densities).
med_spreadsheets = [ArraySpreadsheet() for _ in range(3)]
med_spreadsheets_tests = []

counter = 0
for x in range(1, len(datasets), 3):
    sheet = med_spreadsheets[counter]
    # Operations run in the same order as the rows of the result table.
    # x (the dataset index) is also passed as the insert position, the
    # update coordinates/value and the search value.
    timings_s = [
        test_buildSpreadSheet(sheet, datasets[x]),
        test_append_col(sheet),
        test_append_row(sheet),
        test_insert_col(sheet, x),
        test_insert_row(sheet, x),
        test_col_num(sheet),
        test_row_num(sheet),
        test_entries(sheet),
        test_update(sheet, x, x, x),
        test_find(sheet, x),
    ]
    # seconds -> milliseconds
    med_spreadsheets_tests.extend(t * 1000 for t in timings_s)
    counter += 1

# One column per density, one row per operation (values in milliseconds).
med_df = pd.DataFrame(
    {
        "low_density": med_spreadsheets_tests[0:10],
        "med_density": med_spreadsheets_tests[10:20],
        "high_density": med_spreadsheets_tests[20:30],
    },
    index=["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"],
)
display(med_df)

In [4]:
# Benchmark ArraySpreadsheet on every third dataset starting at index 2
# (the 1000x1000 grid at each of the three densities).
large_spreadsheets = [ArraySpreadsheet() for _ in range(3)]
large_spreadsheets_tests = []

counter = 0
for x in range(2, len(datasets), 3):
    sheet = large_spreadsheets[counter]
    # Operations run in the same order as the rows of the result table.
    # x (the dataset index) is also passed as the insert position, the
    # update coordinates/value and the search value.
    timings_s = [
        test_buildSpreadSheet(sheet, datasets[x]),
        test_append_col(sheet),
        test_append_row(sheet),
        test_insert_col(sheet, x),
        test_insert_row(sheet, x),
        test_col_num(sheet),
        test_row_num(sheet),
        test_entries(sheet),
        test_update(sheet, x, x, x),
        test_find(sheet, x),
    ]
    # seconds -> milliseconds
    large_spreadsheets_tests.extend(t * 1000 for t in timings_s)
    counter += 1

# One column per density, one row per operation (values in milliseconds).
large_df = pd.DataFrame(
    {
        "low_density": large_spreadsheets_tests[0:10],
        "med_density": large_spreadsheets_tests[10:20],
        "high_density": large_spreadsheets_tests[20:30],
    },
    index=["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"],
)
display(large_df)

Unnamed: 0,low_density,med_density,high_density
build_spreadsheet,784.985667,1229.850625,1918.419292
append_col,41.788167,382.652458,257.374083
append_row,0.371084,0.367667,317.050208
insert_col,135.193541,231.3305,1501.431792
insert_row,397.818833,181.419584,191.835833
col_num,0.002292,0.002209,0.002583
row_num,0.000625,0.000625,0.000542
entries,439.023125,1775.46,2693.060958
update,0.004333,0.003584,0.004792
find,100.339125,203.646416,221.530833


In [None]:
# Benchmark LinkedListSpreadsheet on every third dataset starting at index 0
# (the 50x50 grid at each of the three densities).
small_ll_spreadsheets = [LinkedListSpreadsheet() for _ in range(3)]
small_ll_spreadsheets_tests = []

counter = 0
for x in range(0, len(datasets), 3):
    sheet = small_ll_spreadsheets[counter]
    # Operations run in the same order as the rows of the result table.
    # x (the dataset index) is also passed as the insert position, the
    # update coordinates/value and the search value.
    timings_s = [
        test_buildSpreadSheet(sheet, datasets[x]),
        test_append_col(sheet),
        test_append_row(sheet),
        test_insert_col(sheet, x),
        test_insert_row(sheet, x),
        test_col_num(sheet),
        test_row_num(sheet),
        test_entries(sheet),
        test_update(sheet, x, x, x),
        test_find(sheet, x),
    ]
    # seconds -> milliseconds
    small_ll_spreadsheets_tests.extend(t * 1000 for t in timings_s)
    counter += 1

# One column per density, one row per operation (values in milliseconds).
small_ll_df = pd.DataFrame(
    {
        "low_density": small_ll_spreadsheets_tests[0:10],
        "med_density": small_ll_spreadsheets_tests[10:20],
        "high_density": small_ll_spreadsheets_tests[20:30],
    },
    index=["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"],
)
display(small_ll_df)

In [None]:
# Benchmark LinkedListSpreadsheet on every third dataset starting at index 1
# (the 100x100 grid at each of the three densities).
med_ll_spreadsheets = [LinkedListSpreadsheet() for _ in range(3)]
med_ll_spreadsheets_tests = []

counter = 0
for x in range(1, len(datasets), 3):
    sheet = med_ll_spreadsheets[counter]
    # Operations run in the same order as the rows of the result table.
    # x (the dataset index) is also passed as the insert position, the
    # update coordinates/value and the search value.
    timings_s = [
        test_buildSpreadSheet(sheet, datasets[x]),
        test_append_col(sheet),
        test_append_row(sheet),
        test_insert_col(sheet, x),
        test_insert_row(sheet, x),
        test_col_num(sheet),
        test_row_num(sheet),
        test_entries(sheet),
        test_update(sheet, x, x, x),
        test_find(sheet, x),
    ]
    # seconds -> milliseconds
    med_ll_spreadsheets_tests.extend(t * 1000 for t in timings_s)
    counter += 1

# One column per density, one row per operation (values in milliseconds).
med_ll_df = pd.DataFrame(
    {
        "low_density": med_ll_spreadsheets_tests[0:10],
        "med_density": med_ll_spreadsheets_tests[10:20],
        "high_density": med_ll_spreadsheets_tests[20:30],
    },
    index=["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries", "update","find"],
)
display(med_ll_df)

In [None]:
# Benchmark CSRSpreadsheet on every third dataset starting at index 0
# (the 50x50 grid at each of the three densities).
small_CSR_spreadsheets = [CSRSpreadsheet() for _ in range(3)]
small_CSR_spreadsheets_tests = []

counter = 0
for x in range(0, len(datasets), 3):
    sheet = small_CSR_spreadsheets[counter]
    # The order of this list must match the row labels of the result table
    # below, otherwise timings end up under the wrong operation name.
    # x (the dataset index) is also passed as the insert position and the
    # search value.
    # NOTE(review): update() is not benchmarked for the CSR spreadsheet;
    # the reason is not recorded here -- confirm before adding it.
    timings_s = [
        test_buildSpreadSheet(sheet, datasets[x]),
        test_append_col(sheet),
        test_append_row(sheet),
        test_insert_col(sheet, x),
        test_insert_row(sheet, x),
        test_col_num(sheet),
        test_row_num(sheet),
        test_entries(sheet),
        test_find(sheet, x),
    ]
    # seconds -> milliseconds
    small_CSR_spreadsheets_tests.extend(t * 1000 for t in timings_s)
    counter += 1

# One column per density, one row per operation (values in milliseconds).
small_CSR_df = pd.DataFrame(
    {
        "low_density": small_CSR_spreadsheets_tests[0:9],
        "med_density": small_CSR_spreadsheets_tests[9:18],
        "high_density": small_CSR_spreadsheets_tests[18:27],
    },
    index=["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries","find"],
)
display(small_CSR_df)

In [None]:
# Benchmark CSRSpreadsheet on every third dataset starting at index 1
# (the 100x100 grid at each of the three densities).
med_CSR_spreadsheets = [CSRSpreadsheet() for _ in range(3)]
med_CSR_spreadsheets_tests = []

counter = 0
for x in range(1, len(datasets), 3):
    sheet = med_CSR_spreadsheets[counter]
    # The order of this list must match the row labels of the result table
    # below, otherwise timings end up under the wrong operation name.
    # x (the dataset index) is also passed as the insert position and the
    # search value.
    # NOTE(review): update() is not benchmarked for the CSR spreadsheet;
    # the reason is not recorded here -- confirm before adding it.
    timings_s = [
        test_buildSpreadSheet(sheet, datasets[x]),
        test_append_col(sheet),
        test_append_row(sheet),
        test_insert_col(sheet, x),
        test_insert_row(sheet, x),
        test_col_num(sheet),
        test_row_num(sheet),
        test_entries(sheet),
        test_find(sheet, x),
    ]
    # All nine timings, including find, go into the medium list so that the
    # 9-wide slices below line up. seconds -> milliseconds
    med_CSR_spreadsheets_tests.extend(t * 1000 for t in timings_s)
    counter += 1

# One column per density, one row per operation (values in milliseconds).
med_CSR_df = pd.DataFrame(
    {
        "low_density": med_CSR_spreadsheets_tests[0:9],
        "med_density": med_CSR_spreadsheets_tests[9:18],
        "high_density": med_CSR_spreadsheets_tests[18:27],
    },
    index=["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries","find"],
)
display(med_CSR_df)

In [None]:
# Benchmark CSRSpreadsheet on every third dataset starting at index 2
# (the 1000x1000 grid at each of the three densities).
large_CSR_spreadsheets = [CSRSpreadsheet() for _ in range(3)]
large_CSR_spreadsheets_tests = []

counter = 0
for x in range(2, len(datasets), 3):
    sheet = large_CSR_spreadsheets[counter]
    # The order of this list must match the row labels of the result table
    # below, otherwise timings end up under the wrong operation name.
    # x (the dataset index) is also passed as the insert position and the
    # search value.
    # NOTE(review): update() is not benchmarked for the CSR spreadsheet;
    # the reason is not recorded here -- confirm before adding it.
    timings_s = [
        test_buildSpreadSheet(sheet, datasets[x]),
        test_append_col(sheet),
        test_append_row(sheet),
        test_insert_col(sheet, x),
        test_insert_row(sheet, x),
        test_col_num(sheet),
        test_row_num(sheet),
        test_entries(sheet),
        test_find(sheet, x),
    ]
    # seconds -> milliseconds
    large_CSR_spreadsheets_tests.extend(t * 1000 for t in timings_s)
    counter += 1

# One column per density, one row per operation (values in milliseconds).
large_CSR_df = pd.DataFrame(
    {
        "low_density": large_CSR_spreadsheets_tests[0:9],
        "med_density": large_CSR_spreadsheets_tests[9:18],
        "high_density": large_CSR_spreadsheets_tests[18:27],
    },
    index=["build_spreadsheet","append_col", "append_row", "insert_col", "insert_row", "col_num", "row_num","entries","find"],
)
display(large_CSR_df)