## Workflow: Lists and Memory

Objective

- See how lists use memory and how making copies and sublists affects memory
- Learn how to minimize that impact if necessary by using a generator expression

Importance to project

- This milestone will help you understand the real memory costs of creating lists using slices and list comprehensions and help you minimize those costs when needed by using a generator expression instead of a list comprehension.

In [102]:
# 1. Loading the data...

from data import product_data

# helper
def print_test_rows(data, count=5):
    """Print the first ``count`` rows of ``data``, followed by a blank line.

    Args:
        data: A sliceable sequence of rows (e.g. a list of lists).
        count: Maximum number of rows to print. Defaults to 5.

    A sequence with fewer than ``count`` rows is printed in full rather
    than raising IndexError.
    """
    # Slicing clamps to the sequence length, unlike indexing 0..count-1.
    for row in data[:max(count, 0)]:
        print(row)
    print()

# test: print the first five entries of product_data (the header row plus four product rows)
print_test_rows(product_data)


# Report 1: built with a slice and a list comprehension (based on the solution examples).

# Each report row is [sku, description, current_sales], where
# current_sales = list_price * (units_sold_web + units_sold_stores).
# product_data[1:] skips the header row.
report = [
    product[:2] + [product[5] * (product[6] + product[7])]
    for product in product_data[1:]
]
report.sort()  # in place; rows compare element by element, so the sku decides the order first

# Show the first few rows as a sanity check.
print_test_rows(report)


# Report 2: million-dollar sellers, built with a slice and a list comprehension
# (based on the solution examples).

# Each report row is [sku, description, margin, current_sales]; only products whose
# current_sales exceed 1,000,000 are kept.
report2 = [
    product[:2] + [product[4]] + [product[5] * (product[6] + product[7])]
    for product in product_data[1:]
    if product[5] * (product[6] + product[7]) > 1_000_000
]
# Highest current_sales (index 3 of each report row) first.
report2.sort(key=lambda entry: entry[3], reverse=True)

# Show the first few rows as a sanity check.
print_test_rows(report2)

['sku', 'description', 'cost', 'stock', 'margin', 'list_price', 'units_sold_web', 'units_sold_stores', 'date']
['31288', 'Super Whatsit, Large (Dozen)', 301.81, 479, 0.47, 442.22, 457, 956, '2021-06-21']
['35957', 'Premium Widget, Extra Large (Gross)', 794.74, 855, 0.37, 1088.98, 816, 442, '2021-06-21']
['91505', 'Deluxe Widget, Extra Large (Gross)', 16.23, 2808, 0.28, 20.77, 406, 665, '2021-06-21']
['31258', 'Budget Device, Giant (Gross)', 7.86, 663, 0.34, 10.56, 752, 442, '2021-06-21']

['11009', 'Economy Device, Micro (Dozen)', 4123855.6999999997]
['11663', 'Economy Device, Micro (Gross)', 2529807.6999999997]
['13290', 'Premium Gizmo, Large ', 1520431.36]
['15862', 'Economy Whatsit, Micro (Gross)', 4089473.0999999996]
['16052', 'Super Whatsit, Large ', 892012.66]

['72710', 'Economy Gizmo, Large ', 0.47, 4558402.0]
['11009', 'Economy Device, Micro (Dozen)', 0.29, 4123855.6999999997]
['15862', 'Economy Whatsit, Micro (Gross)', 0.37, 4089473.0999999996]
['30603', 'Budget Whatsit, Gian

In [103]:
# 2. getsizeof()

from sys import getsizeof


# Demo - generator expression (compared with a list comprehension)

a_list = [6, 3, 4, 8, 2, 1, 9, 5, 7, 0]

# list comprehension to pick out odd numbers (the whole result list is built immediately)
odds_01 = [x for x in a_list if x % 2]
print(f'odds_01: {odds_01}\n')

# generator expression to do the same (values are produced one at a time as the loop asks for them)
odds_02 = (x for x in a_list if x % 2)
print('odds_02:')
for odd in odds_02:
    print(odd)

# the list still prints its contents; the generator prints only its object repr, not its values
print(f'\nodds_01: {odds_01}')
print(f'odds_02: {odds_02}\n')

# sorted(): creates a new list
sorted_list = sorted(a_list)
print(f'sorted_list: {sorted_list}\n')

# reversed(): returns an iterator (a list_reverseiterator, not a generator) and does not create a new list
reversed_list = reversed(sorted_list)
print(f'reversed_list: {reversed_list}')
for x in reversed_list:
    print(x)

odds_01: [3, 1, 9, 5, 7]

odds_02:
3
1
9
5
7

odds_01: [3, 1, 9, 5, 7]
odds_02: <generator object <genexpr> at 0x7f7982f6d560>

sorted_list: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

reversed_list: <list_reverseiterator object at 0x7f79a3793d30>
9
8
7
6
5
4
3
2
1
0


In [104]:
# 3. Use getsizeof() to compare the size of the product_data and report_by_sales lists...
# NOTE(review): the code below measures report2; presumably that is this notebook's name for report_by_sales - confirm.
# sys.getsizeof() is shallow: it measures the list object itself and does not follow references to the rows.

print(f'getsizeof(product_data): {getsizeof(product_data)}\n')
print(f'getsizeof(report2): {getsizeof(report2)}')

getsizeof(product_data): 472

getsizeof(report2): 248


In [105]:
# 4. ... use a loop ... to add up the sizes of all of the elements... each item ... each row... and the list

# Total size of every individual value stored inside the rows of report2.
elements_size = sum(getsizeof(element) for row in report2 for element in row)
print(f'Sum of element sizes in report2: {elements_size}')

# Total size of the row lists themselves.
row_sizes = sum(map(getsizeof, report2))
print(f'Sum of row sizes in report2: {row_sizes}')

# Size of the outer list object on its own.
print(f'List size for report2: {getsizeof(report2)}')

# Conclusion: NO - getsizeof() does not seem to count the contents of a list as part of the list's size.

Sum of element sizes in report2: 3923
Sum of row sizes in report2: 2112
List size for report2: 248


In [106]:
# 5. use the id() function to understand what is actually created when we copy a list... slice or comprehension

print(f'report id(): {id(report)}\n')

# slice
# NOTE: the result is bound to first_row_slice (not `slice`) so the built-in
# slice type is not shadowed for the rest of the session.
first_row_slice = report[:1]
print(f'slice({id(first_row_slice)}): {first_row_slice}')
print(f'slice[0][0] id({id(first_row_slice[0][0])}): {first_row_slice[0][0]}')
print(f'report[0][0] id({id(report[0][0])}): {report[0][0]}')
first_row_slice[0][0] = '999999'  # also changes report[0][0]: both lists hold the same row object
print(f'slice[0][0] id({id(first_row_slice[0][0])}): {first_row_slice[0][0]}')
print(f'report[0][0] id({id(report[0][0])}): {report[0][0]}\n')

# a slice DOES NOT create a copy of the items sliced because modifying the item sliced modified the original
# (the slice is a new outer list, but its elements are the very same row lists that report holds)

# slice2 = report[:]
# print(f'slice2({id(slice2)}): {slice2}')

# comprehension
comp = [[row[0]] + [row[2]] for row in report if row[2] > 4_000_000]
print(f'comp({id(comp)}): {comp[0]}...')
print(f'comp[0][0] id({id(comp[0][0])}): {comp[0][0]}')
print(f'report[0][0] id({id(report[0][0])}): {report[0][0]}')
comp[0][0] = '777777'  # rebinds a slot in comp's own row list; report's row is untouched
print(f'comp[0][0] id({id(comp[0][0])}): {comp[0][0]}')
print(f'report[0][0] id({id(report[0][0])}): {report[0][0]}\n')

# a comprehension DID NOT originally create a copy of the items because the ID was the same,
# but modifying the comprehension list item DID NOT modify the original.
# Why: [row[0]] + [row[2]] builds a brand-new row list for comp, whereas the slice reused
# report's own row list. The new row initially refers to the same string object as
# report[0][0] (hence the identical id); the assignment only points comp[0][0] at a
# different string. Strings are immutable, so the original object itself is never altered.

report id(): 140160360687424

slice(140159864890368): [['11009', 'Economy Device, Micro (Dozen)', 4123855.6999999997]]
slice[0][0] id(140160889394384): 11009
report[0][0] id(140160889394384): 11009
slice[0][0] id(140160890324192): 999999
report[0][0] id(140160890324192): 999999

comp(140159864887360): ['999999', 4123855.6999999997]...
comp[0][0] id(140160890324192): 999999
report[0][0] id(140160890324192): 999999
comp[0][0] id(140160890327504): 777777
report[0][0] id(140160890324192): 999999



In [107]:
# 6. Use a generator expression... and print the million dollar sellers report...

# [ [sku, description] + [margin] + [current_sales] ]
#gener = ( x[:2] + [x[4]] + [x[5] * (x[6] + x[7])] for x in product_data[1:] if x[5] * (x[6] + x[7]) > 1_000_000 )
# for x in gener:
#     print(x)
#report2.sort(key=lambda x:x[3], reverse=True)
# ????

#[x for x in sorted(product_data[1:], key=lambda x:x[3], reverse=True)]
#[x[:2] + [x[4]] + [x[5] * (x[6] + x[7])] for x in product_data[1:] if x[5] * (x[6] + x[7]) > 1_000_000]
#[x[:2] + [x[4]] + [x[5] * (x[6] + x[7])] for x in sorted(product_data[1:], key=lambda x:x[5] * (x[6] + x[7]), reverse=True) if x[5] * (x[6] + x[7]) > 1_000_000]

def calc_sales(row):
    """Return the current sales value for one product row.

    Computed as row[5] * (row[6] + row[7]); per the product_data header row
    these columns are list_price, units_sold_web and units_sold_stores.
    """
    price = row[5]
    units_sold = row[6] + row[7]
    return price * units_sold

def sort_sales_data(data):
    """Return a new list holding the rows of data, highest calc_sales() value first.

    The input is not modified; the rows are copied into a new list before sorting.
    """
    ranked = list(data)
    ranked.sort(key=calc_sales, reverse=True)
    return ranked

#[row[:2] + [row[4]] + [calc_sales(row)] for row in sort_sales_data(product_data[1:]) if calc_sales(row) > 1_000_000]
# Generator expression: each [sku, description, margin, current_sales] row is built only
# when the loop below asks for it, so the report itself is never held in memory as a list.
# (The sorted copy returned by sort_sales_data() is still created up front, because the
# outermost iterable of a generator expression is evaluated immediately.)
datagen = (
    product[:2] + [product[4]] + [calc_sales(product)]
    for product in sort_sales_data(product_data[1:])
    if calc_sales(product) > 1_000_000
)
#print(datagen)
for row in datagen:
    print(row)

['72710', 'Economy Gizmo, Large ', 0.47, 4558402.0]
['11009', 'Economy Device, Micro (Dozen)', 0.29, 4123855.6999999997]
['15862', 'Economy Whatsit, Micro (Gross)', 0.37, 4089473.0999999996]
['30603', 'Budget Whatsit, Giant (Dozen)', 0.45, 3294479.01]
['80372', 'Economy Device, Large (Gross)', 0.42, 2694492.3]
['11663', 'Economy Device, Micro (Gross)', 0.3, 2529807.6999999997]
['82046', 'Deluxe Widget, Mini (Dozen)', 0.28, 2372784.7]
['85501', 'Economy Gadget, Micro (1000-pack)', 0.5, 2234717.02]
['55603', 'Super Device, Giant ', 0.33, 2224998.5300000003]
['94261', 'Deluxe Widget, Micro ', 0.39, 2167003.65]
['16754', 'Super Gadget, Micro (Gross)', 0.35, 1958130.2999999998]
['67352', 'Deluxe Gadget, Large ', 0.45, 1926380.26]
['47360', 'Economy Gizmo, Extra Large (1000-pack)', 0.43, 1874225.17]
['29865', 'Premium Whatsit, Mini (Dozen)', 0.45, 1666704.6]
['93960', 'Premium Gizmo, Extra Large (Gross)', 0.46, 1645390.78]
['96835', 'Economy Gadget, Large (Gross)', 0.4, 1521828.0]
['13290', 