### Python module/package imports for this chapter

In [1]:
import sys, math, collections, itertools, multiprocessing, gzip

In [2]:
import numpy as np

import matplotlib
import matplotlib.pyplot as pp

%matplotlib inline

In [3]:
%load_ext line_profiler
%load_ext memory_profiler

ModuleNotFoundError: No module named 'line_profiler'

## Profiling memory
<p> Not always %100 accurate but allows you to get an idea for how things are working </p>

In [5]:
vector_list = [float(i) for i in range(100000)] #dynamic array pointing to values

In [6]:
vector_np = np.arange(0,100000,dtype='d') #contigious region of memory, slightly smaller

In [7]:
sys.getsizeof(1.0)

24

In [8]:
#measuring the memory usage, must be done in terminal not jupyter notebooks
%%file memory.py

import numpy as np

@profile
def allocate():
    vector_list = [float(i) for i in range(100000)] #dynamic array pointing to values
    vector_np = np.arange(0,100000,dtype='d') #contigious region of memory

allocate()

UsageError: Line magic function `%%file` not found.


In [9]:
!python -m memory_profiler memory.py #run code in shell


G:\Anaconda\python.exe: No module named memory_profiler


Objects that can store data records

In [10]:
dict_list = [{'x': 1.0*i, 'y': 2.5*i} for i in range(100000)]

In [11]:
class xy_class(object):
    def __init__(self,x,y):
        self.x,self.y = x,y
        
class_list = [xy_class(1.0*i,2.5*i) for i in range(100000)]

In [12]:
#By declaring slots you define the only attributes that can be set, improving speed.
class xy_slots_class(object):
    __slots__ = ['x','y'] 
    def __init__(self,x,y):
        self.x,self.y = x,y
        
slots_list = [xy_slots_class(1.0*i,2.5*i) for i in range(100000)]

In [14]:
xy_namedtuple = collections.namedtuple('xy',['x','y'])

In [15]:
xytuple = [{1.0*i, 2.5*i} for i in range(100000)]

In [16]:
xylist = [{1.0*i, 2.5*i} for i in range(100000)]

In [17]:
record_np = np.fromiter(((1.0*i, 2.5*i)for i in range(100000)), dtype=[('x','d'), ('y','d')])

## Comparing memory sizes of different storage methods containing the same info


In [18]:

%%file memory2.py

import collections
import numpy as np



def allocate():
    dict_list = [{'x': 1.0*i, 'y': 2.5*i} for i in range(100000)] #slowest 32mb

    class xy_class(object): #2nd slowest 20mb
        def __init__(self,x,y):
            self.x,self.y = x,y

    class_list = [xy_class(1.0*i,2.5*i) for i in range(100000)]

    class xy_slots_class(object): #Considerably faster than a normal class at 12mb
        __slots__ = ['x','y'] 
        def __init__(self,x,y):
            self.x,self.y = x,y

    slots_list = [xy_slots_class(1.0*i,2.5*i) for i in range(100000)]

    xy_namedtuple = collections.namedtuple('xy',['x','y']) # 12mb

    xytuple = [{1.0*i, 2.5*i} for i in range(100000)] #11mb

    xylist = [{1.0*i, 2.5*i} for i in range(100000)] #13mb

    record_np = np.fromiter(((1.0*i, 2.5*i)for i in range(100000)), dtype=[('x','d'), ('y','d')]) #By far best way to store at 1.5mb
    
allocate()

Writing memory2.py


In [35]:
easilyApply = True
softSkillList = ['a','2','v']
techSkillList = ['a','2','v']
html_str = "<hr><h1>EA\t" if easilyApply else "<hr><h1>"

html_str = html_str + jobTitle.getText().strip() + "</h1><h2>EA\t" + companyName.getText().strip() + "</h2> <a href=\"" + str(jobURL) + "\"> URL LINK </a><br>" + jobDesc.prettify()  + "<h3 style=\"color:green\"><b>TechSkills:</b> <br/> {} </h3> ".format(techSkillList)    html_str = html_str + "<h3 style=\"color:orange\"><b>SoftSkills:</b> <br/> {} </h3> ".format(softSkillList)

  
print(html_str)

SyntaxError: invalid syntax (<ipython-input-35-b785e0bf078c>, line 6)