# Psychoinformatics - Week 16 (Examples)
by Tsung-Ren (Tren) Huang (trhuang@g.ntu.edu.tw)

In [None]:
##### %config IPCompleter.greedy=True 
#%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd

## 1 Performance Profiling (of CPU & RAM)
To profile your programs outside Jupyter Notebook, a popular choice is <a href="https://docs.python.org/3/library/profile.html">cProfile</a>.

### 1.0 Testing Materials

In [1]:
import time
s=0 # global sum

def job(t):
    """Simulate a task that lasts ``t`` seconds.

    Increments the module-level counter ``s``, prints ``t`` together with
    the updated counter, sleeps for ``t`` seconds and returns ``t``.
    """
    global s
    s = s + 1
    print('Job ',t,s)
    tmp=range(t)   # throw-away object; never read afterwards
    time.sleep(t)  # block the caller for "t" seconds
    return t

### 1.1 CPU profiling (time/timeit/prun/lprun)

In [2]:
%time job(1)

Job  1 1
CPU times: user 6.08 ms, sys: 2.79 ms, total: 8.87 ms
Wall time: 1 s


1

In [3]:
%timeit job(1) # repeated measures

Job  1 2
Job  1 3
Job  1 4
Job  1 5
Job  1 6
Job  1 7
1 loop, best of 5: 1 s per loop


In [4]:
%%time 
print('Profiling the whole cell')
for i in range(5):
    job(i)

Profiling the whole cell
Job  0 8
Job  1 9
Job  2 10
Job  3 11
Job  4 12
CPU times: user 47.8 ms, sys: 12.9 ms, total: 60.6 ms
Wall time: 10 s


In [5]:
%%prun 
for i in range(5):
    print(job(i))

Job  0 13
0
Job  1 14
1
Job  2 15
2
Job  3 16
3
Job  4 17
4
 

### 1.2 Memory Profiling (memit/mprun)

In [21]:
#!pip install memory_profiler
%load_ext memory_profiler

In [None]:
%memit a=list(range(100000))
del a
%memit a=range(100000)
del a 
%memit a=np.arange(100000)

peak memory: 68.16 MiB, increment: 8.39 MiB
peak memory: 69.09 MiB, increment: -2.39 MiB
peak memory: 69.61 MiB, increment: -1.86 MiB


In [52]:
%%file my_test.py
import time
def new_job(t):
    """Print ``t``, sleep for ``t`` seconds and return ``t``."""
    print('Job ',t)
    tmp=range(t)   # throw-away object; never read afterwards
    time.sleep(t)  # block the caller for "t" seconds
    return t

Writing my_test.py


In [None]:
#For co-lab:
#import sys
#sys.path.append('/content')

from my_test import new_job
%mprun -f new_job new_job(3)

## 2 Asynchronous Computing (requires Python 3.7+)
The asyncio API is still evolving across Python releases, so you should always check the <a href="https://docs.python.org/3/library/asyncio.html">latest documentation</a>.

### 2.0 asyncio

In [13]:
%%file test.py
import asyncio
import time

async def say_after(delay, what):
    """Wait ``delay`` seconds with ``asyncio.sleep``, then print ``what`` and ``delay``."""
    pause = asyncio.sleep(delay)
    await pause  # suspends this coroutine until the sleep completes
    print(what, delay)

async def main1():
    """Run the two greetings one after the other and print the elapsed seconds.

    The second ``say_after`` is only started once the first ``await`` has
    finished, so the two delays (4 s and 2 s) add up.
    """
    # perf_counter() is a monotonic clock intended for measuring intervals;
    # time.time() can jump if the system clock is adjusted.
    t0=time.perf_counter()
    await say_after(4,'hello')
    await say_after(2,'world')
    print(time.perf_counter()-t0)
    
async def main2(): # wait for every task to complete:
    """Run the two greetings together with ``asyncio.gather`` and print the elapsed seconds.

    ``gather`` is awaited once for both coroutines, so this returns only
    after both ``say_after`` calls have finished.
    """
    # perf_counter() is a monotonic clock intended for measuring intervals;
    # time.time() can jump if the system clock is adjusted.
    t0=time.perf_counter()
    await asyncio.gather(say_after(4,'hello'),say_after(2,'world'))
    #await asyncio.gather(*[say_after(1,'hello'),say_after(2,'world')])
    print(time.perf_counter()-t0)
    
asyncio.run(main2()) # switch between main1() & main2()

Overwriting test.py


In [14]:
!python test.py

world 2
hello 4
4.002674102783203


### 2.1 Synchronous Crawlers

In [17]:
import urllib.request
def ptt(page):
    """Download one index page of the PTT ``boy-girl`` board and return its text.

    Parameters
    ----------
    page : int or str
        Index page number; it is inserted into the URL via ``str(page)``.

    Returns
    -------
    str
        The response body decoded as UTF-8.
    """
    print(page,end=' ')  # progress indicator: page numbers on one line
    u='http://www.ptt.cc/bbs/boy-girl/index'+str(page)+'.html'
    r=urllib.request.Request(u,headers={'User-Agent':''})  # sends an empty User-Agent header
    # Close the HTTP response (and its socket) deterministically instead of
    # leaving it to the garbage collector.
    with urllib.request.urlopen(r) as response:
        return response.read().decode('utf-8')

In [18]:
%%time
for i in range(1,11): 
    text=ptt(i)
    #print(text) # it does get stuff!

1 2 3 4 5 6 7 8 9 10 CPU times: user 95.1 ms, sys: 17.6 ms, total: 113 ms
Wall time: 8.02 s


### 2.2 Asynchronous Crawlers

In [None]:
#!pip install aiohttp

In [22]:
%%file test.py

import aiohttp,asyncio,time

async def ptt(session,page):
    """Fetch one index page of the PTT ``boy-girl`` board through ``session``.

    Parameters
    ----------
    session
        Object whose ``get(url)`` returns an async context manager yielding a
        response with an awaitable ``text()`` (``main`` passes an
        ``aiohttp.ClientSession``).
    page : int or str
        Index page number; it is inserted into the URL via ``str(page)``.

    Returns
    -------
    str
        The text of the response body.
    """
    print(page,end=' ')  # progress indicator: page numbers on one line
    URI='http://www.ptt.cc/bbs/boy-girl/index'+str(page)+'.html'
    # ``async with`` guarantees the response is released even when reading
    # the body raises, instead of relying on a successful ``text()`` call.
    async with session.get(URI) as response: # wait & switch
        return await response.text()

async def main():
    """Download index pages 1-10 concurrently over one shared session and print them."""
    async with aiohttp.ClientSession() as session:
        # Build all ten coroutines first, then let gather() run them together.
        requests = [ptt(session,page) for page in range(1,11)]
        all_texts = await asyncio.gather(*requests)
        print(all_texts) # it does get stuff!

t0=time.time()
asyncio.run(main())
print(time.time()-t0)

Overwriting test.py


In [23]:
!python test.py

1 2 3 4 5 6 7 8 9 10 ['<!DOCTYPE html>\n<html>\n\t<head>\n\t\t<meta charset="utf-8">\n\t\t\n\n<meta name="viewport" content="width=device-width, initial-scale=1">\n\n<title>看板 Boy-Girl 文章列表 - 批踢踢實業坊</title>\n\n<link rel="stylesheet" type="text/css" href="//images.ptt.cc/bbs/v2.27/bbs-common.css">\n<link rel="stylesheet" type="text/css" href="//images.ptt.cc/bbs/v2.27/bbs-base.css" media="screen">\n<link rel="stylesheet" type="text/css" href="//images.ptt.cc/bbs/v2.27/bbs-custom.css">\n<link rel="stylesheet" type="text/css" href="//images.ptt.cc/bbs/v2.27/pushstream.css" media="screen">\n<link rel="stylesheet" type="text/css" href="//images.ptt.cc/bbs/v2.27/bbs-print.css" media="print">\n\n\n\n\n\t</head>\n    <body>\n\t\t\n<div id="topbar-container">\n\t<div id="topbar" class="bbs-content">\n\t\t<a id="logo" href="/bbs/">批踢踢實業坊</a>\n\t\t<span>&rsaquo;</span>\n\t\t<a class="board" href="/bbs/Boy-Girl/index.html"><span class="board-label">看板 </span>Boy-Girl</a>\n\t\t<a class="right small

## 3 Parallel Computing (w/ multiple threads or processes)

In [45]:
# Number of threads NumPy can utilize
#import mkl
#mkl.get_max_threads()

# For colab:
#!lscpu | grep 'Number of threads/core:'
!nvidia-smi

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



### 3.0 Sequential Commands (filter/map/reduce)

In [37]:
from functools import reduce

def f1(x):
    """Return ``x`` when it is greater than 5, otherwise ``None``."""
    if x > 5:
        return x
    return None

def f2(x,y):
    """Return ``x + y``; used as the two-argument step for ``reduce``."""
    total = x + y
    return total

a=list(range(1,11))  # the integers 1..10
b=filter(f1,a)  # lazy iterator over the items of a for which f1(item) is truthy
c=map(f1,a)     # lazy iterator yielding f1(item) for every item of a
d=reduce(f2,a)  # folds a from the left with f2: ((1+2)+3)+...

print(list(b),list(c),d)  # list() consumes the lazy iterators so their items are shown

[6, 7, 8, 9, 10] [None, None, None, None, None, 6, 7, 8, 9, 10] 55


### 3.1 Multiple Threads (w/ shared memory) for I/O-heavy tasks

In [6]:
import time, concurrent.futures as cf

s=0 # global sum

def job(t):
    """Simulate a task that lasts ``t`` seconds.

    Increments the module-level counter ``s``, prints ``t`` together with
    the updated counter, sleeps for ``t`` seconds and returns ``t``.
    """
    global s
    s = s + 1
    print('Job ',t,s)
    tmp=range(t)   # throw-away object; never read afterwards
    time.sleep(t)  # block the caller for "t" seconds
    return t

In [10]:
%%time
with cf.ThreadPoolExecutor(max_workers=4) as pool:
    results=pool.map(job,range(1,5))
print(list(results))

Job  1 9
Job  2 10
Job  3 11
Job  4 12
[1, 2, 3, 4]
CPU times: user 31.9 ms, sys: 4.78 ms, total: 36.7 ms
Wall time: 4.02 s


### 3.2 Multiple Processes (w/ distributed memory) for computation-heavy tasks



In [8]:
%%time

# Different processes don't share memory
with cf.ProcessPoolExecutor(max_workers=4) as pool:
    results=pool.map(job,range(1,5))
print(list(results))

Job  2 5
Job  1 5
Job  3 5
Job  4 5
[1, 2, 3, 4]
CPU times: user 39.6 ms, sys: 97.3 ms, total: 137 ms
Wall time: 4.18 s


In [12]:
# Testing Queue for memory-sharing:
from multiprocessing import Pool, Queue
queue = Queue()
queue.put('abc')
msg=queue.get()    
print(msg)

abc


In [13]:
# Revise the job function to see a shared queue:
def job2(tq): # tq=(t,q)
    """Variant of ``job`` that keeps the running sum in a queue.

    ``tq`` is indexed as ``(t, q)``: ``t`` is the number of seconds to sleep
    and ``q`` is a queue holding the current sum. The sum is taken out of the
    queue, incremented and put back before sleeping; ``t`` is returned.
    """
    t, q = tq[0], tq[1]
    s = q.get() + 1   # take the sum out of the queue and increment it
    q.put(s)          # hand the updated sum back
    print('Job ',t,s)
    time.sleep(t)     # wait for "t" seconds
    return t

In [14]:
# Testing a shared queue:
q=Queue()
q.put(0)
job2((1,q))
job2((2,q))
[(i,q) for i in range(1,5)]

Job  1 1
Job  2 2


[(1, <multiprocessing.queues.Queue at 0x7f2d557223d0>),
 (2, <multiprocessing.queues.Queue at 0x7f2d557223d0>),
 (3, <multiprocessing.queues.Queue at 0x7f2d557223d0>),
 (4, <multiprocessing.queues.Queue at 0x7f2d557223d0>)]

In [15]:
%%time
# Multiple processes w/ a shared queue:
import multiprocessing
# The context managers shut down the manager process and the worker pool
# even if a job raises; the original left both running after the cell ended.
with multiprocessing.Manager() as m:
    q=m.Queue()  # manager-backed queue proxy that can be passed to pool workers
    q.put(0)     # initial value of the shared sum
    with Pool(processes=4) as pool:
        # map() blocks until every job has returned, so leaving the
        # "with" block afterwards cannot cut any work short.
        results=pool.map(job2,[(i,q) for i in range(1,5)])
print(results)

Job  3 2
Job  1 1
Job  2 3
Job  4 4
[1, 2, 3, 4]
CPU times: user 40.7 ms, sys: 132 ms, total: 173 ms
Wall time: 4.16 s


### 3.3 GPU Computing
Here we used <a href="https://pytorch.org/">PyTorch</a>.
You can check <a href="https://github.com/wkentaro/pytorch-for-numpy-users">"PyTorch for NumPy users"</a> for a quick introduction.

In [4]:
%%time
import numpy as np
m=np.random.rand(4096,2160) #4K video
for i in range(120): #120 frames
    m/=2

CPU times: user 1.8 s, sys: 9.95 ms, total: 1.81 s
Wall time: 1.8 s


In [2]:
import torch as t
t.backends.cudnn.benchmark=True

In [5]:
%%time
# Use the GPU when one is visible; otherwise fall back to the CPU so the
# cell still runs on machines without an NVIDIA driver.
device=t.device('cuda' if t.cuda.is_available() else 'cpu')
m=t.rand(4096,2160,device=device) # allocate directly on the target device
#m=t.rand(4096,2160)
for i in range(120): #120 frames
    m=t.div(m,2)
if device.type=='cuda':
    # CUDA kernels are launched asynchronously; wait for them to finish so
    # that %%time measures the computation and not just the kernel launches.
    t.cuda.synchronize()

CPU times: user 78.3 ms, sys: 0 ns, total: 78.3 ms
Wall time: 79.3 ms
