In [2]:
import ipyparallel as ipp

# Build a Cluster object from a cluster file on local disk, then open a
# synchronous client to it.
# NOTE(review): the absolute, user-specific Windows path makes this cell
# non-portable; consider deriving it from the IPython profile directory.
cluster = ipp.Cluster.from_file(r"C:\Users\QOOK\.ipython\profile_default\security\cluster-.json")
rc = cluster.connect_client_sync()
# Bare expression so the notebook displays the client's repr.
rc

Using existing profile dir: 'C:\\Users\\QOOK\\.ipython\\profile_default'


<ipyparallel.client.client.Client at 0x1371970>

# Python for Finance - part 4

#### 금융분석과 관련된 기본적인 내용을 정리하였습니다. 파이썬을 활용한 금융분석 (이브 힐피시 지음)을 참고하였습니다.

## Chapter 8. 파이썬 성능 개선

In [20]:
def perf_comp_data(func_list : list, data_list : list, rep : int=3, number : int=1):
    """
    Time several functions and print them ranked from fastest to slowest.

    Each entry of ``func_list`` is paired by position with the entry of
    ``data_list``; the call ``<func>(<data>)`` is timed with ``timeit.repeat``
    after importing both names from ``__main__``. Nothing is returned; one
    line per function is printed with its average time and its time relative
    to the fastest function.

    :param func_list : names of the functions to time, as strings
    :param data_list : names of the argument objects, as strings
    :param rep : number of timing repetitions that are averaged
    :param number : number of calls per repetition
    """
    import timeit

    avg_seconds = {}
    for idx, func_name in enumerate(func_list):
        data_name = data_list[idx]
        call_stmt = func_name + '(' + data_name + ')'
        import_stmt = "from __main__ import  " + func_name + " , " + data_name
        timings = timeit.repeat(stmt=call_stmt, setup=import_stmt, repeat=rep, number=number)
        avg_seconds[func_name] = sum(timings) / rep

    # Fastest first, so ranking[0] is the reference for the relative column.
    ranking = sorted(avg_seconds.items(), key=lambda pair: pair[1])

    for func_name, seconds in ranking:
        ratio = seconds / ranking[0][1]
        print('function : ' + func_name + ', av. time sec : %9.5f, ' % seconds + 'relative: %6.1f' % ratio)

### 8.1 파이썬 패러다임과 성능

In [21]:
from math import *
def f(x):
    """Return sqrt(|cos(x)|) + sin(2 + 3x) for a scalar x, using the math functions in scope."""
    root_term = abs(cos(x)) ** 0.5
    wave_term = sin(2 + 3*x)
    return root_term + wave_term

In [22]:
def f1(a):
    """Apply f to each element of a with an explicit loop and list.append; returns a list.

    Benchmark variant: the loop-and-append form is intentional, it is one of the
    implementations timed against each other further down.
    """
    res = []
    for x in a:
        res.append(f(x))
    return res

def f2(a):
    """Apply f to each element of a with a list comprehension; returns a list.

    Benchmark variant: same result as f1, written as a comprehension.
    """
    return [f(x) for x in a]

def f3(a):
    """Evaluate the expression string with eval() once per element of a; returns a list.

    Benchmark variant: the expression is a fixed literal (no external input), and
    the per-element eval() is the behavior being timed.
    """
    ex = 'abs(cos(x)) ** 0.5 + sin(2 + 3 * x)'
    # `x` inside the string is resolved by eval() to the comprehension variable.
    return [eval(ex) for x in a]

import numpy as np
# Problem size shared by the benchmark inputs.
I = 500000
# NumPy integer range 0..I-1, used as input for the array-based variants (f4-f6).
a_np = np.arange(I)

def f4(a):
    """Vectorized NumPy variant: compute |cos(a)| ** 0.5 + sin(2 + 3a) on the whole array at once."""
    return (np.abs(np.cos(a)) ** 0.5 + np.sin(2 + 3 * a))

import numexpr as ne
def f5(a): 
    """Single-threaded numexpr variant: evaluate the expression string with one thread."""
    ex = 'abs(cos(a)) ** 0.5 + sin(2 + 3 * a)'
    # Sets numexpr's thread count before evaluating; it is not restored afterwards.
    ne.set_num_threads(1)
    # `a` is not passed explicitly; numexpr is expected to resolve the name
    # from this function's frame.
    return ne.evaluate(ex)

def f6(a): 
    """Multi-threaded numexpr variant: evaluate the expression string with 16 threads."""
    ex = 'abs(cos(a)) ** 0.5 + sin(2 + 3 * a)'
    # NOTE(review): 16 is hard-coded; presumably chosen for the author's machine — confirm.
    ne.set_num_threads(16)
    # `a` is not passed explicitly; numexpr is expected to resolve the name
    # from this function's frame.
    return ne.evaluate(ex)


In [23]:
%%time
# Pure-Python input: a range of length I for the scalar variants (f1-f3).
a_py = range(I)
# Run every variant once on its matching input (range for f1-f3, ndarray for f4-f6).
r1 = f1(a_py)
r2 = f2(a_py)
r3 = f3(a_py)
r4 = f4(a_np)
r5 = f5(a_np)
r6 = f6(a_np)

Wall time: 14 s


In [24]:
# Names are given as strings because perf_comp_data builds the timeit statement
# and the `from __main__ import ...` setup line from them.
func_list = ['f1', 'f2', 'f3', 'f4', 'f5', 'f6']
# Positional pairing: data_list[i] is the argument name for func_list[i].
data_list = ['a_py', 'a_py', 'a_py', 'a_np', 'a_np', 'a_np']

perf_comp_data(func_list, data_list)

function : f6, av. time sec :   0.02567, relative:    1.0
function : f5, av. time sec :   0.05929, relative:    2.3
function : f4, av. time sec :   0.06470, relative:    2.5
function : f2, av. time sec :   0.71775, relative:   28.0
function : f1, av. time sec :   0.74055, relative:   28.8
function : f3, av. time sec :  12.08998, relative:  470.9


### 8.2 메모리 배치와 성능

In [15]:
import numpy as np
# order: 'C' for row-major storage (as in C) / 'F' for column-major storage (as in Fortran)
np.zeros((3,3), dtype=np.float64, order = 'C')

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [25]:
# The same 3x3 values requested in C (row-major) memory order ...
C = np.array([[1., 1., 1.],
              [2., 2., 2.],
              [3., 3., 3.]], order = 'C')

# ... and in Fortran (column-major) memory order.
# NOTE(review): this rebinds `f`, which earlier in the notebook names the scalar
# function f(x); f1 and f2 call f(x) and will fail if re-run after this cell.
f = np.array([[1., 1., 1.],
              [2., 2., 2.],
              [3., 3., 3.]], order = 'F')

In [26]:
# 3 x 1,500,000 standard-normal sample, materialized in both memory layouts.
x = np.random.standard_normal((3, 1500000))
C = np.array(x, order='C')
f = np.array(x, order = 'F')
# Rebind x to a float so the sample array is no longer referenced by this name.
x = 0.0

In [27]:
%timeit C.sum(axis=0)

20.4 ms ± 1.73 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [35]:
%timeit C.sum(axis=1)

7.84 ms ± 226 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [31]:
%timeit C.std(axis=0)

86.9 ms ± 6.31 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [32]:
# NOTE(review): this repeats the C.sum(axis=1) timing from two cells above; following
# C.std(axis=0), C.std(axis=1) was presumably intended — confirm.
%timeit C.sum(axis=1)

7.88 ms ± 253 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [33]:
%timeit f.sum(axis=0)

45.7 ms ± 1.35 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [34]:
%timeit f.sum(axis=1)

60.4 ms ± 1.59 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


### 8.3 병렬 컴퓨팅
#### 8.3.1 몬테카를로 알고리즘
#### 8.3.2 순차 계산 방식
#### 8.3.3 병렬 계산 방식

In [37]:
!ipcluster nbextension enable

Enabling IPython clusters tab


### 8.4 멀티프로세싱

In [3]:
import multiprocessing as mp

In [4]:
import math
import numpy as np
def simulate_geometric_brownian_motion(p):
    """
    Simulate geometric Brownian motion paths on an equidistant time grid.

    The model constants are fixed inside the function: start value 100,
    rate 0.05, volatility 0.2 and horizon 1.0. Random shocks are drawn from
    NumPy's global random state, one vector per time step.

    :param p : tuple ``(M, I)`` - number of time steps and number of paths
    :return : array of shape ``(M + 1, I)``; row 0 holds the start value
    """
    n_steps, n_paths = p
    start_value, rate, vol, horizon = 100, 0.05, 0.2, 1.0
    step = horizon / n_steps
    # Per-step constants of the log-return: deterministic drift and shock scale.
    drift = (rate - 0.5 * vol ** 2) * step
    diffusion = vol * math.sqrt(step)

    paths = np.zeros((n_steps + 1, n_paths))
    paths[0] = start_value
    for row in range(n_steps):
        shocks = np.random.standard_normal(n_paths)
        paths[row + 1] = paths[row] * np.exp(drift + diffusion * shocks)

    return paths

In [5]:
# Smoke test: 5 time steps and 2 paths give an array with 6 rows and 2 columns.
paths = simulate_geometric_brownian_motion((5,2))
paths

array([[100.        , 100.        ],
       [106.12227072,  99.72146948],
       [ 99.62841726,  98.57837688],
       [ 88.83376816,  96.21992754],
       [ 79.24293663,  91.61515698],
       [ 95.08086351,  91.16413945]])

In [None]:
# Benchmark: run t simulations of (M steps, I paths) each, for pool sizes 1..16,
# and record the wall-clock time per pool size in `times`.
# NOTE(review): multiprocessing needs the worker function to be importable by the
# child processes; with the spawn start method (the default on Windows) a function
# defined only in the notebook may not be — confirm, or move it into a module.
I = 10000
M = 100
t = 100
from time import time
times = []
for w in range(1, 17):
    t0 = time()
    # The context manager tears the pool down on exit, so worker processes
    # from earlier iterations do not pile up.
    with mp.Pool(processes=w) as pool:
        result = pool.map(simulate_geometric_brownian_motion, t *[(M, I), ])
        # Stop the clock before teardown: pool start-up plus map() is measured.
        elapsed = time() - t0
    # Append to the `times` list; `time` is the imported function and has no append().
    times.append(elapsed)

### 8.5 동적 컴파일
#### 8.5.1 동적 컴파일 소개 예제

In [1]:
from math import cos, log
def f_py(I, J):
    """Pure-Python baseline: add int(cos(log(1))) to a counter I * J times in a nested loop.

    Since log(1) == 0 and cos(0) == 1, each iteration adds 1, so the result is I * J.
    The nested-loop form is intentional; it is the reference that the NumPy and
    Numba variants are timed against.
    """
    res = 0
    for i in range(I):
        for j in range(J):
            res += int(cos(log(1)))
            
    return res

In [2]:
# Loop bounds for the benchmark: 5000 x 5000 = 25,000,000 inner iterations.
I, J = 5000, 5000
%time f_py(I, J)

Wall time: 16.7 s


25000000

In [3]:
import numpy as np
def f_np(I, J):
    """Vectorized NumPy counterpart of f_py.

    Builds an (I, J) float64 array of ones, applies log and cos element-wise and sums.
    Returns a tuple: the sum converted to int, and the array of ones itself.
    """
    a = np.ones((I,J), dtype=np.float64)
    return int(np.sum(np.cos(np.log(a)))), a

In [4]:
%time res, a = f_np(I, J)

Wall time: 978 ms


In [5]:
a.nbytes

200000000

In [6]:
import numba as nb
# NOTE(review): in the recorded run this cell raised ModuleNotFoundError (numba was
# not installed); the import and this statement are repeated in the next two cells.
f_nb = nb.jit(f_py)

ModuleNotFoundError: No module named 'numba'

In [9]:
import numba as nb

In [10]:
# Wrap the pure-Python f_py with numba's jit; the name f_py itself is not rebound.
f_nb = nb.jit(f_py)

In [11]:
%time f_nb(I, J)

Wall time: 669 ms


25000000

### 8.6 Cython을 이용한 정적 컴파일