# ---------------------------------------------------------------------
# Section 2
#   Parallelism with Multiprocessing - Process vs Thread, Parallelism
#   Keyword - Process, Thread, 병렬성
# ---------------------------------------------------------------------

# (1).Parallelism
    - 완전히 동일한 타이밍(시점)에 태스크 실행
    - 다양한 파트(부분)으로 나눠서 실행(합 나눠서 구하고 취합)
    - CPU가 1 Core인 경우에는 멀티프로세싱을 사용해도 병렬성을 만족하지 않음
    - 딥러닝, 비트코인 채굴 등

# (2).Process vs Thread(차이 비교(중요))
    - 독립된 메모리(프로세스)              , 공유메모리(스레드)
    - 많은 메모리 필요(프로세스)           , 적은 메모리(스레드)
    - 좀비(데드)프로세스 생성 가능성       , 좀비(데드) 스레드 생성 쉽지 않음
    - 오버헤드 큼(프로세스)                , 오버헤드 작음(스레드)
    - 생성/소멸 다소 느림(프로세스)        , 생성/소멸 빠름(스레드)
    - 코드 작성 쉬움/디버깅 어려움(프로세스), 코드작성 어려움/디버깅 어려움(스레드)

# multiprocessing(1) - Join, is_alive
#   Keyword - multiprocessing, processing state

In [6]:
'''
Jupyter Notebook에서는 Sub프로세스 실행 출력 나오지 않음 
아래는 py_ad_2_2.py파일 실행으로 얻은 결과 

18:07:31: Main-Process : before creating Process
18:07:31: Main-Process : During Process
18:07:31: Main-Process : Joined Process
Sub-Process 1st: starting
Sub-Process 1st: finishing
Process p is alive: False
'''

from multiprocessing import Process
import time
import logging

# Function executed inside the child process
def proc_func(name):
    """Print a start message, sleep for three seconds, then print a finish message.

    Args:
        name: Label inserted into both printed messages.
    """
    start_template = "Sub-Process {}: starting"
    finish_template = "Sub-Process {}: finishing"

    print(start_template.format(name))
    time.sleep(3)
    print(finish_template.format(name))

def main():
    """Run ``proc_func`` in one child process and wait for it to finish.

    Configures logging with an ``HH:MM:SS`` timestamp prefix, logs the main
    process's progress around ``start()`` and ``join()``, and finally prints
    whether the child process is still alive.
    """
    # Logging format; named log_format so the builtin format() is not shadowed
    log_format = "%(asctime)s: %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO, datefmt="%H:%M:%S")

    # args must be a tuple, hence the trailing comma
    p = Process(target=proc_func, args=('1st',))

    # Logged after the Process object is built but before it is started
    logging.info("Main-Process : before creating Process")

    # Start the child process
    p.start()

    logging.info("Main-Process : During Process")

    # To stop the child early instead of waiting for it, log
    # "Main-Process : Terminated Process" and call p.terminate() here.

    # Logged immediately before the blocking join() call below
    logging.info("Main-Process : Joined Process")
    p.join()

    # Report the process state after join() has returned
    print(f'Process p is alive: {p.is_alive()}')

# Script entry point: run main() only when this file is executed directly
if __name__ == '__main__':
    main()

18:09:52: Main-Process : before creating Process
18:09:52: Main-Process : During Process
18:09:52: Main-Process : Joined Process


Process p is alive: False


In [1]:
# Parallelism with Multiprocessing - multiprocessing(2) - Naming 
#   Keyword - Naming, parallel processing
#   Jupyter Notebook에서는 Sub프로세스 실행 출력 나오지 않음 : temp.py 파일 활용 

from multiprocessing import Process, current_process
import os
import random
import time

# Function executed by each child process
def square(n):
    """Sleep for a random 1-3 seconds, then print the process identity and ``n * n``.

    Args:
        n: Value to multiply by itself; the product is printed, not returned.
    """
    # Random pause; random.randint includes both end points
    pause_seconds = random.randint(1, 3)
    time.sleep(pause_seconds)

    # Square the input
    result = n * n

    # Identify the process that is running this call
    process_name = current_process().name
    process_id = os.getpid()

    # Print the information
    print(f"Process ID: {process_id}, Process Name: {process_name}")
    print(f"Result of {n} square : {result}")


if __name__ == "__main__":
    # Print the parent process ID
    parent_process_id = os.getpid()
    print(f"Parent process ID {parent_process_id}")

    # Started processes are collected here so they can all be joined afterwards
    processes = []

    # Create and start one process per number in range(1, 10), i.e. 1 through 9
    # (adjust the range as needed, e.g. up to 100)
    for number in range(1, 10):
        worker = Process(name=str(number), target=square, args=(number,))
        processes.append(worker)
        worker.start()

    # Wait for every child process to finish
    for worker in processes:
        worker.join()

    # Done
    print("Main-Processing Done!")

Parent process ID 20120
Main-Processing Done!


In [7]:
# Parallelism with Multiprocessing - multiprocessing(3) - ProcessPoolExecutor
#   Keyword - ProcessPoolExecutor, as_completed, futures, timeout, dict
#   temp.py에서 테스트 할 것 
#   Process를 생성해 할당할 경우 : ProcessPoolExecutor 사용 (Jupyter Notebook에서는 사용 불가)
#   https://medium.com/@grvsinghal/speed-up-your-python-code-using-multiprocessing-on-windows-and-jupyter-or-ipython-2714b49d6fac
#   Thread를 생성해 할당할 경우 : ThreadPoolExecutor 사용

# Future는 대기 중인 작업을 큐에 넣고, 완료 상태를 조사하고, 결과 혹은 예외를 가져올 수 있도록 캡슐화
# Executor.map()의 경우, 호출한 순서 그대로 결과를 반환
# Executor.submit()과 concurrent.futures.as_completed()를 함께 사용하면, 완료되는 순서대로 결과 반환

from concurrent.futures import ProcessPoolExecutor, as_completed
import urllib.request

# URLs to fetch
URLS = [
    "http://www.daum.net/",
    "http://www.cnn.com/",
    "http://europe.wsj.com/",
    "http://www.bbc.co.uk/",
    "http://some-made-up-domain.com/",
]

# Function executed by the pool workers
def load_url(url, timeout):
    """Open ``url`` and return everything read from the response.

    Args:
        url: Address passed to ``urllib.request.urlopen``.
        timeout: Timeout value forwarded to ``urlopen``.

    Returns:
        The result of calling ``read()`` on the opened response.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        body = response.read()
    return body

def main():
    """Fetch every URL in ``URLS`` through a process pool and print each outcome.

    One ``load_url`` call (60 second timeout) is submitted per URL to a pool
    of five workers. Results are handled in completion order: either the
    page size in bytes or the exception the call raised is printed.
    """
    # The pool is shut down when the with-block exits
    with ProcessPoolExecutor(max_workers=5) as executor:
        # submit() schedules each call and returns a Future; map Future -> URL
        future_to_url = {}
        for target in URLS:
            pending = executor.submit(load_url, target, 60)
            future_to_url[pending] = target

        # Intermediate check (dict)
        # print(future_to_url)

        # Iterate futures as they finish
        # (passing timeout=1 to as_completed is a handy experiment)
        for future in as_completed(future_to_url):
            # The Future object is the dictionary key
            url = future_to_url[future]
            try:
                data = future.result()
            except Exception as exc:
                # result() re-raises whatever the worker call raised
                print('%r generated an exception: %s' % (url, exc))
                continue

            # Success: report the payload size
            print('%r page is %d bytes' % (url, len(data)))

# Script entry point: run main() only when this file is executed directly
if __name__ == '__main__':
    main()


'http://www.daum.net/' generated an exception: A process in the process pool was terminated abruptly while the future was running or pending.
'http://www.cnn.com/' generated an exception: A process in the process pool was terminated abruptly while the future was running or pending.
'http://europe.wsj.com/' generated an exception: A process in the process pool was terminated abruptly while the future was running or pending.
'http://www.bbc.co.uk/' generated an exception: A process in the process pool was terminated abruptly while the future was running or pending.
'http://some-made-up-domain.com/' generated an exception: A process in the process pool was terminated abruptly while the future was running or pending.
