In [10]:
import pandas as pd
import numpy as np
import time

from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool


def multithreading_apply(df, func, n_threads=10):
    """Apply ``func`` to row-wise chunks of ``df`` using a thread pool.

    The frame is cut into at most ``n_threads * 2`` contiguous row chunks
    (never more chunks than rows, and always at least one), ``func`` is
    called once per chunk on a pool of ``n_threads`` worker threads, and
    the per-chunk results are concatenated in their original order.

    Args:
        df: pandas object supporting ``len()`` and ``.iloc`` (e.g. a
            DataFrame).
        func: callable taking one chunk and returning something
            ``pd.concat`` accepts (Series or DataFrame).
        n_threads: number of worker threads in the pool.

    Returns:
        The result of ``pd.concat`` over the per-chunk results.

    Raises:
        ValueError: if ``n_threads`` is less than 1 (raised by the pool).
        Exception: whatever ``func`` raises is propagated after the pool
            has been shut down.
    """
    # Cap the chunk count by the row count so func is not handed empty
    # chunks; keep one chunk even for an empty frame so pd.concat still
    # receives a result.
    n_chunks = max(1, min(n_threads * 2, len(df)))
    # Split positional indices instead of the frame itself: np.array_split
    # on a DataFrame is routed through DataFrame.swapaxes, which pandas
    # deprecated in 2.1.
    positions = np.array_split(np.arange(len(df)), n_chunks)
    chunks = [df.iloc[idx] for idx in positions]

    pool = ThreadPool(n_threads)
    try:
        results = pool.map(func, chunks)
    finally:
        # Always release the worker threads, even when func raises.
        pool.close()
        pool.join()
    return pd.concat(results)

def multiprocessing_apply(df, func, n_cores=4):
    """Apply ``func`` to row-wise chunks of ``df`` using a process pool.

    The frame is cut into at most ``n_cores * 2`` contiguous row chunks
    (never more chunks than rows, and always at least one), ``func`` is
    called once per chunk on a pool of ``n_cores`` worker processes, and
    the per-chunk results are concatenated in their original order.

    Args:
        df: pandas object supporting ``len()`` and ``.iloc`` (e.g. a
            DataFrame).
        func: picklable callable (e.g. a module-level function) taking one
            chunk and returning something ``pd.concat`` accepts.
        n_cores: number of worker processes in the pool.

    Returns:
        The result of ``pd.concat`` over the per-chunk results.

    Raises:
        ValueError: if ``n_cores`` is less than 1 (raised by the pool).
        Exception: whatever ``func`` raises in a worker is re-raised here
            after the pool has been shut down.
    """
    # Cap the chunk count by the row count so func is not handed empty
    # chunks; keep one chunk even for an empty frame so pd.concat still
    # receives a result.
    n_chunks = max(1, min(n_cores * 2, len(df)))
    # Split positional indices instead of the frame itself: np.array_split
    # on a DataFrame is routed through DataFrame.swapaxes, which pandas
    # deprecated in 2.1.
    positions = np.array_split(np.arange(len(df)), n_chunks)
    chunks = [df.iloc[idx] for idx in positions]

    pool = Pool(n_cores)
    try:
        results = pool.map(func, chunks)
    finally:
        # Always shut the worker processes down, even when func raises.
        pool.close()
        pool.join()
    return pd.concat(results)

def func(df):
    """Return ``dummy_value ** 2 + 1`` for every row of ``df``.

    The value is computed one row at a time through ``DataFrame.apply``
    with ``axis=1``; the result is indexed like ``df``.
    NOTE(review): presumably kept row-wise (not vectorized) so the
    benchmark has a slow, CPU-bound workload -- confirm before optimizing.
    """
    def _square_plus_one(row):
        squared = row['dummy_value']**2
        return squared + 1

    return df.apply(_square_plus_one, axis=1)
    
if __name__ == '__main__':
    # Benchmark the same row-wise computation run three ways: in the main
    # process, on a thread pool, and on a process pool.
    df = pd.DataFrame([1,2,3,4]*100000,columns=['dummy_value'])
    print('data_size:',len(df))

    # Time all three strategies on the identical single-column frame.
    # Storing each result in df between timings would make every later run
    # process (and, for multiprocessing, pickle) a wider frame, which
    # biases the comparison. perf_counter is a monotonic,
    # high-resolution clock intended for measuring intervals.
    t = time.perf_counter()
    single_result = func(df)
    t1 = time.perf_counter()
    threaded_result = multithreading_apply(df,func)
    t2 = time.perf_counter()
    multiprocess_result = multiprocessing_apply(df,func)
    t3 = time.perf_counter()

    # Attach the results only after all timing is done.
    df['single_processed_value'] = single_result
    df['processed_value'] = threaded_result
    df['processed2_value'] = multiprocess_result

    print('singleprocessing',np.round(t1-t,4),'sec')
    print('multithreading:',np.round(t2-t1,4),'sec')
    print('multiprocessing:',np.round(t3-t2,4),'sec')

data_size: 400000
singleprocessing 6.5786 sec
multitheading: 6.828 sec
multiprocessing: 2.5086 sec


### result 
계산 위주(CPU-bound) 작업에서는 GIL 때문에 threading으로 얻는 이득이 없음. => 파일·네트워크 I/O처럼 블로킹이 발생하는 작업에서는 threading으로 이득이 발생함.  
multiprocessing의 경우 코어 수가 늘어날수록 성능이 향상됨. 다만 프로세스 생성과 데이터 직렬화 오버헤드 때문에 완전히 비례하지는 않음 (위 결과는 프로세스 4개로 약 2.6배).