In [2]:
import os
import time
from functools import reduce
from collections import Counter
from pprint import pprint
from numba import jit
from concurrent.futures import ThreadPoolExecutor

In [3]:
# Ask for the folder that holds the input files; a relative name is resolved
# against whatever directory the kernel is in when this cell runs.
folder_name = input("Fill in the folder name in current directory: ")
path = os.path.abspath(folder_name)
# Make that folder the working directory: the timing cell calls os.listdir()
# with no argument and opens the returned names as-is.
# NOTE(review): chdir is process-wide state, so re-running this cell with the
# same relative name resolves it inside the folder entered the first time.
os.chdir(path)

Fill in the folder name in current directory: documents


In [8]:
def read_file(file_path):
    """Return the full text content of the file at ``file_path``.

    The file is opened read-only in text mode (no explicit encoding is
    passed to ``open``) and is closed before the function returns.
    """
    with open(file_path, mode='r') as source:
        contents = source.read()
    return contents

@jit(nogil=True)
def hard_computation():
    """Spin through a fixed-length loop, apparently as a stand-in for costly work.

    Nothing is accumulated or returned: the body only increments its own
    loop variable, and that value is overwritten by the next iteration.
    """
    for step in range(1, 10000000):
        step = step + 1

@jit(nogil=True)
def count_unique_words(str):
    """Tally how many times each newline-separated entry occurs in ``str``.

    ``hard_computation()`` is run once before any counting starts.

    Parameters:
        str: the text to analyse. It is split on ``'\\n'`` only, so every
            line is treated as one "word". (The name shadows the built-in
            ``str`` inside this function.)

    Returns:
        A dict mapping each distinct line to its number of occurrences.
    """
    hard_computation()
    tally = {}
    for token in str.split('\n'):
        if token not in tally:
            # First sighting of this entry.
            tally[token] = 1
        else:
            tally[token] += 1
    return tally

def merge_counters(counter1, counter2):
    """Combine two tallies into a new ``Counter`` by adding their counts.

    Both arguments are copied into fresh ``Counter`` objects first, so the
    inputs are left untouched. The merge uses ``Counter`` addition.
    """
    first_tally = Counter(counter1)
    second_tally = Counter(counter2)
    return first_tally + second_tally
        

In [9]:
start_time = time.time()
# Only regular files are read: os.listdir() also returns sub-directories, and
# opening one of those would raise IsADirectoryError.
file_names = [name for name in os.listdir() if os.path.isfile(name)]
# The reads run here, on the calling thread, before the pool is used.
# Author's rationale, per https://wiki.python.org/moin/GlobalInterpreterLock:
# potentially blocking or long-running operations such as I/O happen outside
# the GIL, so only the counting step is handed to the ThreadPoolExecutor.
filelist = [read_file(name) for name in file_names]
with ThreadPoolExecutor(6) as executor:
    # Materialise the results inside the block so every task has finished
    # (and any worker exception has surfaced) before merging starts.
    file_word_counters = list(executor.map(count_unique_words, filelist))
# Seeding the fold with an empty Counter keeps an empty folder from raising
# TypeError and guarantees a Counter result even when there is a single file.
total_word_counter = reduce(merge_counters, file_word_counters, Counter())

pprint(total_word_counter)
print("Time elapsed: %s sec" % (time.time() - start_time))

Counter({'32': 99604,
         '1': 99591,
         '98': 99564,
         '94': 99535,
         '48': 99494,
         '11': 99488,
         '40': 99438,
         '8': 99419,
         '39': 99416,
         '89': 99415,
         '55': 99388,
         '16': 99383,
         '58': 99369,
         '83': 99359,
         '3': 99356,
         '18': 99336,
         '80': 99331,
         '61': 99330,
         '41': 99321,
         '6': 99311,
         '30': 99299,
         '78': 99298,
         '9': 99267,
         '23': 99247,
         '74': 99246,
         '44': 99232,
         '95': 99230,
         '73': 99215,
         '22': 99211,
         '72': 99191,
         '13': 99184,
         '97': 99168,
         '57': 99137,
         '20': 99117,
         '91': 99115,
         '71': 99115,
         '7': 99113,
         '27': 99099,
         '60': 99099,
         '100': 99094,
         '35': 99071,
         '17': 99067,
         '96': 99057,
         '28': 99046,
         '62': 99046,
         '59': 

This is the multi-threaded implementation, which uses _map/reduce_ functionality.

We can compare the elapsed time of all three implementations and then draw a conclusion.

***Time elapsed***
1. 1.9172611236572266 sec
2. 9.725282907485962 sec
3. 1.4893684387207031 sec (the winner)

Wrapping up, the multi-threaded implementation is the fastest. It's even faster than the first one (though not every time =), which uses a better implementation of the __count_unique_words__ function and doesn't even execute __hard_computation__ inside every iteration.

Also, I would like to add that using __ProcessPoolExecutor__ instead of __ThreadPoolExecutor__ is in some cases even worse than using a solution without process allocation. That's because we _spend more time allocating resources for these processes than computing the task_.