# Performance analysis of merging dictionaries

https://favtutor.com/blogs/merge-dictionaries-python

There are several solutions on how to merge a dictionary:

1) The update method   
2) The pipe (|)   
3) Using \*\*  
4) Unpacking the second dictionary   
5) The methon collection.ChainMap()   
6) Itertools.chain()   
7) Dictionary comprehensive   
8) Adding values of common keys   

## Preparing a set of dictionaries

In [2]:
import sys 
import os

In [3]:
os.chdir(Ch05)
sonnetfiles=os.listdir('sonnets')

NameError: name 'Ch05' is not defined

In [3]:
def process_file(file):
    proc_dict={}
    with open("sonnets/"+file) as f:
        for lineno, line in enumerate(f):
            for word in line.split():
                # strip punctuation, quotes, ..
                clean_word = word.strip(',.:;\'?').lower()
                location = file + '/' + str(lineno+1)
                #print(clean_word, location)
                if clean_word not in proc_dict:
                    proc_dict[clean_word] = [location]
                else:
                    proc_dict[clean_word].append(location)
    return proc_dict

In [4]:
dict_list = []

for sonnetfile in sonnetfiles:
    dict_list.append(process_file(sonnetfile))
    
print("dictionary 0:\n", dict_list[0])
print("Entry count:",len(dict_list[0]))
print()
print()
print("dictionary 1:\n", dict_list[1])
print("Entry count:",len(dict_list[1]))
print()
print()
print("Dictionary keys in dict_list[0] AND dict_list[1]:\n")
overlap_count = 0
for key in dict_list[0].keys():
    if key in dict_list[1]:
        overlap_count += 1
        print(key)
print("Entry count:", overlap_count)
print()
print("Merged dictionary with unique values:", len(dict_list[0]), "+", len(dict_list[1]), "-", overlap_count, "=", len(dict_list[0])+len(dict_list[1])-overlap_count)

dictionary 0:
 {'whoever': ['CXXXV.txt/1'], 'hath': ['CXXXV.txt/1'], 'her': ['CXXXV.txt/1'], 'wish': ['CXXXV.txt/1'], 'thou': ['CXXXV.txt/1', 'CXXXV.txt/5', 'CXXXV.txt/11'], 'hast': ['CXXXV.txt/1'], 'thy': ['CXXXV.txt/1', 'CXXXV.txt/4', 'CXXXV.txt/11', 'CXXXV.txt/12'], 'will': ['CXXXV.txt/1', 'CXXXV.txt/2', 'CXXXV.txt/2', 'CXXXV.txt/4', 'CXXXV.txt/5', 'CXXXV.txt/6', 'CXXXV.txt/7', 'CXXXV.txt/8', 'CXXXV.txt/11', 'CXXXV.txt/11', 'CXXXV.txt/12', 'CXXXV.txt/12', 'CXXXV.txt/14'], 'and': ['CXXXV.txt/2', 'CXXXV.txt/2', 'CXXXV.txt/5', 'CXXXV.txt/8', 'CXXXV.txt/10', 'CXXXV.txt/14'], 'to': ['CXXXV.txt/2', 'CXXXV.txt/4', 'CXXXV.txt/6', 'CXXXV.txt/10', 'CXXXV.txt/11', 'CXXXV.txt/12'], 'boot': ['CXXXV.txt/2'], 'in': ['CXXXV.txt/2', 'CXXXV.txt/6', 'CXXXV.txt/7', 'CXXXV.txt/8', 'CXXXV.txt/10', 'CXXXV.txt/11', 'CXXXV.txt/14'], 'overplus': ['CXXXV.txt/2'], 'more': ['CXXXV.txt/3', 'CXXXV.txt/12'], 'than': ['CXXXV.txt/3'], 'enough': ['CXXXV.txt/3'], 'am': ['CXXXV.txt/3'], 'i': ['CXXXV.txt/3'], 'that': ['

## 1. The update method

In [5]:
def UpdateMerge(dict1, dict2):
    mydict = {}
    mydict.update(dict1)
    mydict.update(dict2)
    return mydict
    

In [6]:
%timeit UpdateMerge(dict_list[0], dict_list[1])
mergedict = UpdateMerge(dict_list[0], dict_list[1])
print(len(mergedict))

2.05 µs ± 24.3 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
132


## 2. The pipe 

In [7]:
def PipeMerge(dict1, dict2):
    mydict = dict1 | dict2
    return mydict

In [8]:
%timeit PipeMerge(dict_list[0], dict_list[1])
mergedict = PipeMerge(dict_list[0], dict_list[1])
print(len(mergedict))

1.37 µs ± 9.56 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
132


## 3. Using **

In [9]:
def ExpandMerge(dict1, dict2):
    mydict = {**dict1, **dict2}
    return mydict

In [10]:
%timeit mergedict = ExpandMerge(dict_list[0], dict_list[1])
print(len(mergedict))

2.03 µs ± 30.3 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
132


## 4. Unpacking the second dictionaries

In [11]:
def UnpackMerge(dict1, dict2):
    mydict = dict(dict1, **dict2)
    return mydict

In [12]:
%timeit UnpackMerge(dict_list[0], dict_list[1])
mergedict = UnpackMerge(dict_list[0], dict_list[1])
print(len(mergedict))

3.3 µs ± 56.7 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
132


## 5. The methon collection.ChainMap()

In [13]:
from collections import ChainMap

In [14]:
def ChainMapMerge(dict1, dict2):
    mydict = ChainMap(dict1, dict2)
    return mydict

In [15]:
%timeit ChainMapMerge(dict_list[0], dict_list[1])
mergedict = ChainMapMerge(dict_list[0], dict_list[1])
print(len(mergedict))

245 ns ± 3.29 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
132


## 6. Itertools.chain()

In [16]:
import itertools

In [17]:
def IterChainMerge(dict1, dict2):
    mydict = dict(itertools.chain(dict1.items(),dict2.items()))
    return mydict

In [18]:
%timeit IterChainMerge(dict_list[0], dict_list[1])
mergedict = IterChainMerge(dict_list[0], dict_list[1])
print(len(mergedict))

3.63 µs ± 62.9 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
132


## 7. Dictionary comprehensive

In [19]:
def ComprehensiveMerge(dict1, dict2):
    mydict = {k:v for d in (dict1,dict2) for k,v in d.items()}
    return mydict

In [20]:
%timeit ComprehensiveMerge(dict_list[0], dict_list[1])
mergedict = ComprehensiveMerge(dict_list[0], dict_list[1])
print(len(mergedict))

5.98 µs ± 65.7 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
132


## 8. Adding values of common keys

In [21]:
def ListMerge(dict1, dict2):
    mydict = {**dict1, **dict2}
    for key, value in mydict.items():
        if key in dict1 and key in dict2:
               mydict[key] = [value , dict1[key]]
    return mydict

In [22]:
%timeit ListMerge(dict_list[0], dict_list[1])
mergedict = ListMerge(dict_list[0], dict_list[1])
print(len(mergedict))

7.69 µs ± 129 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
132
