# filecmp - File and Directory Comparisons

In [10]:
import filecmp

help(filecmp)

Help on module filecmp:

NAME
    filecmp - Utilities for comparing files and directories.

MODULE REFERENCE
    https://docs.python.org/3.5/library/filecmp.html
    
    The following documentation is automatically generated from the Python
    source files.  It may be incomplete, incorrect or include features that
    are considered implementation detail and may vary between Python
    implementations.  When in doubt, consult the module reference at the
    location listed above.

DESCRIPTION
    Classes:
        dircmp
    
    Functions:
        cmp(f1, f2, shallow=True) -> int
        cmpfiles(a, b, common) -> ([], [], [])
        clear_cache()

CLASSES
    builtins.object
        dircmp
    
    class dircmp(builtins.object)
     |  A class that manages the comparison of 2 directories.
     |  
     |  dircmp(a, b, ignore=None, hide=None)
     |    A and B are directories.
     |    IGNORE is a list of names to ignore,
     |      defaults to DEFAULT_IGNORES.
     |    HIDE is a 

In [13]:
import filecmp

dir(filecmp)

['BUFSIZE',
 'DEFAULT_IGNORES',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_cache',
 '_cmp',
 '_do_cmp',
 '_filter',
 '_sig',
 'clear_cache',
 'cmp',
 'cmpfiles',
 'demo',
 'dircmp',
 'filterfalse',
 'os',
 'stat']

In [14]:
from filecmp import dircmp

dir(dircmp)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'methodmap',
 'phase0',
 'phase1',
 'phase2',
 'phase3',
 'phase4',
 'phase4_closure',
 'report',
 'report_full_closure',
 'report_partial_closure']

### cmp(f1, f2, shallow=True)
Compare file contents.

In [17]:
import filecmp

filecmp.clear_cache()
f1 = '/home/mana/Temp/task1/btheme.txt'
f2 = '/home/mana/Temp/task2/theme.txt'
filecmp.cmp(f1, f2)

True

### cmpfiles(dir1, dir2, common, shallow=True)
Compare the files in the two directories dir1 and dir2 whose names are given by common files.

Returns three lists of file names: match, mismatch, errors. 

match contains the list of files that match, mismatch contains the names of those that don’t, and 

errors lists the names of files which could not be compared. Files are listed in errors if they don’t exist in one of the directories, the user lacks permission to read them or if the comparison could not be done for some other reason.

In [24]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
filecmp.cmpfiles(d1, d2, ['theme.txt'])

(['theme.txt'], [], [])

### How to Check both directories files have same content?

In [6]:
from pathlib import Path
import filecmp

filecmp.clear_cache()
source = Path('/home/mana/test1')
dest = Path('/home/mana/test2')
matched, unmatched, err = filecmp.cmpfiles(str(source), str(dest), 
                             [i.name for i in source.rglob('*')])
print(matched, unmatched, err)

[] [] ['pack.txt']


### clear_cache()
Clear the filecmp cache. 

cmp function uses a cache for past comparisons and the results, with cache entries invalidated if the os.stat() information for the file changes. 

The entire cache may be cleared using clear_cache().

In [66]:
import filecmp

filecmp.clear_cache()
f1 = '/home/mana/Temp/task1/btheme.txt'
f2 = '/home/mana/Temp/task2/theme.txt'
filecmp.cmp(f1, f2)

True

## dircmp class



### dircmp(a, b, ignore=None, hide=None)
compare the directories a and b.

In [6]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
filecmp.dircmp(d1, d2).report()

diff /home/mana/Temp/task1 /home/mana/Temp/task2
Only in /home/mana/Temp/task1 : ['btheme.txt']
Only in /home/mana/Temp/task2 : ['Ad-Mate Menu Color', 'assets.svg', 'render-bits.sh', 'theme.txt']


### report()

In [12]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
filecmp.dircmp(d1, d2).report()

diff /home/mana/Temp/task1 /home/mana/Temp/task2
Only in /home/mana/Temp/task1 : ['btheme.txt']
Only in /home/mana/Temp/task2 : ['Ad-Mate Menu Color', 'assets.svg', 'render-bits.sh', 'theme.txt']


In [19]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.report()

diff /home/mana/Temp/task1 /home/mana/Temp/task2
Only in /home/mana/Temp/task1 : ['btheme.txt']
Only in /home/mana/Temp/task2 : ['assets.svg', 'render-bits.sh', 'theme.txt']


### report_partial_closure()
Print a comparison between a and b and common immediate subdirectories.


In [8]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
filecmp.dircmp(d1, d2).report_partial_closure()

diff /home/mana/Temp/task1 /home/mana/Temp/task2
Only in /home/mana/Temp/task1 : ['btheme.txt']
Only in /home/mana/Temp/task2 : ['Ad-Mate Menu Color', 'assets.svg', 'render-bits.sh', 'theme.txt']


### report_full_closure()
Print a comparison between a and b and common subdirectories (recursively).

In [9]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
filecmp.dircmp(d1, d2).report_full_closure()

diff /home/mana/Temp/task1 /home/mana/Temp/task2
Only in /home/mana/Temp/task1 : ['btheme.txt']
Only in /home/mana/Temp/task2 : ['Ad-Mate Menu Color', 'assets.svg', 'render-bits.sh', 'theme.txt']


## dircmp class variables

### left
The directory a.

In [15]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.left

'/home/mana/Temp/task1'

### right
The directory b.

In [18]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.right

'/home/mana/Temp/task2'

### left_list
Files and subdirectories in a, filtered by hide and ignore.

In [20]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.left_list

['btheme.txt']

### right_list
Files and subdirectories in b, filtered by hide and ignore.



In [21]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.right_list

['assets.svg', 'render-bits.sh', 'theme.txt']

### common_files
Files in both a and b.It checks only file name not contents.

In [40]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.common

['theme.txt']

### left_only
Files and subdirectories only in a.

In [35]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.left_only

[]

### right_only
Files and subdirectories only in b.



In [36]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.right_only

['render-bits.sh', 'assets.svg']

### common_dirs
Subdirectories in both a and b.



In [60]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.common_dirs

['rust']

### common_files
Files in both a and b.



In [43]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.common_files

['theme.txt']

### common_funny
Names in both a and b, such that the type differs between the directories, or names.


In [42]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.common_funny

[]

### same_files
Files which are identical(contents) in both a and b, using the class’s file comparison operator.



In [47]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.same_files

['theme.txt']

### diff_files
Files which are in both a and b, whose contents differ according to the class’s file comparison operator.

In [50]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.diff_files

[]

### funny_files
Files which are in both a and b, but could not be compared.



In [51]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.funny_files

[]

### subdirs
A dictionary mapping names in common_dirs to dircmp objects.



In [58]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
x.subdirs.values()

dict_values([])

# Excercise

### How to find duplicate files between 2 directories?
This is program also check identical contents.

In [61]:
import filecmp

filecmp.clear_cache()
d1 = '/home/mana/Temp/task1'
d2 = '/home/mana/Temp/task2'
x = filecmp.dircmp(d1, d2)
print('These files are same file name:')
print(*x.common, sep = ',')
print('')
print('These files are same contents:')
print(*x.same_files, sep = ',')

These files are same file name:
rust,theme.txt,Antiloop.mp3

These files are same contents:
theme.txt,Antiloop.mp3


### How to delete duplicate files to compare 2 directories?

In [76]:
import filecmp
from pathlib import Path

filecmp.clear_cache()
d1 = Path('/home/mana/Temp/task1')
d2 = Path('/home/mana/Temp/task2')
print('These same files are removed:')
x = filecmp.dircmp(str(d1), str(d2))
for i in  x.common and x.same_files:
    print(d1.joinpath(i))
    d1.joinpath(i).unlink()

These same files are removed:
/home/mana/Temp/task1/theme.txt
/home/mana/Temp/task1/Antiloop.mp3
